blob: 8da5447974b8b33be0423143e2c95381503000e3 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qu8-gemm-minmax-fp32.yaml
//   Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/allocator.h>
17#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
26#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
27 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
28 TEST_REQUIRES_ARM_NEON_DOT;
29 GemmMicrokernelTester()
30 .mr(4)
31 .nr(16)
32 .kr(4)
33 .sr(1)
34 .m(4)
35 .n(16)
36 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080037 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080038 }
39
40 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
41 TEST_REQUIRES_ARM_NEON_DOT;
42 GemmMicrokernelTester()
43 .mr(4)
44 .nr(16)
45 .kr(4)
46 .sr(1)
47 .m(4)
48 .n(16)
49 .k(16)
50 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080051 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080052 }
53
54 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_strided_a) {
55 TEST_REQUIRES_ARM_NEON_DOT;
56 GemmMicrokernelTester()
57 .mr(4)
58 .nr(16)
59 .kr(4)
60 .sr(1)
61 .m(4)
62 .n(16)
63 .k(16)
64 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080065 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080066 }
67
68 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
69 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080070 for (uint32_t n = 1; n <= 16; n++) {
71 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080072 GemmMicrokernelTester()
73 .mr(4)
74 .nr(16)
75 .kr(4)
76 .sr(1)
77 .m(m)
78 .n(n)
79 .k(16)
80 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080081 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080082 }
83 }
84 }
85
86 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
87 TEST_REQUIRES_ARM_NEON_DOT;
88 for (uint32_t m = 1; m <= 4; m++) {
89 GemmMicrokernelTester()
90 .mr(4)
91 .nr(16)
92 .kr(4)
93 .sr(1)
94 .m(m)
95 .n(16)
96 .k(16)
97 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080098 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080099 }
100 }
101
102 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
103 TEST_REQUIRES_ARM_NEON_DOT;
104 for (uint32_t n = 1; n <= 16; n++) {
105 GemmMicrokernelTester()
106 .mr(4)
107 .nr(16)
108 .kr(4)
109 .sr(1)
110 .m(4)
111 .n(n)
112 .k(16)
113 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800114 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800115 }
116 }
117
118 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
119 TEST_REQUIRES_ARM_NEON_DOT;
120 for (size_t k = 1; k < 16; k++) {
121 GemmMicrokernelTester()
122 .mr(4)
123 .nr(16)
124 .kr(4)
125 .sr(1)
126 .m(4)
127 .n(16)
128 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800129 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800130 }
131 }
132
133 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_strided_a) {
134 TEST_REQUIRES_ARM_NEON_DOT;
135 for (size_t k = 1; k < 16; k++) {
136 GemmMicrokernelTester()
137 .mr(4)
138 .nr(16)
139 .kr(4)
140 .sr(1)
141 .m(4)
142 .n(16)
143 .k(k)
144 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800145 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800146 }
147 }
148
149 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
150 TEST_REQUIRES_ARM_NEON_DOT;
151 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800152 for (uint32_t n = 1; n <= 16; n++) {
153 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800154 GemmMicrokernelTester()
155 .mr(4)
156 .nr(16)
157 .kr(4)
158 .sr(1)
159 .m(m)
160 .n(n)
161 .k(k)
162 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800163 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800164 }
165 }
166 }
167 }
168
169 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
170 TEST_REQUIRES_ARM_NEON_DOT;
171 for (size_t k = 17; k < 32; k++) {
172 GemmMicrokernelTester()
173 .mr(4)
174 .nr(16)
175 .kr(4)
176 .sr(1)
177 .m(4)
178 .n(16)
179 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800180 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800181 }
182 }
183
184 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_strided_a) {
185 TEST_REQUIRES_ARM_NEON_DOT;
186 for (size_t k = 17; k < 32; k++) {
187 GemmMicrokernelTester()
188 .mr(4)
189 .nr(16)
190 .kr(4)
191 .sr(1)
192 .m(4)
193 .n(16)
194 .k(k)
195 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -0800196 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800197 }
198 }
199
200 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
201 TEST_REQUIRES_ARM_NEON_DOT;
202 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800203 for (uint32_t n = 1; n <= 16; n++) {
204 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800205 GemmMicrokernelTester()
206 .mr(4)
207 .nr(16)
208 .kr(4)
209 .sr(1)
210 .m(m)
211 .n(n)
212 .k(k)
213 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800214 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800215 }
216 }
217 }
218 }
219
220 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
221 TEST_REQUIRES_ARM_NEON_DOT;
222 for (size_t k = 32; k <= 160; k += 16) {
223 GemmMicrokernelTester()
224 .mr(4)
225 .nr(16)
226 .kr(4)
227 .sr(1)
228 .m(4)
229 .n(16)
230 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800231 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800232 }
233 }
234
235 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_strided_a) {
236 TEST_REQUIRES_ARM_NEON_DOT;
237 for (size_t k = 32; k <= 160; k += 16) {
238 GemmMicrokernelTester()
239 .mr(4)
240 .nr(16)
241 .kr(4)
242 .sr(1)
243 .m(4)
244 .n(16)
245 .k(k)
246 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -0800247 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800248 }
249 }
250
251 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
252 TEST_REQUIRES_ARM_NEON_DOT;
253 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800254 for (uint32_t n = 1; n <= 16; n++) {
255 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800256 GemmMicrokernelTester()
257 .mr(4)
258 .nr(16)
259 .kr(4)
260 .sr(1)
261 .m(m)
262 .n(n)
263 .k(k)
264 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800265 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800266 }
267 }
268 }
269 }
270
271 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
272 TEST_REQUIRES_ARM_NEON_DOT;
273 for (uint32_t n = 17; n < 32; n++) {
274 for (size_t k = 1; k <= 80; k += 17) {
275 GemmMicrokernelTester()
276 .mr(4)
277 .nr(16)
278 .kr(4)
279 .sr(1)
280 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800281 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800282 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800283 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800284 }
285 }
286 }
287
288 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
289 TEST_REQUIRES_ARM_NEON_DOT;
290 for (uint32_t n = 17; n < 32; n++) {
291 for (size_t k = 1; k <= 80; k += 17) {
292 GemmMicrokernelTester()
293 .mr(4)
294 .nr(16)
295 .kr(4)
296 .sr(1)
297 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800298 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800299 .k(k)
300 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800301 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800302 }
303 }
304 }
305
306 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_a) {
307 TEST_REQUIRES_ARM_NEON_DOT;
308 for (uint32_t n = 17; n < 32; n++) {
309 for (size_t k = 1; k <= 80; k += 17) {
310 GemmMicrokernelTester()
311 .mr(4)
312 .nr(16)
313 .kr(4)
314 .sr(1)
315 .m(4)
316 .n(n)
317 .k(k)
318 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800319 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800320 }
321 }
322 }
323
324 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
325 TEST_REQUIRES_ARM_NEON_DOT;
326 for (uint32_t n = 17; n < 32; n++) {
327 for (size_t k = 1; k <= 80; k += 17) {
328 for (uint32_t m = 1; m <= 4; m++) {
329 GemmMicrokernelTester()
330 .mr(4)
331 .nr(16)
332 .kr(4)
333 .sr(1)
334 .m(m)
335 .n(n)
336 .k(k)
337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800338 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800339 }
340 }
341 }
342 }
343
344 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
345 TEST_REQUIRES_ARM_NEON_DOT;
346 for (uint32_t n = 32; n <= 48; n += 16) {
347 for (size_t k = 1; k <= 80; k += 17) {
348 GemmMicrokernelTester()
349 .mr(4)
350 .nr(16)
351 .kr(4)
352 .sr(1)
353 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800354 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800355 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800356 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800357 }
358 }
359 }
360
361 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
362 TEST_REQUIRES_ARM_NEON_DOT;
363 for (uint32_t n = 32; n <= 48; n += 16) {
364 for (size_t k = 1; k <= 80; k += 17) {
365 GemmMicrokernelTester()
366 .mr(4)
367 .nr(16)
368 .kr(4)
369 .sr(1)
370 .m(4)
371 .n(n)
372 .k(k)
373 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800374 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800375 }
376 }
377 }
378
379 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_a) {
380 TEST_REQUIRES_ARM_NEON_DOT;
381 for (uint32_t n = 32; n <= 48; n += 16) {
382 for (size_t k = 1; k <= 80; k += 17) {
383 GemmMicrokernelTester()
384 .mr(4)
385 .nr(16)
386 .kr(4)
387 .sr(1)
388 .m(4)
389 .n(n)
390 .k(k)
391 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800392 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800393 }
394 }
395 }
396
397 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
398 TEST_REQUIRES_ARM_NEON_DOT;
399 for (uint32_t n = 32; n <= 48; n += 16) {
400 for (size_t k = 1; k <= 80; k += 17) {
401 for (uint32_t m = 1; m <= 4; m++) {
402 GemmMicrokernelTester()
403 .mr(4)
404 .nr(16)
405 .kr(4)
406 .sr(1)
407 .m(m)
408 .n(n)
409 .k(k)
410 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800411 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800412 }
413 }
414 }
415 }
416
417 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
418 TEST_REQUIRES_ARM_NEON_DOT;
419 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800420 for (uint32_t n = 1; n <= 16; n++) {
421 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800422 GemmMicrokernelTester()
423 .mr(4)
424 .nr(16)
425 .kr(4)
426 .sr(1)
427 .m(m)
428 .n(n)
429 .k(k)
430 .cm_stride(19)
431 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800432 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800433 }
434 }
435 }
436 }
437
438 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
439 TEST_REQUIRES_ARM_NEON_DOT;
440 GemmMicrokernelTester()
441 .mr(4)
442 .nr(16)
443 .kr(4)
444 .sr(1)
445 .m(4)
446 .n(16)
447 .k(16)
448 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800449 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800450 }
451
452 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
453 TEST_REQUIRES_ARM_NEON_DOT;
454 GemmMicrokernelTester()
455 .mr(4)
456 .nr(16)
457 .kr(4)
458 .sr(1)
459 .m(4)
460 .n(16)
461 .k(16)
462 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800463 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800464 }
465
466 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
467 TEST_REQUIRES_ARM_NEON_DOT;
468 GemmMicrokernelTester()
469 .mr(4)
470 .nr(16)
471 .kr(4)
472 .sr(1)
473 .m(4)
474 .n(16)
475 .k(16)
476 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800477 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800478 }
479
480 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_a_zero_point) {
481 TEST_REQUIRES_ARM_NEON_DOT;
482 for (size_t k = 1; k <= 80; k += 17) {
483 GemmMicrokernelTester()
484 .mr(4)
485 .nr(16)
486 .kr(4)
487 .sr(1)
488 .m(4)
489 .n(16)
490 .k(k)
491 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -0800492 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800493 }
494 }
495
496 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_b_zero_point) {
497 TEST_REQUIRES_ARM_NEON_DOT;
498 for (size_t k = 1; k <= 80; k += 17) {
499 GemmMicrokernelTester()
500 .mr(4)
501 .nr(16)
502 .kr(4)
503 .sr(1)
504 .m(4)
505 .n(16)
506 .k(k)
507 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -0800508 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800509 }
510 }
511
512 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_zero_point) {
513 TEST_REQUIRES_ARM_NEON_DOT;
514 for (size_t k = 1; k <= 80; k += 17) {
515 GemmMicrokernelTester()
516 .mr(4)
517 .nr(16)
518 .kr(4)
519 .sr(1)
520 .m(4)
521 .n(16)
522 .k(k)
523 .a_zero_point(0)
524 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -0800525 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800526 }
527 }
528#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
529
530
531#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
532 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16) {
533 TEST_REQUIRES_ARM_NEON_DOT;
534 GemmMicrokernelTester()
535 .mr(4)
536 .nr(16)
537 .kr(4)
538 .sr(1)
539 .m(4)
540 .n(16)
541 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800542 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800543 }
544
545 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cn) {
546 TEST_REQUIRES_ARM_NEON_DOT;
547 GemmMicrokernelTester()
548 .mr(4)
549 .nr(16)
550 .kr(4)
551 .sr(1)
552 .m(4)
553 .n(16)
554 .k(16)
555 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800556 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800557 }
558
559 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_strided_a) {
560 TEST_REQUIRES_ARM_NEON_DOT;
561 GemmMicrokernelTester()
562 .mr(4)
563 .nr(16)
564 .kr(4)
565 .sr(1)
566 .m(4)
567 .n(16)
568 .k(16)
569 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800570 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800571 }
572
573 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) {
574 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800575 for (uint32_t n = 1; n <= 16; n++) {
576 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800577 GemmMicrokernelTester()
578 .mr(4)
579 .nr(16)
580 .kr(4)
581 .sr(1)
582 .m(m)
583 .n(n)
584 .k(16)
585 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800586 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800587 }
588 }
589 }
590
591 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) {
592 TEST_REQUIRES_ARM_NEON_DOT;
593 for (uint32_t m = 1; m <= 4; m++) {
594 GemmMicrokernelTester()
595 .mr(4)
596 .nr(16)
597 .kr(4)
598 .sr(1)
599 .m(m)
600 .n(16)
601 .k(16)
602 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800603 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800604 }
605 }
606
607 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) {
608 TEST_REQUIRES_ARM_NEON_DOT;
609 for (uint32_t n = 1; n <= 16; n++) {
610 GemmMicrokernelTester()
611 .mr(4)
612 .nr(16)
613 .kr(4)
614 .sr(1)
615 .m(4)
616 .n(n)
617 .k(16)
618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800619 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800620 }
621 }
622
623 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16) {
624 TEST_REQUIRES_ARM_NEON_DOT;
625 for (size_t k = 1; k < 16; k++) {
626 GemmMicrokernelTester()
627 .mr(4)
628 .nr(16)
629 .kr(4)
630 .sr(1)
631 .m(4)
632 .n(16)
633 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800634 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800635 }
636 }
637
638 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_strided_a) {
639 TEST_REQUIRES_ARM_NEON_DOT;
640 for (size_t k = 1; k < 16; k++) {
641 GemmMicrokernelTester()
642 .mr(4)
643 .nr(16)
644 .kr(4)
645 .sr(1)
646 .m(4)
647 .n(16)
648 .k(k)
649 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800650 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800651 }
652 }
653
654 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) {
655 TEST_REQUIRES_ARM_NEON_DOT;
656 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800657 for (uint32_t n = 1; n <= 16; n++) {
658 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800659 GemmMicrokernelTester()
660 .mr(4)
661 .nr(16)
662 .kr(4)
663 .sr(1)
664 .m(m)
665 .n(n)
666 .k(k)
667 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800668 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800669 }
670 }
671 }
672 }
673
674 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16) {
675 TEST_REQUIRES_ARM_NEON_DOT;
676 for (size_t k = 17; k < 32; k++) {
677 GemmMicrokernelTester()
678 .mr(4)
679 .nr(16)
680 .kr(4)
681 .sr(1)
682 .m(4)
683 .n(16)
684 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800685 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800686 }
687 }
688
689 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_strided_a) {
690 TEST_REQUIRES_ARM_NEON_DOT;
691 for (size_t k = 17; k < 32; k++) {
692 GemmMicrokernelTester()
693 .mr(4)
694 .nr(16)
695 .kr(4)
696 .sr(1)
697 .m(4)
698 .n(16)
699 .k(k)
700 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -0800701 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800702 }
703 }
704
705 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_subtile) {
706 TEST_REQUIRES_ARM_NEON_DOT;
707 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800708 for (uint32_t n = 1; n <= 16; n++) {
709 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800710 GemmMicrokernelTester()
711 .mr(4)
712 .nr(16)
713 .kr(4)
714 .sr(1)
715 .m(m)
716 .n(n)
717 .k(k)
718 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800719 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800720 }
721 }
722 }
723 }
724
725 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16) {
726 TEST_REQUIRES_ARM_NEON_DOT;
727 for (size_t k = 32; k <= 160; k += 16) {
728 GemmMicrokernelTester()
729 .mr(4)
730 .nr(16)
731 .kr(4)
732 .sr(1)
733 .m(4)
734 .n(16)
735 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800736 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800737 }
738 }
739
740 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_strided_a) {
741 TEST_REQUIRES_ARM_NEON_DOT;
742 for (size_t k = 32; k <= 160; k += 16) {
743 GemmMicrokernelTester()
744 .mr(4)
745 .nr(16)
746 .kr(4)
747 .sr(1)
748 .m(4)
749 .n(16)
750 .k(k)
751 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -0800752 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800753 }
754 }
755
756 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) {
757 TEST_REQUIRES_ARM_NEON_DOT;
758 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800759 for (uint32_t n = 1; n <= 16; n++) {
760 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800761 GemmMicrokernelTester()
762 .mr(4)
763 .nr(16)
764 .kr(4)
765 .sr(1)
766 .m(m)
767 .n(n)
768 .k(k)
769 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800770 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800771 }
772 }
773 }
774 }
775
776 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16) {
777 TEST_REQUIRES_ARM_NEON_DOT;
778 for (uint32_t n = 17; n < 32; n++) {
779 for (size_t k = 1; k <= 80; k += 17) {
780 GemmMicrokernelTester()
781 .mr(4)
782 .nr(16)
783 .kr(4)
784 .sr(1)
785 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800786 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800787 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800788 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800789 }
790 }
791 }
792
793 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_cn) {
794 TEST_REQUIRES_ARM_NEON_DOT;
795 for (uint32_t n = 17; n < 32; n++) {
796 for (size_t k = 1; k <= 80; k += 17) {
797 GemmMicrokernelTester()
798 .mr(4)
799 .nr(16)
800 .kr(4)
801 .sr(1)
802 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800803 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800804 .k(k)
805 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800806 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800807 }
808 }
809 }
810
811 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_a) {
812 TEST_REQUIRES_ARM_NEON_DOT;
813 for (uint32_t n = 17; n < 32; n++) {
814 for (size_t k = 1; k <= 80; k += 17) {
815 GemmMicrokernelTester()
816 .mr(4)
817 .nr(16)
818 .kr(4)
819 .sr(1)
820 .m(4)
821 .n(n)
822 .k(k)
823 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800824 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800825 }
826 }
827 }
828
829 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_subtile) {
830 TEST_REQUIRES_ARM_NEON_DOT;
831 for (uint32_t n = 17; n < 32; n++) {
832 for (size_t k = 1; k <= 80; k += 17) {
833 for (uint32_t m = 1; m <= 4; m++) {
834 GemmMicrokernelTester()
835 .mr(4)
836 .nr(16)
837 .kr(4)
838 .sr(1)
839 .m(m)
840 .n(n)
841 .k(k)
842 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800843 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800844 }
845 }
846 }
847 }
848
849 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16) {
850 TEST_REQUIRES_ARM_NEON_DOT;
851 for (uint32_t n = 32; n <= 48; n += 16) {
852 for (size_t k = 1; k <= 80; k += 17) {
853 GemmMicrokernelTester()
854 .mr(4)
855 .nr(16)
856 .kr(4)
857 .sr(1)
858 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800859 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800860 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800861 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800862 }
863 }
864 }
865
866 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_cn) {
867 TEST_REQUIRES_ARM_NEON_DOT;
868 for (uint32_t n = 32; n <= 48; n += 16) {
869 for (size_t k = 1; k <= 80; k += 17) {
870 GemmMicrokernelTester()
871 .mr(4)
872 .nr(16)
873 .kr(4)
874 .sr(1)
875 .m(4)
876 .n(n)
877 .k(k)
878 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800879 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800880 }
881 }
882 }
883
884 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_a) {
885 TEST_REQUIRES_ARM_NEON_DOT;
886 for (uint32_t n = 32; n <= 48; n += 16) {
887 for (size_t k = 1; k <= 80; k += 17) {
888 GemmMicrokernelTester()
889 .mr(4)
890 .nr(16)
891 .kr(4)
892 .sr(1)
893 .m(4)
894 .n(n)
895 .k(k)
896 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800897 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800898 }
899 }
900 }
901
902 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_subtile) {
903 TEST_REQUIRES_ARM_NEON_DOT;
904 for (uint32_t n = 32; n <= 48; n += 16) {
905 for (size_t k = 1; k <= 80; k += 17) {
906 for (uint32_t m = 1; m <= 4; m++) {
907 GemmMicrokernelTester()
908 .mr(4)
909 .nr(16)
910 .kr(4)
911 .sr(1)
912 .m(m)
913 .n(n)
914 .k(k)
915 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800916 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800917 }
918 }
919 }
920 }
921
922 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) {
923 TEST_REQUIRES_ARM_NEON_DOT;
924 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800925 for (uint32_t n = 1; n <= 16; n++) {
926 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800927 GemmMicrokernelTester()
928 .mr(4)
929 .nr(16)
930 .kr(4)
931 .sr(1)
932 .m(m)
933 .n(n)
934 .k(k)
935 .cm_stride(19)
936 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800937 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800938 }
939 }
940 }
941 }
942
943 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmin) {
944 TEST_REQUIRES_ARM_NEON_DOT;
945 GemmMicrokernelTester()
946 .mr(4)
947 .nr(16)
948 .kr(4)
949 .sr(1)
950 .m(4)
951 .n(16)
952 .k(16)
953 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800954 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800955 }
956
957 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmax) {
958 TEST_REQUIRES_ARM_NEON_DOT;
959 GemmMicrokernelTester()
960 .mr(4)
961 .nr(16)
962 .kr(4)
963 .sr(1)
964 .m(4)
965 .n(16)
966 .k(16)
967 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800968 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800969 }
970
971 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm) {
972 TEST_REQUIRES_ARM_NEON_DOT;
973 GemmMicrokernelTester()
974 .mr(4)
975 .nr(16)
976 .kr(4)
977 .sr(1)
978 .m(4)
979 .n(16)
980 .k(16)
981 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800982 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800983 }
984
985 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_a_zero_point) {
986 TEST_REQUIRES_ARM_NEON_DOT;
987 for (size_t k = 1; k <= 80; k += 17) {
988 GemmMicrokernelTester()
989 .mr(4)
990 .nr(16)
991 .kr(4)
992 .sr(1)
993 .m(4)
994 .n(16)
995 .k(k)
996 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -0800997 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800998 }
999 }
1000
1001 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_b_zero_point) {
1002 TEST_REQUIRES_ARM_NEON_DOT;
1003 for (size_t k = 1; k <= 80; k += 17) {
1004 GemmMicrokernelTester()
1005 .mr(4)
1006 .nr(16)
1007 .kr(4)
1008 .sr(1)
1009 .m(4)
1010 .n(16)
1011 .k(k)
1012 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001013 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001014 }
1015 }
1016
1017 TEST(QU8_GEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_zero_point) {
1018 TEST_REQUIRES_ARM_NEON_DOT;
1019 for (size_t k = 1; k <= 80; k += 17) {
1020 GemmMicrokernelTester()
1021 .mr(4)
1022 .nr(16)
1023 .kr(4)
1024 .sr(1)
1025 .m(4)
1026 .n(16)
1027 .k(k)
1028 .a_zero_point(0)
1029 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001030 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001031 }
1032 }
1033#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1034
1035
1036#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1037 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8) {
1038 TEST_REQUIRES_ARM_NEON;
1039 GemmMicrokernelTester()
1040 .mr(4)
1041 .nr(16)
1042 .kr(1)
1043 .sr(1)
1044 .m(4)
1045 .n(16)
1046 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001047 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001048 }
1049
1050 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cn) {
1051 TEST_REQUIRES_ARM_NEON;
1052 GemmMicrokernelTester()
1053 .mr(4)
1054 .nr(16)
1055 .kr(1)
1056 .sr(1)
1057 .m(4)
1058 .n(16)
1059 .k(8)
1060 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001061 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001062 }
1063
1064 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
1065 TEST_REQUIRES_ARM_NEON;
1066 GemmMicrokernelTester()
1067 .mr(4)
1068 .nr(16)
1069 .kr(1)
1070 .sr(1)
1071 .m(4)
1072 .n(16)
1073 .k(8)
1074 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001075 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001076 }
1077
1078 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
1079 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001080 for (uint32_t n = 1; n <= 16; n++) {
1081 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001082 GemmMicrokernelTester()
1083 .mr(4)
1084 .nr(16)
1085 .kr(1)
1086 .sr(1)
1087 .m(m)
1088 .n(n)
1089 .k(8)
1090 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001091 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001092 }
1093 }
1094 }
1095
1096 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
1097 TEST_REQUIRES_ARM_NEON;
1098 for (uint32_t m = 1; m <= 4; m++) {
1099 GemmMicrokernelTester()
1100 .mr(4)
1101 .nr(16)
1102 .kr(1)
1103 .sr(1)
1104 .m(m)
1105 .n(16)
1106 .k(8)
1107 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001108 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001109 }
1110 }
1111
1112 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
1113 TEST_REQUIRES_ARM_NEON;
1114 for (uint32_t n = 1; n <= 16; n++) {
1115 GemmMicrokernelTester()
1116 .mr(4)
1117 .nr(16)
1118 .kr(1)
1119 .sr(1)
1120 .m(4)
1121 .n(n)
1122 .k(8)
1123 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001124 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001125 }
1126 }
1127
1128 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8) {
1129 TEST_REQUIRES_ARM_NEON;
1130 for (size_t k = 1; k < 8; k++) {
1131 GemmMicrokernelTester()
1132 .mr(4)
1133 .nr(16)
1134 .kr(1)
1135 .sr(1)
1136 .m(4)
1137 .n(16)
1138 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001139 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001140 }
1141 }
1142
1143 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
1144 TEST_REQUIRES_ARM_NEON;
1145 for (size_t k = 1; k < 8; k++) {
1146 GemmMicrokernelTester()
1147 .mr(4)
1148 .nr(16)
1149 .kr(1)
1150 .sr(1)
1151 .m(4)
1152 .n(16)
1153 .k(k)
1154 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001155 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001156 }
1157 }
1158
1159 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
1160 TEST_REQUIRES_ARM_NEON;
1161 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001162 for (uint32_t n = 1; n <= 16; n++) {
1163 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001164 GemmMicrokernelTester()
1165 .mr(4)
1166 .nr(16)
1167 .kr(1)
1168 .sr(1)
1169 .m(m)
1170 .n(n)
1171 .k(k)
1172 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001173 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001174 }
1175 }
1176 }
1177 }
1178
1179 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8) {
1180 TEST_REQUIRES_ARM_NEON;
1181 for (size_t k = 9; k < 16; k++) {
1182 GemmMicrokernelTester()
1183 .mr(4)
1184 .nr(16)
1185 .kr(1)
1186 .sr(1)
1187 .m(4)
1188 .n(16)
1189 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001190 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001191 }
1192 }
1193
1194 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
1195 TEST_REQUIRES_ARM_NEON;
1196 for (size_t k = 9; k < 16; k++) {
1197 GemmMicrokernelTester()
1198 .mr(4)
1199 .nr(16)
1200 .kr(1)
1201 .sr(1)
1202 .m(4)
1203 .n(16)
1204 .k(k)
1205 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001206 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001207 }
1208 }
1209
1210 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
1211 TEST_REQUIRES_ARM_NEON;
1212 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001213 for (uint32_t n = 1; n <= 16; n++) {
1214 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001215 GemmMicrokernelTester()
1216 .mr(4)
1217 .nr(16)
1218 .kr(1)
1219 .sr(1)
1220 .m(m)
1221 .n(n)
1222 .k(k)
1223 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001224 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001225 }
1226 }
1227 }
1228 }
1229
1230 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8) {
1231 TEST_REQUIRES_ARM_NEON;
1232 for (size_t k = 16; k <= 80; k += 8) {
1233 GemmMicrokernelTester()
1234 .mr(4)
1235 .nr(16)
1236 .kr(1)
1237 .sr(1)
1238 .m(4)
1239 .n(16)
1240 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001241 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001242 }
1243 }
1244
1245 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_strided_a) {
1246 TEST_REQUIRES_ARM_NEON;
1247 for (size_t k = 16; k <= 80; k += 8) {
1248 GemmMicrokernelTester()
1249 .mr(4)
1250 .nr(16)
1251 .kr(1)
1252 .sr(1)
1253 .m(4)
1254 .n(16)
1255 .k(k)
1256 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001257 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001258 }
1259 }
1260
1261 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
1262 TEST_REQUIRES_ARM_NEON;
1263 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001264 for (uint32_t n = 1; n <= 16; n++) {
1265 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001266 GemmMicrokernelTester()
1267 .mr(4)
1268 .nr(16)
1269 .kr(1)
1270 .sr(1)
1271 .m(m)
1272 .n(n)
1273 .k(k)
1274 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001275 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001276 }
1277 }
1278 }
1279 }
1280
1281 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16) {
1282 TEST_REQUIRES_ARM_NEON;
1283 for (uint32_t n = 17; n < 32; n++) {
1284 for (size_t k = 1; k <= 40; k += 9) {
1285 GemmMicrokernelTester()
1286 .mr(4)
1287 .nr(16)
1288 .kr(1)
1289 .sr(1)
1290 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001291 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001292 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001293 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001294 }
1295 }
1296 }
1297
1298 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
1299 TEST_REQUIRES_ARM_NEON;
1300 for (uint32_t n = 17; n < 32; n++) {
1301 for (size_t k = 1; k <= 40; k += 9) {
1302 GemmMicrokernelTester()
1303 .mr(4)
1304 .nr(16)
1305 .kr(1)
1306 .sr(1)
1307 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001308 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001309 .k(k)
1310 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001311 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001312 }
1313 }
1314 }
1315
1316 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
1317 TEST_REQUIRES_ARM_NEON;
1318 for (uint32_t n = 17; n < 32; n++) {
1319 for (size_t k = 1; k <= 40; k += 9) {
1320 GemmMicrokernelTester()
1321 .mr(4)
1322 .nr(16)
1323 .kr(1)
1324 .sr(1)
1325 .m(4)
1326 .n(n)
1327 .k(k)
1328 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001329 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001330 }
1331 }
1332 }
1333
1334 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
1335 TEST_REQUIRES_ARM_NEON;
1336 for (uint32_t n = 17; n < 32; n++) {
1337 for (size_t k = 1; k <= 40; k += 9) {
1338 for (uint32_t m = 1; m <= 4; m++) {
1339 GemmMicrokernelTester()
1340 .mr(4)
1341 .nr(16)
1342 .kr(1)
1343 .sr(1)
1344 .m(m)
1345 .n(n)
1346 .k(k)
1347 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001348 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001349 }
1350 }
1351 }
1352 }
1353
1354 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16) {
1355 TEST_REQUIRES_ARM_NEON;
1356 for (uint32_t n = 32; n <= 48; n += 16) {
1357 for (size_t k = 1; k <= 40; k += 9) {
1358 GemmMicrokernelTester()
1359 .mr(4)
1360 .nr(16)
1361 .kr(1)
1362 .sr(1)
1363 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001364 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001365 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001366 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001367 }
1368 }
1369 }
1370
1371 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
1372 TEST_REQUIRES_ARM_NEON;
1373 for (uint32_t n = 32; n <= 48; n += 16) {
1374 for (size_t k = 1; k <= 40; k += 9) {
1375 GemmMicrokernelTester()
1376 .mr(4)
1377 .nr(16)
1378 .kr(1)
1379 .sr(1)
1380 .m(4)
1381 .n(n)
1382 .k(k)
1383 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001384 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001385 }
1386 }
1387 }
1388
1389 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_a) {
1390 TEST_REQUIRES_ARM_NEON;
1391 for (uint32_t n = 32; n <= 48; n += 16) {
1392 for (size_t k = 1; k <= 40; k += 9) {
1393 GemmMicrokernelTester()
1394 .mr(4)
1395 .nr(16)
1396 .kr(1)
1397 .sr(1)
1398 .m(4)
1399 .n(n)
1400 .k(k)
1401 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001402 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001403 }
1404 }
1405 }
1406
1407 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
1408 TEST_REQUIRES_ARM_NEON;
1409 for (uint32_t n = 32; n <= 48; n += 16) {
1410 for (size_t k = 1; k <= 40; k += 9) {
1411 for (uint32_t m = 1; m <= 4; m++) {
1412 GemmMicrokernelTester()
1413 .mr(4)
1414 .nr(16)
1415 .kr(1)
1416 .sr(1)
1417 .m(m)
1418 .n(n)
1419 .k(k)
1420 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001421 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001422 }
1423 }
1424 }
1425 }
1426
1427 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
1428 TEST_REQUIRES_ARM_NEON;
1429 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001430 for (uint32_t n = 1; n <= 16; n++) {
1431 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001432 GemmMicrokernelTester()
1433 .mr(4)
1434 .nr(16)
1435 .kr(1)
1436 .sr(1)
1437 .m(m)
1438 .n(n)
1439 .k(k)
1440 .cm_stride(19)
1441 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001442 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001443 }
1444 }
1445 }
1446 }
1447
1448 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmin) {
1449 TEST_REQUIRES_ARM_NEON;
1450 GemmMicrokernelTester()
1451 .mr(4)
1452 .nr(16)
1453 .kr(1)
1454 .sr(1)
1455 .m(4)
1456 .n(16)
1457 .k(8)
1458 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001459 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001460 }
1461
1462 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmax) {
1463 TEST_REQUIRES_ARM_NEON;
1464 GemmMicrokernelTester()
1465 .mr(4)
1466 .nr(16)
1467 .kr(1)
1468 .sr(1)
1469 .m(4)
1470 .n(16)
1471 .k(8)
1472 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001473 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001474 }
1475
1476 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm) {
1477 TEST_REQUIRES_ARM_NEON;
1478 GemmMicrokernelTester()
1479 .mr(4)
1480 .nr(16)
1481 .kr(1)
1482 .sr(1)
1483 .m(4)
1484 .n(16)
1485 .k(8)
1486 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001487 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001488 }
1489
1490 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_a_zero_point) {
1491 TEST_REQUIRES_ARM_NEON;
1492 for (size_t k = 1; k <= 40; k += 9) {
1493 GemmMicrokernelTester()
1494 .mr(4)
1495 .nr(16)
1496 .kr(1)
1497 .sr(1)
1498 .m(4)
1499 .n(16)
1500 .k(k)
1501 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001502 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001503 }
1504 }
1505
1506 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_b_zero_point) {
1507 TEST_REQUIRES_ARM_NEON;
1508 for (size_t k = 1; k <= 40; k += 9) {
1509 GemmMicrokernelTester()
1510 .mr(4)
1511 .nr(16)
1512 .kr(1)
1513 .sr(1)
1514 .m(4)
1515 .n(16)
1516 .k(k)
1517 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001518 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001519 }
1520 }
1521
1522 TEST(QU8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_zero_point) {
1523 TEST_REQUIRES_ARM_NEON;
1524 for (size_t k = 1; k <= 40; k += 9) {
1525 GemmMicrokernelTester()
1526 .mr(4)
1527 .nr(16)
1528 .kr(1)
1529 .sr(1)
1530 .m(4)
1531 .n(16)
1532 .k(k)
1533 .a_zero_point(0)
1534 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001535 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001536 }
1537 }
1538#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1539
1540
1541#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
1542 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8) {
1543 TEST_REQUIRES_ARM_NEON_DOT;
1544 GemmMicrokernelTester()
1545 .mr(1)
1546 .nr(16)
1547 .kr(4)
1548 .sr(1)
1549 .m(1)
1550 .n(16)
1551 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001552 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001553 }
1554
1555 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cn) {
1556 TEST_REQUIRES_ARM_NEON_DOT;
1557 GemmMicrokernelTester()
1558 .mr(1)
1559 .nr(16)
1560 .kr(4)
1561 .sr(1)
1562 .m(1)
1563 .n(16)
1564 .k(8)
1565 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001566 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001567 }
1568
1569 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_strided_a) {
1570 TEST_REQUIRES_ARM_NEON_DOT;
1571 GemmMicrokernelTester()
1572 .mr(1)
1573 .nr(16)
1574 .kr(4)
1575 .sr(1)
1576 .m(1)
1577 .n(16)
1578 .k(8)
1579 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001580 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001581 }
1582
1583 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile) {
1584 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001585 for (uint32_t n = 1; n <= 16; n++) {
1586 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001587 GemmMicrokernelTester()
1588 .mr(1)
1589 .nr(16)
1590 .kr(4)
1591 .sr(1)
1592 .m(m)
1593 .n(n)
1594 .k(8)
1595 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001596 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001597 }
1598 }
1599 }
1600
1601 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_m) {
1602 TEST_REQUIRES_ARM_NEON_DOT;
1603 for (uint32_t m = 1; m <= 1; m++) {
1604 GemmMicrokernelTester()
1605 .mr(1)
1606 .nr(16)
1607 .kr(4)
1608 .sr(1)
1609 .m(m)
1610 .n(16)
1611 .k(8)
1612 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001613 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001614 }
1615 }
1616
1617 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_n) {
1618 TEST_REQUIRES_ARM_NEON_DOT;
1619 for (uint32_t n = 1; n <= 16; n++) {
1620 GemmMicrokernelTester()
1621 .mr(1)
1622 .nr(16)
1623 .kr(4)
1624 .sr(1)
1625 .m(1)
1626 .n(n)
1627 .k(8)
1628 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001629 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001630 }
1631 }
1632
1633 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8) {
1634 TEST_REQUIRES_ARM_NEON_DOT;
1635 for (size_t k = 1; k < 8; k++) {
1636 GemmMicrokernelTester()
1637 .mr(1)
1638 .nr(16)
1639 .kr(4)
1640 .sr(1)
1641 .m(1)
1642 .n(16)
1643 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001644 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001645 }
1646 }
1647
1648 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8_strided_a) {
1649 TEST_REQUIRES_ARM_NEON_DOT;
1650 for (size_t k = 1; k < 8; k++) {
1651 GemmMicrokernelTester()
1652 .mr(1)
1653 .nr(16)
1654 .kr(4)
1655 .sr(1)
1656 .m(1)
1657 .n(16)
1658 .k(k)
1659 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001660 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001661 }
1662 }
1663
1664 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8_subtile) {
1665 TEST_REQUIRES_ARM_NEON_DOT;
1666 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001667 for (uint32_t n = 1; n <= 16; n++) {
1668 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001669 GemmMicrokernelTester()
1670 .mr(1)
1671 .nr(16)
1672 .kr(4)
1673 .sr(1)
1674 .m(m)
1675 .n(n)
1676 .k(k)
1677 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001678 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001679 }
1680 }
1681 }
1682 }
1683
1684 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8) {
1685 TEST_REQUIRES_ARM_NEON_DOT;
1686 for (size_t k = 9; k < 16; k++) {
1687 GemmMicrokernelTester()
1688 .mr(1)
1689 .nr(16)
1690 .kr(4)
1691 .sr(1)
1692 .m(1)
1693 .n(16)
1694 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001695 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001696 }
1697 }
1698
1699 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8_strided_a) {
1700 TEST_REQUIRES_ARM_NEON_DOT;
1701 for (size_t k = 9; k < 16; k++) {
1702 GemmMicrokernelTester()
1703 .mr(1)
1704 .nr(16)
1705 .kr(4)
1706 .sr(1)
1707 .m(1)
1708 .n(16)
1709 .k(k)
1710 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001711 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001712 }
1713 }
1714
1715 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8_subtile) {
1716 TEST_REQUIRES_ARM_NEON_DOT;
1717 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001718 for (uint32_t n = 1; n <= 16; n++) {
1719 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001720 GemmMicrokernelTester()
1721 .mr(1)
1722 .nr(16)
1723 .kr(4)
1724 .sr(1)
1725 .m(m)
1726 .n(n)
1727 .k(k)
1728 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001729 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001730 }
1731 }
1732 }
1733 }
1734
1735 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8) {
1736 TEST_REQUIRES_ARM_NEON_DOT;
1737 for (size_t k = 16; k <= 80; k += 8) {
1738 GemmMicrokernelTester()
1739 .mr(1)
1740 .nr(16)
1741 .kr(4)
1742 .sr(1)
1743 .m(1)
1744 .n(16)
1745 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001746 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001747 }
1748 }
1749
1750 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8_strided_a) {
1751 TEST_REQUIRES_ARM_NEON_DOT;
1752 for (size_t k = 16; k <= 80; k += 8) {
1753 GemmMicrokernelTester()
1754 .mr(1)
1755 .nr(16)
1756 .kr(4)
1757 .sr(1)
1758 .m(1)
1759 .n(16)
1760 .k(k)
1761 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001762 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001763 }
1764 }
1765
1766 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8_subtile) {
1767 TEST_REQUIRES_ARM_NEON_DOT;
1768 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001769 for (uint32_t n = 1; n <= 16; n++) {
1770 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001771 GemmMicrokernelTester()
1772 .mr(1)
1773 .nr(16)
1774 .kr(4)
1775 .sr(1)
1776 .m(m)
1777 .n(n)
1778 .k(k)
1779 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001780 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001781 }
1782 }
1783 }
1784 }
1785
1786 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16) {
1787 TEST_REQUIRES_ARM_NEON_DOT;
1788 for (uint32_t n = 17; n < 32; n++) {
1789 for (size_t k = 1; k <= 40; k += 9) {
1790 GemmMicrokernelTester()
1791 .mr(1)
1792 .nr(16)
1793 .kr(4)
1794 .sr(1)
1795 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001796 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001797 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001798 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001799 }
1800 }
1801 }
1802
1803 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_strided_cn) {
1804 TEST_REQUIRES_ARM_NEON_DOT;
1805 for (uint32_t n = 17; n < 32; n++) {
1806 for (size_t k = 1; k <= 40; k += 9) {
1807 GemmMicrokernelTester()
1808 .mr(1)
1809 .nr(16)
1810 .kr(4)
1811 .sr(1)
1812 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001813 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001814 .k(k)
1815 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001816 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001817 }
1818 }
1819 }
1820
1821 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_strided_a) {
1822 TEST_REQUIRES_ARM_NEON_DOT;
1823 for (uint32_t n = 17; n < 32; n++) {
1824 for (size_t k = 1; k <= 40; k += 9) {
1825 GemmMicrokernelTester()
1826 .mr(1)
1827 .nr(16)
1828 .kr(4)
1829 .sr(1)
1830 .m(1)
1831 .n(n)
1832 .k(k)
1833 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001834 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001835 }
1836 }
1837 }
1838
1839 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_subtile) {
1840 TEST_REQUIRES_ARM_NEON_DOT;
1841 for (uint32_t n = 17; n < 32; n++) {
1842 for (size_t k = 1; k <= 40; k += 9) {
1843 for (uint32_t m = 1; m <= 1; m++) {
1844 GemmMicrokernelTester()
1845 .mr(1)
1846 .nr(16)
1847 .kr(4)
1848 .sr(1)
1849 .m(m)
1850 .n(n)
1851 .k(k)
1852 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001853 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001854 }
1855 }
1856 }
1857 }
1858
1859 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16) {
1860 TEST_REQUIRES_ARM_NEON_DOT;
1861 for (uint32_t n = 32; n <= 48; n += 16) {
1862 for (size_t k = 1; k <= 40; k += 9) {
1863 GemmMicrokernelTester()
1864 .mr(1)
1865 .nr(16)
1866 .kr(4)
1867 .sr(1)
1868 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001869 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001870 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001871 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001872 }
1873 }
1874 }
1875
1876 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_strided_cn) {
1877 TEST_REQUIRES_ARM_NEON_DOT;
1878 for (uint32_t n = 32; n <= 48; n += 16) {
1879 for (size_t k = 1; k <= 40; k += 9) {
1880 GemmMicrokernelTester()
1881 .mr(1)
1882 .nr(16)
1883 .kr(4)
1884 .sr(1)
1885 .m(1)
1886 .n(n)
1887 .k(k)
1888 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001889 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001890 }
1891 }
1892 }
1893
1894 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_strided_a) {
1895 TEST_REQUIRES_ARM_NEON_DOT;
1896 for (uint32_t n = 32; n <= 48; n += 16) {
1897 for (size_t k = 1; k <= 40; k += 9) {
1898 GemmMicrokernelTester()
1899 .mr(1)
1900 .nr(16)
1901 .kr(4)
1902 .sr(1)
1903 .m(1)
1904 .n(n)
1905 .k(k)
1906 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001907 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001908 }
1909 }
1910 }
1911
1912 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_subtile) {
1913 TEST_REQUIRES_ARM_NEON_DOT;
1914 for (uint32_t n = 32; n <= 48; n += 16) {
1915 for (size_t k = 1; k <= 40; k += 9) {
1916 for (uint32_t m = 1; m <= 1; m++) {
1917 GemmMicrokernelTester()
1918 .mr(1)
1919 .nr(16)
1920 .kr(4)
1921 .sr(1)
1922 .m(m)
1923 .n(n)
1924 .k(k)
1925 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001926 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001927 }
1928 }
1929 }
1930 }
1931
1932 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm_subtile) {
1933 TEST_REQUIRES_ARM_NEON_DOT;
1934 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001935 for (uint32_t n = 1; n <= 16; n++) {
1936 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001937 GemmMicrokernelTester()
1938 .mr(1)
1939 .nr(16)
1940 .kr(4)
1941 .sr(1)
1942 .m(m)
1943 .n(n)
1944 .k(k)
1945 .cm_stride(19)
1946 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001947 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001948 }
1949 }
1950 }
1951 }
1952
1953 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, qmin) {
1954 TEST_REQUIRES_ARM_NEON_DOT;
1955 GemmMicrokernelTester()
1956 .mr(1)
1957 .nr(16)
1958 .kr(4)
1959 .sr(1)
1960 .m(1)
1961 .n(16)
1962 .k(8)
1963 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001964 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001965 }
1966
1967 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, qmax) {
1968 TEST_REQUIRES_ARM_NEON_DOT;
1969 GemmMicrokernelTester()
1970 .mr(1)
1971 .nr(16)
1972 .kr(4)
1973 .sr(1)
1974 .m(1)
1975 .n(16)
1976 .k(8)
1977 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001978 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001979 }
1980
1981 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm) {
1982 TEST_REQUIRES_ARM_NEON_DOT;
1983 GemmMicrokernelTester()
1984 .mr(1)
1985 .nr(16)
1986 .kr(4)
1987 .sr(1)
1988 .m(1)
1989 .n(16)
1990 .k(8)
1991 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001992 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001993 }
1994
1995 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, no_a_zero_point) {
1996 TEST_REQUIRES_ARM_NEON_DOT;
1997 for (size_t k = 1; k <= 40; k += 9) {
1998 GemmMicrokernelTester()
1999 .mr(1)
2000 .nr(16)
2001 .kr(4)
2002 .sr(1)
2003 .m(1)
2004 .n(16)
2005 .k(k)
2006 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002007 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002008 }
2009 }
2010
2011 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, no_b_zero_point) {
2012 TEST_REQUIRES_ARM_NEON_DOT;
2013 for (size_t k = 1; k <= 40; k += 9) {
2014 GemmMicrokernelTester()
2015 .mr(1)
2016 .nr(16)
2017 .kr(4)
2018 .sr(1)
2019 .m(1)
2020 .n(16)
2021 .k(k)
2022 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002023 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002024 }
2025 }
2026
2027 TEST(QU8_GEMM_MINMAX_FP32_1X16C4__NEONDOT, no_zero_point) {
2028 TEST_REQUIRES_ARM_NEON_DOT;
2029 for (size_t k = 1; k <= 40; k += 9) {
2030 GemmMicrokernelTester()
2031 .mr(1)
2032 .nr(16)
2033 .kr(4)
2034 .sr(1)
2035 .m(1)
2036 .n(16)
2037 .k(k)
2038 .a_zero_point(0)
2039 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002040 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002041 }
2042 }
2043#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
2044
2045
2046#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2047 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8) {
2048 TEST_REQUIRES_X86_SSE2;
2049 GemmMicrokernelTester()
2050 .mr(3)
2051 .nr(4)
2052 .kr(2)
2053 .sr(1)
2054 .m(3)
2055 .n(4)
2056 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002057 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002058 }
2059
2060 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cn) {
2061 TEST_REQUIRES_X86_SSE2;
2062 GemmMicrokernelTester()
2063 .mr(3)
2064 .nr(4)
2065 .kr(2)
2066 .sr(1)
2067 .m(3)
2068 .n(4)
2069 .k(8)
2070 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08002071 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002072 }
2073
2074 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_strided_a) {
2075 TEST_REQUIRES_X86_SSE2;
2076 GemmMicrokernelTester()
2077 .mr(3)
2078 .nr(4)
2079 .kr(2)
2080 .sr(1)
2081 .m(3)
2082 .n(4)
2083 .k(8)
2084 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002085 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002086 }
2087
2088 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile) {
2089 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002090 for (uint32_t n = 1; n <= 4; n++) {
2091 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002092 GemmMicrokernelTester()
2093 .mr(3)
2094 .nr(4)
2095 .kr(2)
2096 .sr(1)
2097 .m(m)
2098 .n(n)
2099 .k(8)
2100 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002101 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002102 }
2103 }
2104 }
2105
2106 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
2107 TEST_REQUIRES_X86_SSE2;
2108 for (uint32_t m = 1; m <= 3; m++) {
2109 GemmMicrokernelTester()
2110 .mr(3)
2111 .nr(4)
2112 .kr(2)
2113 .sr(1)
2114 .m(m)
2115 .n(4)
2116 .k(8)
2117 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002118 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002119 }
2120 }
2121
2122 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
2123 TEST_REQUIRES_X86_SSE2;
2124 for (uint32_t n = 1; n <= 4; n++) {
2125 GemmMicrokernelTester()
2126 .mr(3)
2127 .nr(4)
2128 .kr(2)
2129 .sr(1)
2130 .m(3)
2131 .n(n)
2132 .k(8)
2133 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002134 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002135 }
2136 }
2137
2138 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8) {
2139 TEST_REQUIRES_X86_SSE2;
2140 for (size_t k = 1; k < 8; k++) {
2141 GemmMicrokernelTester()
2142 .mr(3)
2143 .nr(4)
2144 .kr(2)
2145 .sr(1)
2146 .m(3)
2147 .n(4)
2148 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002149 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002150 }
2151 }
2152
2153 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_strided_a) {
2154 TEST_REQUIRES_X86_SSE2;
2155 for (size_t k = 1; k < 8; k++) {
2156 GemmMicrokernelTester()
2157 .mr(3)
2158 .nr(4)
2159 .kr(2)
2160 .sr(1)
2161 .m(3)
2162 .n(4)
2163 .k(k)
2164 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002165 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002166 }
2167 }
2168
2169 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_subtile) {
2170 TEST_REQUIRES_X86_SSE2;
2171 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002172 for (uint32_t n = 1; n <= 4; n++) {
2173 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002174 GemmMicrokernelTester()
2175 .mr(3)
2176 .nr(4)
2177 .kr(2)
2178 .sr(1)
2179 .m(m)
2180 .n(n)
2181 .k(k)
2182 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002183 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002184 }
2185 }
2186 }
2187 }
2188
2189 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8) {
2190 TEST_REQUIRES_X86_SSE2;
2191 for (size_t k = 9; k < 16; k++) {
2192 GemmMicrokernelTester()
2193 .mr(3)
2194 .nr(4)
2195 .kr(2)
2196 .sr(1)
2197 .m(3)
2198 .n(4)
2199 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002200 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002201 }
2202 }
2203
2204 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_strided_a) {
2205 TEST_REQUIRES_X86_SSE2;
2206 for (size_t k = 9; k < 16; k++) {
2207 GemmMicrokernelTester()
2208 .mr(3)
2209 .nr(4)
2210 .kr(2)
2211 .sr(1)
2212 .m(3)
2213 .n(4)
2214 .k(k)
2215 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002216 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002217 }
2218 }
2219
2220 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_subtile) {
2221 TEST_REQUIRES_X86_SSE2;
2222 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002223 for (uint32_t n = 1; n <= 4; n++) {
2224 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002225 GemmMicrokernelTester()
2226 .mr(3)
2227 .nr(4)
2228 .kr(2)
2229 .sr(1)
2230 .m(m)
2231 .n(n)
2232 .k(k)
2233 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002234 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002235 }
2236 }
2237 }
2238 }
2239
2240 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8) {
2241 TEST_REQUIRES_X86_SSE2;
2242 for (size_t k = 16; k <= 80; k += 8) {
2243 GemmMicrokernelTester()
2244 .mr(3)
2245 .nr(4)
2246 .kr(2)
2247 .sr(1)
2248 .m(3)
2249 .n(4)
2250 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002251 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002252 }
2253 }
2254
2255 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_strided_a) {
2256 TEST_REQUIRES_X86_SSE2;
2257 for (size_t k = 16; k <= 80; k += 8) {
2258 GemmMicrokernelTester()
2259 .mr(3)
2260 .nr(4)
2261 .kr(2)
2262 .sr(1)
2263 .m(3)
2264 .n(4)
2265 .k(k)
2266 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002267 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002268 }
2269 }
2270
2271 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_subtile) {
2272 TEST_REQUIRES_X86_SSE2;
2273 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002274 for (uint32_t n = 1; n <= 4; n++) {
2275 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002276 GemmMicrokernelTester()
2277 .mr(3)
2278 .nr(4)
2279 .kr(2)
2280 .sr(1)
2281 .m(m)
2282 .n(n)
2283 .k(k)
2284 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002285 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002286 }
2287 }
2288 }
2289 }
2290
2291 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4) {
2292 TEST_REQUIRES_X86_SSE2;
2293 for (uint32_t n = 5; n < 8; n++) {
2294 for (size_t k = 1; k <= 40; k += 9) {
2295 GemmMicrokernelTester()
2296 .mr(3)
2297 .nr(4)
2298 .kr(2)
2299 .sr(1)
2300 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002301 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002302 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002303 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002304 }
2305 }
2306 }
2307
2308 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
2309 TEST_REQUIRES_X86_SSE2;
2310 for (uint32_t n = 5; n < 8; n++) {
2311 for (size_t k = 1; k <= 40; k += 9) {
2312 GemmMicrokernelTester()
2313 .mr(3)
2314 .nr(4)
2315 .kr(2)
2316 .sr(1)
2317 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002318 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002319 .k(k)
2320 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08002321 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002322 }
2323 }
2324 }
2325
2326 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_a) {
2327 TEST_REQUIRES_X86_SSE2;
2328 for (uint32_t n = 5; n < 8; n++) {
2329 for (size_t k = 1; k <= 40; k += 9) {
2330 GemmMicrokernelTester()
2331 .mr(3)
2332 .nr(4)
2333 .kr(2)
2334 .sr(1)
2335 .m(3)
2336 .n(n)
2337 .k(k)
2338 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002339 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002340 }
2341 }
2342 }
2343
2344 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_subtile) {
2345 TEST_REQUIRES_X86_SSE2;
2346 for (uint32_t n = 5; n < 8; n++) {
2347 for (size_t k = 1; k <= 40; k += 9) {
2348 for (uint32_t m = 1; m <= 3; m++) {
2349 GemmMicrokernelTester()
2350 .mr(3)
2351 .nr(4)
2352 .kr(2)
2353 .sr(1)
2354 .m(m)
2355 .n(n)
2356 .k(k)
2357 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002358 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002359 }
2360 }
2361 }
2362 }
2363
2364 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4) {
2365 TEST_REQUIRES_X86_SSE2;
2366 for (uint32_t n = 8; n <= 12; n += 4) {
2367 for (size_t k = 1; k <= 40; k += 9) {
2368 GemmMicrokernelTester()
2369 .mr(3)
2370 .nr(4)
2371 .kr(2)
2372 .sr(1)
2373 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002374 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002375 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002376 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002377 }
2378 }
2379 }
2380
2381 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
2382 TEST_REQUIRES_X86_SSE2;
2383 for (uint32_t n = 8; n <= 12; n += 4) {
2384 for (size_t k = 1; k <= 40; k += 9) {
2385 GemmMicrokernelTester()
2386 .mr(3)
2387 .nr(4)
2388 .kr(2)
2389 .sr(1)
2390 .m(3)
2391 .n(n)
2392 .k(k)
2393 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08002394 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002395 }
2396 }
2397 }
2398
2399 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_a) {
2400 TEST_REQUIRES_X86_SSE2;
2401 for (uint32_t n = 8; n <= 12; n += 4) {
2402 for (size_t k = 1; k <= 40; k += 9) {
2403 GemmMicrokernelTester()
2404 .mr(3)
2405 .nr(4)
2406 .kr(2)
2407 .sr(1)
2408 .m(3)
2409 .n(n)
2410 .k(k)
2411 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002412 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002413 }
2414 }
2415 }
2416
2417 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_subtile) {
2418 TEST_REQUIRES_X86_SSE2;
2419 for (uint32_t n = 8; n <= 12; n += 4) {
2420 for (size_t k = 1; k <= 40; k += 9) {
2421 for (uint32_t m = 1; m <= 3; m++) {
2422 GemmMicrokernelTester()
2423 .mr(3)
2424 .nr(4)
2425 .kr(2)
2426 .sr(1)
2427 .m(m)
2428 .n(n)
2429 .k(k)
2430 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002431 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002432 }
2433 }
2434 }
2435 }
2436
2437 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm_subtile) {
2438 TEST_REQUIRES_X86_SSE2;
2439 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002440 for (uint32_t n = 1; n <= 4; n++) {
2441 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002442 GemmMicrokernelTester()
2443 .mr(3)
2444 .nr(4)
2445 .kr(2)
2446 .sr(1)
2447 .m(m)
2448 .n(n)
2449 .k(k)
2450 .cm_stride(7)
2451 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002452 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002453 }
2454 }
2455 }
2456 }
2457
2458 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmin) {
2459 TEST_REQUIRES_X86_SSE2;
2460 GemmMicrokernelTester()
2461 .mr(3)
2462 .nr(4)
2463 .kr(2)
2464 .sr(1)
2465 .m(3)
2466 .n(4)
2467 .k(8)
2468 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002469 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002470 }
2471
2472 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmax) {
2473 TEST_REQUIRES_X86_SSE2;
2474 GemmMicrokernelTester()
2475 .mr(3)
2476 .nr(4)
2477 .kr(2)
2478 .sr(1)
2479 .m(3)
2480 .n(4)
2481 .k(8)
2482 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002483 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002484 }
2485
2486 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm) {
2487 TEST_REQUIRES_X86_SSE2;
2488 GemmMicrokernelTester()
2489 .mr(3)
2490 .nr(4)
2491 .kr(2)
2492 .sr(1)
2493 .m(3)
2494 .n(4)
2495 .k(8)
2496 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08002497 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002498 }
2499
2500 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_a_zero_point) {
2501 TEST_REQUIRES_X86_SSE2;
2502 for (size_t k = 1; k <= 40; k += 9) {
2503 GemmMicrokernelTester()
2504 .mr(3)
2505 .nr(4)
2506 .kr(2)
2507 .sr(1)
2508 .m(3)
2509 .n(4)
2510 .k(k)
2511 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002512 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002513 }
2514 }
2515
2516 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_b_zero_point) {
2517 TEST_REQUIRES_X86_SSE2;
2518 for (size_t k = 1; k <= 40; k += 9) {
2519 GemmMicrokernelTester()
2520 .mr(3)
2521 .nr(4)
2522 .kr(2)
2523 .sr(1)
2524 .m(3)
2525 .n(4)
2526 .k(k)
2527 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002528 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002529 }
2530 }
2531
2532 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_zero_point) {
2533 TEST_REQUIRES_X86_SSE2;
2534 for (size_t k = 1; k <= 40; k += 9) {
2535 GemmMicrokernelTester()
2536 .mr(3)
2537 .nr(4)
2538 .kr(2)
2539 .sr(1)
2540 .m(3)
2541 .n(4)
2542 .k(k)
2543 .a_zero_point(0)
2544 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002545 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002546 }
2547 }
2548#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2549
2550
2551#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2552 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8) {
2553 TEST_REQUIRES_X86_SSE41;
2554 GemmMicrokernelTester()
2555 .mr(3)
2556 .nr(4)
2557 .kr(2)
2558 .sr(1)
2559 .m(3)
2560 .n(4)
2561 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002562 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002563 }
2564
2565 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cn) {
2566 TEST_REQUIRES_X86_SSE41;
2567 GemmMicrokernelTester()
2568 .mr(3)
2569 .nr(4)
2570 .kr(2)
2571 .sr(1)
2572 .m(3)
2573 .n(4)
2574 .k(8)
2575 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08002576 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002577 }
2578
2579 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_strided_a) {
2580 TEST_REQUIRES_X86_SSE41;
2581 GemmMicrokernelTester()
2582 .mr(3)
2583 .nr(4)
2584 .kr(2)
2585 .sr(1)
2586 .m(3)
2587 .n(4)
2588 .k(8)
2589 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002590 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002591 }
2592
2593 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile) {
2594 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002595 for (uint32_t n = 1; n <= 4; n++) {
2596 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002597 GemmMicrokernelTester()
2598 .mr(3)
2599 .nr(4)
2600 .kr(2)
2601 .sr(1)
2602 .m(m)
2603 .n(n)
2604 .k(8)
2605 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002606 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002607 }
2608 }
2609 }
2610
2611 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
2612 TEST_REQUIRES_X86_SSE41;
2613 for (uint32_t m = 1; m <= 3; m++) {
2614 GemmMicrokernelTester()
2615 .mr(3)
2616 .nr(4)
2617 .kr(2)
2618 .sr(1)
2619 .m(m)
2620 .n(4)
2621 .k(8)
2622 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002623 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002624 }
2625 }
2626
2627 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
2628 TEST_REQUIRES_X86_SSE41;
2629 for (uint32_t n = 1; n <= 4; n++) {
2630 GemmMicrokernelTester()
2631 .mr(3)
2632 .nr(4)
2633 .kr(2)
2634 .sr(1)
2635 .m(3)
2636 .n(n)
2637 .k(8)
2638 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002639 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002640 }
2641 }
2642
2643 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8) {
2644 TEST_REQUIRES_X86_SSE41;
2645 for (size_t k = 1; k < 8; k++) {
2646 GemmMicrokernelTester()
2647 .mr(3)
2648 .nr(4)
2649 .kr(2)
2650 .sr(1)
2651 .m(3)
2652 .n(4)
2653 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002654 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002655 }
2656 }
2657
2658 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8_strided_a) {
2659 TEST_REQUIRES_X86_SSE41;
2660 for (size_t k = 1; k < 8; k++) {
2661 GemmMicrokernelTester()
2662 .mr(3)
2663 .nr(4)
2664 .kr(2)
2665 .sr(1)
2666 .m(3)
2667 .n(4)
2668 .k(k)
2669 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002670 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002671 }
2672 }
2673
2674 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8_subtile) {
2675 TEST_REQUIRES_X86_SSE41;
2676 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002677 for (uint32_t n = 1; n <= 4; n++) {
2678 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002679 GemmMicrokernelTester()
2680 .mr(3)
2681 .nr(4)
2682 .kr(2)
2683 .sr(1)
2684 .m(m)
2685 .n(n)
2686 .k(k)
2687 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002688 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002689 }
2690 }
2691 }
2692 }
2693
2694 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8) {
2695 TEST_REQUIRES_X86_SSE41;
2696 for (size_t k = 9; k < 16; k++) {
2697 GemmMicrokernelTester()
2698 .mr(3)
2699 .nr(4)
2700 .kr(2)
2701 .sr(1)
2702 .m(3)
2703 .n(4)
2704 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002705 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002706 }
2707 }
2708
2709 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8_strided_a) {
2710 TEST_REQUIRES_X86_SSE41;
2711 for (size_t k = 9; k < 16; k++) {
2712 GemmMicrokernelTester()
2713 .mr(3)
2714 .nr(4)
2715 .kr(2)
2716 .sr(1)
2717 .m(3)
2718 .n(4)
2719 .k(k)
2720 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002721 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002722 }
2723 }
2724
2725 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8_subtile) {
2726 TEST_REQUIRES_X86_SSE41;
2727 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002728 for (uint32_t n = 1; n <= 4; n++) {
2729 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002730 GemmMicrokernelTester()
2731 .mr(3)
2732 .nr(4)
2733 .kr(2)
2734 .sr(1)
2735 .m(m)
2736 .n(n)
2737 .k(k)
2738 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002739 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002740 }
2741 }
2742 }
2743 }
2744
2745 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8) {
2746 TEST_REQUIRES_X86_SSE41;
2747 for (size_t k = 16; k <= 80; k += 8) {
2748 GemmMicrokernelTester()
2749 .mr(3)
2750 .nr(4)
2751 .kr(2)
2752 .sr(1)
2753 .m(3)
2754 .n(4)
2755 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002756 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002757 }
2758 }
2759
2760 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8_strided_a) {
2761 TEST_REQUIRES_X86_SSE41;
2762 for (size_t k = 16; k <= 80; k += 8) {
2763 GemmMicrokernelTester()
2764 .mr(3)
2765 .nr(4)
2766 .kr(2)
2767 .sr(1)
2768 .m(3)
2769 .n(4)
2770 .k(k)
2771 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002772 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002773 }
2774 }
2775
2776 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8_subtile) {
2777 TEST_REQUIRES_X86_SSE41;
2778 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002779 for (uint32_t n = 1; n <= 4; n++) {
2780 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002781 GemmMicrokernelTester()
2782 .mr(3)
2783 .nr(4)
2784 .kr(2)
2785 .sr(1)
2786 .m(m)
2787 .n(n)
2788 .k(k)
2789 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002790 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002791 }
2792 }
2793 }
2794 }
2795
2796 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4) {
2797 TEST_REQUIRES_X86_SSE41;
2798 for (uint32_t n = 5; n < 8; n++) {
2799 for (size_t k = 1; k <= 40; k += 9) {
2800 GemmMicrokernelTester()
2801 .mr(3)
2802 .nr(4)
2803 .kr(2)
2804 .sr(1)
2805 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002806 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002807 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002808 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002809 }
2810 }
2811 }
2812
2813 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
2814 TEST_REQUIRES_X86_SSE41;
2815 for (uint32_t n = 5; n < 8; n++) {
2816 for (size_t k = 1; k <= 40; k += 9) {
2817 GemmMicrokernelTester()
2818 .mr(3)
2819 .nr(4)
2820 .kr(2)
2821 .sr(1)
2822 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002823 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002824 .k(k)
2825 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08002826 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002827 }
2828 }
2829 }
2830
2831 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_strided_a) {
2832 TEST_REQUIRES_X86_SSE41;
2833 for (uint32_t n = 5; n < 8; n++) {
2834 for (size_t k = 1; k <= 40; k += 9) {
2835 GemmMicrokernelTester()
2836 .mr(3)
2837 .nr(4)
2838 .kr(2)
2839 .sr(1)
2840 .m(3)
2841 .n(n)
2842 .k(k)
2843 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002844 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002845 }
2846 }
2847 }
2848
2849 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_subtile) {
2850 TEST_REQUIRES_X86_SSE41;
2851 for (uint32_t n = 5; n < 8; n++) {
2852 for (size_t k = 1; k <= 40; k += 9) {
2853 for (uint32_t m = 1; m <= 3; m++) {
2854 GemmMicrokernelTester()
2855 .mr(3)
2856 .nr(4)
2857 .kr(2)
2858 .sr(1)
2859 .m(m)
2860 .n(n)
2861 .k(k)
2862 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002863 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002864 }
2865 }
2866 }
2867 }
2868
2869 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4) {
2870 TEST_REQUIRES_X86_SSE41;
2871 for (uint32_t n = 8; n <= 12; n += 4) {
2872 for (size_t k = 1; k <= 40; k += 9) {
2873 GemmMicrokernelTester()
2874 .mr(3)
2875 .nr(4)
2876 .kr(2)
2877 .sr(1)
2878 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002879 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002880 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002881 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002882 }
2883 }
2884 }
2885
2886 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
2887 TEST_REQUIRES_X86_SSE41;
2888 for (uint32_t n = 8; n <= 12; n += 4) {
2889 for (size_t k = 1; k <= 40; k += 9) {
2890 GemmMicrokernelTester()
2891 .mr(3)
2892 .nr(4)
2893 .kr(2)
2894 .sr(1)
2895 .m(3)
2896 .n(n)
2897 .k(k)
2898 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08002899 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002900 }
2901 }
2902 }
2903
2904 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_strided_a) {
2905 TEST_REQUIRES_X86_SSE41;
2906 for (uint32_t n = 8; n <= 12; n += 4) {
2907 for (size_t k = 1; k <= 40; k += 9) {
2908 GemmMicrokernelTester()
2909 .mr(3)
2910 .nr(4)
2911 .kr(2)
2912 .sr(1)
2913 .m(3)
2914 .n(n)
2915 .k(k)
2916 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002917 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002918 }
2919 }
2920 }
2921
2922 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_subtile) {
2923 TEST_REQUIRES_X86_SSE41;
2924 for (uint32_t n = 8; n <= 12; n += 4) {
2925 for (size_t k = 1; k <= 40; k += 9) {
2926 for (uint32_t m = 1; m <= 3; m++) {
2927 GemmMicrokernelTester()
2928 .mr(3)
2929 .nr(4)
2930 .kr(2)
2931 .sr(1)
2932 .m(m)
2933 .n(n)
2934 .k(k)
2935 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002936 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002937 }
2938 }
2939 }
2940 }
2941
2942 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm_subtile) {
2943 TEST_REQUIRES_X86_SSE41;
2944 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002945 for (uint32_t n = 1; n <= 4; n++) {
2946 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002947 GemmMicrokernelTester()
2948 .mr(3)
2949 .nr(4)
2950 .kr(2)
2951 .sr(1)
2952 .m(m)
2953 .n(n)
2954 .k(k)
2955 .cm_stride(7)
2956 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002957 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002958 }
2959 }
2960 }
2961 }
2962
2963 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmin) {
2964 TEST_REQUIRES_X86_SSE41;
2965 GemmMicrokernelTester()
2966 .mr(3)
2967 .nr(4)
2968 .kr(2)
2969 .sr(1)
2970 .m(3)
2971 .n(4)
2972 .k(8)
2973 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002974 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002975 }
2976
2977 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmax) {
2978 TEST_REQUIRES_X86_SSE41;
2979 GemmMicrokernelTester()
2980 .mr(3)
2981 .nr(4)
2982 .kr(2)
2983 .sr(1)
2984 .m(3)
2985 .n(4)
2986 .k(8)
2987 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002988 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002989 }
2990
2991 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm) {
2992 TEST_REQUIRES_X86_SSE41;
2993 GemmMicrokernelTester()
2994 .mr(3)
2995 .nr(4)
2996 .kr(2)
2997 .sr(1)
2998 .m(3)
2999 .n(4)
3000 .k(8)
3001 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003002 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003003 }
3004
3005 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_a_zero_point) {
3006 TEST_REQUIRES_X86_SSE41;
3007 for (size_t k = 1; k <= 40; k += 9) {
3008 GemmMicrokernelTester()
3009 .mr(3)
3010 .nr(4)
3011 .kr(2)
3012 .sr(1)
3013 .m(3)
3014 .n(4)
3015 .k(k)
3016 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003017 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003018 }
3019 }
3020
3021 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_b_zero_point) {
3022 TEST_REQUIRES_X86_SSE41;
3023 for (size_t k = 1; k <= 40; k += 9) {
3024 GemmMicrokernelTester()
3025 .mr(3)
3026 .nr(4)
3027 .kr(2)
3028 .sr(1)
3029 .m(3)
3030 .n(4)
3031 .k(k)
3032 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003033 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003034 }
3035 }
3036
3037 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_zero_point) {
3038 TEST_REQUIRES_X86_SSE41;
3039 for (size_t k = 1; k <= 40; k += 9) {
3040 GemmMicrokernelTester()
3041 .mr(3)
3042 .nr(4)
3043 .kr(2)
3044 .sr(1)
3045 .m(3)
3046 .n(4)
3047 .k(k)
3048 .a_zero_point(0)
3049 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003050 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003051 }
3052 }
3053#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3054
3055
3056#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3057 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8) {
3058 TEST_REQUIRES_X86_AVX;
3059 GemmMicrokernelTester()
3060 .mr(2)
3061 .nr(4)
3062 .kr(2)
3063 .sr(1)
3064 .m(2)
3065 .n(4)
3066 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003067 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003068 }
3069
3070 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cn) {
3071 TEST_REQUIRES_X86_AVX;
3072 GemmMicrokernelTester()
3073 .mr(2)
3074 .nr(4)
3075 .kr(2)
3076 .sr(1)
3077 .m(2)
3078 .n(4)
3079 .k(8)
3080 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003081 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003082 }
3083
3084 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_strided_a) {
3085 TEST_REQUIRES_X86_AVX;
3086 GemmMicrokernelTester()
3087 .mr(2)
3088 .nr(4)
3089 .kr(2)
3090 .sr(1)
3091 .m(2)
3092 .n(4)
3093 .k(8)
3094 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003095 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003096 }
3097
3098 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile) {
3099 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003100 for (uint32_t n = 1; n <= 4; n++) {
3101 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003102 GemmMicrokernelTester()
3103 .mr(2)
3104 .nr(4)
3105 .kr(2)
3106 .sr(1)
3107 .m(m)
3108 .n(n)
3109 .k(8)
3110 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003111 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003112 }
3113 }
3114 }
3115
3116 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
3117 TEST_REQUIRES_X86_AVX;
3118 for (uint32_t m = 1; m <= 2; m++) {
3119 GemmMicrokernelTester()
3120 .mr(2)
3121 .nr(4)
3122 .kr(2)
3123 .sr(1)
3124 .m(m)
3125 .n(4)
3126 .k(8)
3127 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003128 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003129 }
3130 }
3131
3132 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
3133 TEST_REQUIRES_X86_AVX;
3134 for (uint32_t n = 1; n <= 4; n++) {
3135 GemmMicrokernelTester()
3136 .mr(2)
3137 .nr(4)
3138 .kr(2)
3139 .sr(1)
3140 .m(2)
3141 .n(n)
3142 .k(8)
3143 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003144 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003145 }
3146 }
3147
3148 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8) {
3149 TEST_REQUIRES_X86_AVX;
3150 for (size_t k = 1; k < 8; k++) {
3151 GemmMicrokernelTester()
3152 .mr(2)
3153 .nr(4)
3154 .kr(2)
3155 .sr(1)
3156 .m(2)
3157 .n(4)
3158 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003159 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003160 }
3161 }
3162
3163 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_strided_a) {
3164 TEST_REQUIRES_X86_AVX;
3165 for (size_t k = 1; k < 8; k++) {
3166 GemmMicrokernelTester()
3167 .mr(2)
3168 .nr(4)
3169 .kr(2)
3170 .sr(1)
3171 .m(2)
3172 .n(4)
3173 .k(k)
3174 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003175 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003176 }
3177 }
3178
3179 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_subtile) {
3180 TEST_REQUIRES_X86_AVX;
3181 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003182 for (uint32_t n = 1; n <= 4; n++) {
3183 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003184 GemmMicrokernelTester()
3185 .mr(2)
3186 .nr(4)
3187 .kr(2)
3188 .sr(1)
3189 .m(m)
3190 .n(n)
3191 .k(k)
3192 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003193 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003194 }
3195 }
3196 }
3197 }
3198
3199 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8) {
3200 TEST_REQUIRES_X86_AVX;
3201 for (size_t k = 9; k < 16; k++) {
3202 GemmMicrokernelTester()
3203 .mr(2)
3204 .nr(4)
3205 .kr(2)
3206 .sr(1)
3207 .m(2)
3208 .n(4)
3209 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003210 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003211 }
3212 }
3213
3214 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_strided_a) {
3215 TEST_REQUIRES_X86_AVX;
3216 for (size_t k = 9; k < 16; k++) {
3217 GemmMicrokernelTester()
3218 .mr(2)
3219 .nr(4)
3220 .kr(2)
3221 .sr(1)
3222 .m(2)
3223 .n(4)
3224 .k(k)
3225 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003226 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003227 }
3228 }
3229
3230 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_subtile) {
3231 TEST_REQUIRES_X86_AVX;
3232 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003233 for (uint32_t n = 1; n <= 4; n++) {
3234 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003235 GemmMicrokernelTester()
3236 .mr(2)
3237 .nr(4)
3238 .kr(2)
3239 .sr(1)
3240 .m(m)
3241 .n(n)
3242 .k(k)
3243 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003244 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003245 }
3246 }
3247 }
3248 }
3249
3250 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8) {
3251 TEST_REQUIRES_X86_AVX;
3252 for (size_t k = 16; k <= 80; k += 8) {
3253 GemmMicrokernelTester()
3254 .mr(2)
3255 .nr(4)
3256 .kr(2)
3257 .sr(1)
3258 .m(2)
3259 .n(4)
3260 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003261 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003262 }
3263 }
3264
3265 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_strided_a) {
3266 TEST_REQUIRES_X86_AVX;
3267 for (size_t k = 16; k <= 80; k += 8) {
3268 GemmMicrokernelTester()
3269 .mr(2)
3270 .nr(4)
3271 .kr(2)
3272 .sr(1)
3273 .m(2)
3274 .n(4)
3275 .k(k)
3276 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003277 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003278 }
3279 }
3280
3281 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_subtile) {
3282 TEST_REQUIRES_X86_AVX;
3283 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003284 for (uint32_t n = 1; n <= 4; n++) {
3285 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003286 GemmMicrokernelTester()
3287 .mr(2)
3288 .nr(4)
3289 .kr(2)
3290 .sr(1)
3291 .m(m)
3292 .n(n)
3293 .k(k)
3294 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003295 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003296 }
3297 }
3298 }
3299 }
3300
3301 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4) {
3302 TEST_REQUIRES_X86_AVX;
3303 for (uint32_t n = 5; n < 8; n++) {
3304 for (size_t k = 1; k <= 40; k += 9) {
3305 GemmMicrokernelTester()
3306 .mr(2)
3307 .nr(4)
3308 .kr(2)
3309 .sr(1)
3310 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003311 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003312 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003313 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003314 }
3315 }
3316 }
3317
3318 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
3319 TEST_REQUIRES_X86_AVX;
3320 for (uint32_t n = 5; n < 8; n++) {
3321 for (size_t k = 1; k <= 40; k += 9) {
3322 GemmMicrokernelTester()
3323 .mr(2)
3324 .nr(4)
3325 .kr(2)
3326 .sr(1)
3327 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003328 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003329 .k(k)
3330 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003331 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003332 }
3333 }
3334 }
3335
3336 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_a) {
3337 TEST_REQUIRES_X86_AVX;
3338 for (uint32_t n = 5; n < 8; n++) {
3339 for (size_t k = 1; k <= 40; k += 9) {
3340 GemmMicrokernelTester()
3341 .mr(2)
3342 .nr(4)
3343 .kr(2)
3344 .sr(1)
3345 .m(2)
3346 .n(n)
3347 .k(k)
3348 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003349 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003350 }
3351 }
3352 }
3353
3354 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_subtile) {
3355 TEST_REQUIRES_X86_AVX;
3356 for (uint32_t n = 5; n < 8; n++) {
3357 for (size_t k = 1; k <= 40; k += 9) {
3358 for (uint32_t m = 1; m <= 2; m++) {
3359 GemmMicrokernelTester()
3360 .mr(2)
3361 .nr(4)
3362 .kr(2)
3363 .sr(1)
3364 .m(m)
3365 .n(n)
3366 .k(k)
3367 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003368 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003369 }
3370 }
3371 }
3372 }
3373
3374 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4) {
3375 TEST_REQUIRES_X86_AVX;
3376 for (uint32_t n = 8; n <= 12; n += 4) {
3377 for (size_t k = 1; k <= 40; k += 9) {
3378 GemmMicrokernelTester()
3379 .mr(2)
3380 .nr(4)
3381 .kr(2)
3382 .sr(1)
3383 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003384 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003385 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003386 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003387 }
3388 }
3389 }
3390
3391 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_cn) {
3392 TEST_REQUIRES_X86_AVX;
3393 for (uint32_t n = 8; n <= 12; n += 4) {
3394 for (size_t k = 1; k <= 40; k += 9) {
3395 GemmMicrokernelTester()
3396 .mr(2)
3397 .nr(4)
3398 .kr(2)
3399 .sr(1)
3400 .m(2)
3401 .n(n)
3402 .k(k)
3403 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003404 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003405 }
3406 }
3407 }
3408
3409 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_a) {
3410 TEST_REQUIRES_X86_AVX;
3411 for (uint32_t n = 8; n <= 12; n += 4) {
3412 for (size_t k = 1; k <= 40; k += 9) {
3413 GemmMicrokernelTester()
3414 .mr(2)
3415 .nr(4)
3416 .kr(2)
3417 .sr(1)
3418 .m(2)
3419 .n(n)
3420 .k(k)
3421 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003422 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003423 }
3424 }
3425 }
3426
3427 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_subtile) {
3428 TEST_REQUIRES_X86_AVX;
3429 for (uint32_t n = 8; n <= 12; n += 4) {
3430 for (size_t k = 1; k <= 40; k += 9) {
3431 for (uint32_t m = 1; m <= 2; m++) {
3432 GemmMicrokernelTester()
3433 .mr(2)
3434 .nr(4)
3435 .kr(2)
3436 .sr(1)
3437 .m(m)
3438 .n(n)
3439 .k(k)
3440 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003441 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003442 }
3443 }
3444 }
3445 }
3446
3447 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm_subtile) {
3448 TEST_REQUIRES_X86_AVX;
3449 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003450 for (uint32_t n = 1; n <= 4; n++) {
3451 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003452 GemmMicrokernelTester()
3453 .mr(2)
3454 .nr(4)
3455 .kr(2)
3456 .sr(1)
3457 .m(m)
3458 .n(n)
3459 .k(k)
3460 .cm_stride(7)
3461 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003462 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003463 }
3464 }
3465 }
3466 }
3467
3468 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmin) {
3469 TEST_REQUIRES_X86_AVX;
3470 GemmMicrokernelTester()
3471 .mr(2)
3472 .nr(4)
3473 .kr(2)
3474 .sr(1)
3475 .m(2)
3476 .n(4)
3477 .k(8)
3478 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003479 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003480 }
3481
3482 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmax) {
3483 TEST_REQUIRES_X86_AVX;
3484 GemmMicrokernelTester()
3485 .mr(2)
3486 .nr(4)
3487 .kr(2)
3488 .sr(1)
3489 .m(2)
3490 .n(4)
3491 .k(8)
3492 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003493 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003494 }
3495
3496 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm) {
3497 TEST_REQUIRES_X86_AVX;
3498 GemmMicrokernelTester()
3499 .mr(2)
3500 .nr(4)
3501 .kr(2)
3502 .sr(1)
3503 .m(2)
3504 .n(4)
3505 .k(8)
3506 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003507 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003508 }
3509
3510 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_a_zero_point) {
3511 TEST_REQUIRES_X86_AVX;
3512 for (size_t k = 1; k <= 40; k += 9) {
3513 GemmMicrokernelTester()
3514 .mr(2)
3515 .nr(4)
3516 .kr(2)
3517 .sr(1)
3518 .m(2)
3519 .n(4)
3520 .k(k)
3521 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003522 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003523 }
3524 }
3525
3526 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_b_zero_point) {
3527 TEST_REQUIRES_X86_AVX;
3528 for (size_t k = 1; k <= 40; k += 9) {
3529 GemmMicrokernelTester()
3530 .mr(2)
3531 .nr(4)
3532 .kr(2)
3533 .sr(1)
3534 .m(2)
3535 .n(4)
3536 .k(k)
3537 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003538 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003539 }
3540 }
3541
3542 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_zero_point) {
3543 TEST_REQUIRES_X86_AVX;
3544 for (size_t k = 1; k <= 40; k += 9) {
3545 GemmMicrokernelTester()
3546 .mr(2)
3547 .nr(4)
3548 .kr(2)
3549 .sr(1)
3550 .m(2)
3551 .n(4)
3552 .k(k)
3553 .a_zero_point(0)
3554 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003555 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003556 }
3557 }
3558#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3559
3560
3561#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3562 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8) {
3563 TEST_REQUIRES_X86_AVX;
3564 GemmMicrokernelTester()
3565 .mr(3)
3566 .nr(4)
3567 .kr(2)
3568 .sr(1)
3569 .m(3)
3570 .n(4)
3571 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003572 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003573 }
3574
3575 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cn) {
3576 TEST_REQUIRES_X86_AVX;
3577 GemmMicrokernelTester()
3578 .mr(3)
3579 .nr(4)
3580 .kr(2)
3581 .sr(1)
3582 .m(3)
3583 .n(4)
3584 .k(8)
3585 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003586 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003587 }
3588
3589 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_strided_a) {
3590 TEST_REQUIRES_X86_AVX;
3591 GemmMicrokernelTester()
3592 .mr(3)
3593 .nr(4)
3594 .kr(2)
3595 .sr(1)
3596 .m(3)
3597 .n(4)
3598 .k(8)
3599 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003600 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003601 }
3602
3603 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile) {
3604 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003605 for (uint32_t n = 1; n <= 4; n++) {
3606 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003607 GemmMicrokernelTester()
3608 .mr(3)
3609 .nr(4)
3610 .kr(2)
3611 .sr(1)
3612 .m(m)
3613 .n(n)
3614 .k(8)
3615 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003616 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003617 }
3618 }
3619 }
3620
3621 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
3622 TEST_REQUIRES_X86_AVX;
3623 for (uint32_t m = 1; m <= 3; m++) {
3624 GemmMicrokernelTester()
3625 .mr(3)
3626 .nr(4)
3627 .kr(2)
3628 .sr(1)
3629 .m(m)
3630 .n(4)
3631 .k(8)
3632 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003633 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003634 }
3635 }
3636
3637 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
3638 TEST_REQUIRES_X86_AVX;
3639 for (uint32_t n = 1; n <= 4; n++) {
3640 GemmMicrokernelTester()
3641 .mr(3)
3642 .nr(4)
3643 .kr(2)
3644 .sr(1)
3645 .m(3)
3646 .n(n)
3647 .k(8)
3648 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003649 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003650 }
3651 }
3652
3653 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8) {
3654 TEST_REQUIRES_X86_AVX;
3655 for (size_t k = 1; k < 8; k++) {
3656 GemmMicrokernelTester()
3657 .mr(3)
3658 .nr(4)
3659 .kr(2)
3660 .sr(1)
3661 .m(3)
3662 .n(4)
3663 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003664 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003665 }
3666 }
3667
3668 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_strided_a) {
3669 TEST_REQUIRES_X86_AVX;
3670 for (size_t k = 1; k < 8; k++) {
3671 GemmMicrokernelTester()
3672 .mr(3)
3673 .nr(4)
3674 .kr(2)
3675 .sr(1)
3676 .m(3)
3677 .n(4)
3678 .k(k)
3679 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003680 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003681 }
3682 }
3683
3684 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_subtile) {
3685 TEST_REQUIRES_X86_AVX;
3686 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003687 for (uint32_t n = 1; n <= 4; n++) {
3688 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003689 GemmMicrokernelTester()
3690 .mr(3)
3691 .nr(4)
3692 .kr(2)
3693 .sr(1)
3694 .m(m)
3695 .n(n)
3696 .k(k)
3697 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003698 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003699 }
3700 }
3701 }
3702 }
3703
3704 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8) {
3705 TEST_REQUIRES_X86_AVX;
3706 for (size_t k = 9; k < 16; k++) {
3707 GemmMicrokernelTester()
3708 .mr(3)
3709 .nr(4)
3710 .kr(2)
3711 .sr(1)
3712 .m(3)
3713 .n(4)
3714 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003715 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003716 }
3717 }
3718
3719 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_strided_a) {
3720 TEST_REQUIRES_X86_AVX;
3721 for (size_t k = 9; k < 16; k++) {
3722 GemmMicrokernelTester()
3723 .mr(3)
3724 .nr(4)
3725 .kr(2)
3726 .sr(1)
3727 .m(3)
3728 .n(4)
3729 .k(k)
3730 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003731 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003732 }
3733 }
3734
3735 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_subtile) {
3736 TEST_REQUIRES_X86_AVX;
3737 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003738 for (uint32_t n = 1; n <= 4; n++) {
3739 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003740 GemmMicrokernelTester()
3741 .mr(3)
3742 .nr(4)
3743 .kr(2)
3744 .sr(1)
3745 .m(m)
3746 .n(n)
3747 .k(k)
3748 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003749 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003750 }
3751 }
3752 }
3753 }
3754
3755 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8) {
3756 TEST_REQUIRES_X86_AVX;
3757 for (size_t k = 16; k <= 80; k += 8) {
3758 GemmMicrokernelTester()
3759 .mr(3)
3760 .nr(4)
3761 .kr(2)
3762 .sr(1)
3763 .m(3)
3764 .n(4)
3765 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003766 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003767 }
3768 }
3769
3770 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_strided_a) {
3771 TEST_REQUIRES_X86_AVX;
3772 for (size_t k = 16; k <= 80; k += 8) {
3773 GemmMicrokernelTester()
3774 .mr(3)
3775 .nr(4)
3776 .kr(2)
3777 .sr(1)
3778 .m(3)
3779 .n(4)
3780 .k(k)
3781 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003782 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003783 }
3784 }
3785
3786 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_subtile) {
3787 TEST_REQUIRES_X86_AVX;
3788 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003789 for (uint32_t n = 1; n <= 4; n++) {
3790 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003791 GemmMicrokernelTester()
3792 .mr(3)
3793 .nr(4)
3794 .kr(2)
3795 .sr(1)
3796 .m(m)
3797 .n(n)
3798 .k(k)
3799 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003800 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003801 }
3802 }
3803 }
3804 }
3805
3806 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4) {
3807 TEST_REQUIRES_X86_AVX;
3808 for (uint32_t n = 5; n < 8; n++) {
3809 for (size_t k = 1; k <= 40; k += 9) {
3810 GemmMicrokernelTester()
3811 .mr(3)
3812 .nr(4)
3813 .kr(2)
3814 .sr(1)
3815 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003816 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003817 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003818 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003819 }
3820 }
3821 }
3822
3823 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
3824 TEST_REQUIRES_X86_AVX;
3825 for (uint32_t n = 5; n < 8; n++) {
3826 for (size_t k = 1; k <= 40; k += 9) {
3827 GemmMicrokernelTester()
3828 .mr(3)
3829 .nr(4)
3830 .kr(2)
3831 .sr(1)
3832 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003833 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003834 .k(k)
3835 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003836 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003837 }
3838 }
3839 }
3840
3841 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_a) {
3842 TEST_REQUIRES_X86_AVX;
3843 for (uint32_t n = 5; n < 8; n++) {
3844 for (size_t k = 1; k <= 40; k += 9) {
3845 GemmMicrokernelTester()
3846 .mr(3)
3847 .nr(4)
3848 .kr(2)
3849 .sr(1)
3850 .m(3)
3851 .n(n)
3852 .k(k)
3853 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003854 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003855 }
3856 }
3857 }
3858
3859 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_subtile) {
3860 TEST_REQUIRES_X86_AVX;
3861 for (uint32_t n = 5; n < 8; n++) {
3862 for (size_t k = 1; k <= 40; k += 9) {
3863 for (uint32_t m = 1; m <= 3; m++) {
3864 GemmMicrokernelTester()
3865 .mr(3)
3866 .nr(4)
3867 .kr(2)
3868 .sr(1)
3869 .m(m)
3870 .n(n)
3871 .k(k)
3872 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003873 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003874 }
3875 }
3876 }
3877 }
3878
3879 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4) {
3880 TEST_REQUIRES_X86_AVX;
3881 for (uint32_t n = 8; n <= 12; n += 4) {
3882 for (size_t k = 1; k <= 40; k += 9) {
3883 GemmMicrokernelTester()
3884 .mr(3)
3885 .nr(4)
3886 .kr(2)
3887 .sr(1)
3888 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003889 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003890 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003891 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003892 }
3893 }
3894 }
3895
3896 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_cn) {
3897 TEST_REQUIRES_X86_AVX;
3898 for (uint32_t n = 8; n <= 12; n += 4) {
3899 for (size_t k = 1; k <= 40; k += 9) {
3900 GemmMicrokernelTester()
3901 .mr(3)
3902 .nr(4)
3903 .kr(2)
3904 .sr(1)
3905 .m(3)
3906 .n(n)
3907 .k(k)
3908 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003909 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003910 }
3911 }
3912 }
3913
3914 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_a) {
3915 TEST_REQUIRES_X86_AVX;
3916 for (uint32_t n = 8; n <= 12; n += 4) {
3917 for (size_t k = 1; k <= 40; k += 9) {
3918 GemmMicrokernelTester()
3919 .mr(3)
3920 .nr(4)
3921 .kr(2)
3922 .sr(1)
3923 .m(3)
3924 .n(n)
3925 .k(k)
3926 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003927 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003928 }
3929 }
3930 }
3931
3932 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_subtile) {
3933 TEST_REQUIRES_X86_AVX;
3934 for (uint32_t n = 8; n <= 12; n += 4) {
3935 for (size_t k = 1; k <= 40; k += 9) {
3936 for (uint32_t m = 1; m <= 3; m++) {
3937 GemmMicrokernelTester()
3938 .mr(3)
3939 .nr(4)
3940 .kr(2)
3941 .sr(1)
3942 .m(m)
3943 .n(n)
3944 .k(k)
3945 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003946 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003947 }
3948 }
3949 }
3950 }
3951
3952 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm_subtile) {
3953 TEST_REQUIRES_X86_AVX;
3954 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003955 for (uint32_t n = 1; n <= 4; n++) {
3956 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003957 GemmMicrokernelTester()
3958 .mr(3)
3959 .nr(4)
3960 .kr(2)
3961 .sr(1)
3962 .m(m)
3963 .n(n)
3964 .k(k)
3965 .cm_stride(7)
3966 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003967 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003968 }
3969 }
3970 }
3971 }
3972
3973 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmin) {
3974 TEST_REQUIRES_X86_AVX;
3975 GemmMicrokernelTester()
3976 .mr(3)
3977 .nr(4)
3978 .kr(2)
3979 .sr(1)
3980 .m(3)
3981 .n(4)
3982 .k(8)
3983 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003984 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003985 }
3986
3987 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmax) {
3988 TEST_REQUIRES_X86_AVX;
3989 GemmMicrokernelTester()
3990 .mr(3)
3991 .nr(4)
3992 .kr(2)
3993 .sr(1)
3994 .m(3)
3995 .n(4)
3996 .k(8)
3997 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003998 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003999 }
4000
4001 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm) {
4002 TEST_REQUIRES_X86_AVX;
4003 GemmMicrokernelTester()
4004 .mr(3)
4005 .nr(4)
4006 .kr(2)
4007 .sr(1)
4008 .m(3)
4009 .n(4)
4010 .k(8)
4011 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004012 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004013 }
4014
4015 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_a_zero_point) {
4016 TEST_REQUIRES_X86_AVX;
4017 for (size_t k = 1; k <= 40; k += 9) {
4018 GemmMicrokernelTester()
4019 .mr(3)
4020 .nr(4)
4021 .kr(2)
4022 .sr(1)
4023 .m(3)
4024 .n(4)
4025 .k(k)
4026 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004027 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004028 }
4029 }
4030
4031 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_b_zero_point) {
4032 TEST_REQUIRES_X86_AVX;
4033 for (size_t k = 1; k <= 40; k += 9) {
4034 GemmMicrokernelTester()
4035 .mr(3)
4036 .nr(4)
4037 .kr(2)
4038 .sr(1)
4039 .m(3)
4040 .n(4)
4041 .k(k)
4042 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004043 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004044 }
4045 }
4046
4047 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_zero_point) {
4048 TEST_REQUIRES_X86_AVX;
4049 for (size_t k = 1; k <= 40; k += 9) {
4050 GemmMicrokernelTester()
4051 .mr(3)
4052 .nr(4)
4053 .kr(2)
4054 .sr(1)
4055 .m(3)
4056 .n(4)
4057 .k(k)
4058 .a_zero_point(0)
4059 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004060 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004061 }
4062 }
4063#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4064
4065
4066#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4067 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8) {
4068 TEST_REQUIRES_X86_AVX;
4069 GemmMicrokernelTester()
4070 .mr(4)
4071 .nr(4)
4072 .kr(2)
4073 .sr(1)
4074 .m(4)
4075 .n(4)
4076 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004077 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004078 }
4079
4080 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cn) {
4081 TEST_REQUIRES_X86_AVX;
4082 GemmMicrokernelTester()
4083 .mr(4)
4084 .nr(4)
4085 .kr(2)
4086 .sr(1)
4087 .m(4)
4088 .n(4)
4089 .k(8)
4090 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004091 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004092 }
4093
4094 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_strided_a) {
4095 TEST_REQUIRES_X86_AVX;
4096 GemmMicrokernelTester()
4097 .mr(4)
4098 .nr(4)
4099 .kr(2)
4100 .sr(1)
4101 .m(4)
4102 .n(4)
4103 .k(8)
4104 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004105 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004106 }
4107
4108 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile) {
4109 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004110 for (uint32_t n = 1; n <= 4; n++) {
4111 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004112 GemmMicrokernelTester()
4113 .mr(4)
4114 .nr(4)
4115 .kr(2)
4116 .sr(1)
4117 .m(m)
4118 .n(n)
4119 .k(8)
4120 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004121 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004122 }
4123 }
4124 }
4125
4126 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
4127 TEST_REQUIRES_X86_AVX;
4128 for (uint32_t m = 1; m <= 4; m++) {
4129 GemmMicrokernelTester()
4130 .mr(4)
4131 .nr(4)
4132 .kr(2)
4133 .sr(1)
4134 .m(m)
4135 .n(4)
4136 .k(8)
4137 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004138 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004139 }
4140 }
4141
4142 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
4143 TEST_REQUIRES_X86_AVX;
4144 for (uint32_t n = 1; n <= 4; n++) {
4145 GemmMicrokernelTester()
4146 .mr(4)
4147 .nr(4)
4148 .kr(2)
4149 .sr(1)
4150 .m(4)
4151 .n(n)
4152 .k(8)
4153 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004154 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004155 }
4156 }
4157
4158 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8) {
4159 TEST_REQUIRES_X86_AVX;
4160 for (size_t k = 1; k < 8; k++) {
4161 GemmMicrokernelTester()
4162 .mr(4)
4163 .nr(4)
4164 .kr(2)
4165 .sr(1)
4166 .m(4)
4167 .n(4)
4168 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004169 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004170 }
4171 }
4172
4173 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_strided_a) {
4174 TEST_REQUIRES_X86_AVX;
4175 for (size_t k = 1; k < 8; k++) {
4176 GemmMicrokernelTester()
4177 .mr(4)
4178 .nr(4)
4179 .kr(2)
4180 .sr(1)
4181 .m(4)
4182 .n(4)
4183 .k(k)
4184 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004185 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004186 }
4187 }
4188
4189 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_subtile) {
4190 TEST_REQUIRES_X86_AVX;
4191 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004192 for (uint32_t n = 1; n <= 4; n++) {
4193 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004194 GemmMicrokernelTester()
4195 .mr(4)
4196 .nr(4)
4197 .kr(2)
4198 .sr(1)
4199 .m(m)
4200 .n(n)
4201 .k(k)
4202 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004203 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004204 }
4205 }
4206 }
4207 }
4208
4209 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8) {
4210 TEST_REQUIRES_X86_AVX;
4211 for (size_t k = 9; k < 16; k++) {
4212 GemmMicrokernelTester()
4213 .mr(4)
4214 .nr(4)
4215 .kr(2)
4216 .sr(1)
4217 .m(4)
4218 .n(4)
4219 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004220 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004221 }
4222 }
4223
4224 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_strided_a) {
4225 TEST_REQUIRES_X86_AVX;
4226 for (size_t k = 9; k < 16; k++) {
4227 GemmMicrokernelTester()
4228 .mr(4)
4229 .nr(4)
4230 .kr(2)
4231 .sr(1)
4232 .m(4)
4233 .n(4)
4234 .k(k)
4235 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004236 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004237 }
4238 }
4239
4240 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_subtile) {
4241 TEST_REQUIRES_X86_AVX;
4242 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004243 for (uint32_t n = 1; n <= 4; n++) {
4244 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004245 GemmMicrokernelTester()
4246 .mr(4)
4247 .nr(4)
4248 .kr(2)
4249 .sr(1)
4250 .m(m)
4251 .n(n)
4252 .k(k)
4253 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004254 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004255 }
4256 }
4257 }
4258 }
4259
4260 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8) {
4261 TEST_REQUIRES_X86_AVX;
4262 for (size_t k = 16; k <= 80; k += 8) {
4263 GemmMicrokernelTester()
4264 .mr(4)
4265 .nr(4)
4266 .kr(2)
4267 .sr(1)
4268 .m(4)
4269 .n(4)
4270 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004271 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004272 }
4273 }
4274
4275 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_strided_a) {
4276 TEST_REQUIRES_X86_AVX;
4277 for (size_t k = 16; k <= 80; k += 8) {
4278 GemmMicrokernelTester()
4279 .mr(4)
4280 .nr(4)
4281 .kr(2)
4282 .sr(1)
4283 .m(4)
4284 .n(4)
4285 .k(k)
4286 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004287 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004288 }
4289 }
4290
4291 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_subtile) {
4292 TEST_REQUIRES_X86_AVX;
4293 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004294 for (uint32_t n = 1; n <= 4; n++) {
4295 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004296 GemmMicrokernelTester()
4297 .mr(4)
4298 .nr(4)
4299 .kr(2)
4300 .sr(1)
4301 .m(m)
4302 .n(n)
4303 .k(k)
4304 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004305 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004306 }
4307 }
4308 }
4309 }
4310
4311 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4) {
4312 TEST_REQUIRES_X86_AVX;
4313 for (uint32_t n = 5; n < 8; n++) {
4314 for (size_t k = 1; k <= 40; k += 9) {
4315 GemmMicrokernelTester()
4316 .mr(4)
4317 .nr(4)
4318 .kr(2)
4319 .sr(1)
4320 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004321 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004322 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004323 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004324 }
4325 }
4326 }
4327
4328 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
4329 TEST_REQUIRES_X86_AVX;
4330 for (uint32_t n = 5; n < 8; n++) {
4331 for (size_t k = 1; k <= 40; k += 9) {
4332 GemmMicrokernelTester()
4333 .mr(4)
4334 .nr(4)
4335 .kr(2)
4336 .sr(1)
4337 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004338 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004339 .k(k)
4340 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004341 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004342 }
4343 }
4344 }
4345
4346 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_a) {
4347 TEST_REQUIRES_X86_AVX;
4348 for (uint32_t n = 5; n < 8; n++) {
4349 for (size_t k = 1; k <= 40; k += 9) {
4350 GemmMicrokernelTester()
4351 .mr(4)
4352 .nr(4)
4353 .kr(2)
4354 .sr(1)
4355 .m(4)
4356 .n(n)
4357 .k(k)
4358 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004359 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004360 }
4361 }
4362 }
4363
4364 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_subtile) {
4365 TEST_REQUIRES_X86_AVX;
4366 for (uint32_t n = 5; n < 8; n++) {
4367 for (size_t k = 1; k <= 40; k += 9) {
4368 for (uint32_t m = 1; m <= 4; m++) {
4369 GemmMicrokernelTester()
4370 .mr(4)
4371 .nr(4)
4372 .kr(2)
4373 .sr(1)
4374 .m(m)
4375 .n(n)
4376 .k(k)
4377 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004378 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004379 }
4380 }
4381 }
4382 }
4383
4384 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4) {
4385 TEST_REQUIRES_X86_AVX;
4386 for (uint32_t n = 8; n <= 12; n += 4) {
4387 for (size_t k = 1; k <= 40; k += 9) {
4388 GemmMicrokernelTester()
4389 .mr(4)
4390 .nr(4)
4391 .kr(2)
4392 .sr(1)
4393 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004394 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004395 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004396 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004397 }
4398 }
4399 }
4400
4401 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_cn) {
4402 TEST_REQUIRES_X86_AVX;
4403 for (uint32_t n = 8; n <= 12; n += 4) {
4404 for (size_t k = 1; k <= 40; k += 9) {
4405 GemmMicrokernelTester()
4406 .mr(4)
4407 .nr(4)
4408 .kr(2)
4409 .sr(1)
4410 .m(4)
4411 .n(n)
4412 .k(k)
4413 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004414 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004415 }
4416 }
4417 }
4418
4419 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_a) {
4420 TEST_REQUIRES_X86_AVX;
4421 for (uint32_t n = 8; n <= 12; n += 4) {
4422 for (size_t k = 1; k <= 40; k += 9) {
4423 GemmMicrokernelTester()
4424 .mr(4)
4425 .nr(4)
4426 .kr(2)
4427 .sr(1)
4428 .m(4)
4429 .n(n)
4430 .k(k)
4431 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004432 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004433 }
4434 }
4435 }
4436
4437 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_subtile) {
4438 TEST_REQUIRES_X86_AVX;
4439 for (uint32_t n = 8; n <= 12; n += 4) {
4440 for (size_t k = 1; k <= 40; k += 9) {
4441 for (uint32_t m = 1; m <= 4; m++) {
4442 GemmMicrokernelTester()
4443 .mr(4)
4444 .nr(4)
4445 .kr(2)
4446 .sr(1)
4447 .m(m)
4448 .n(n)
4449 .k(k)
4450 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004451 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004452 }
4453 }
4454 }
4455 }
4456
4457 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm_subtile) {
4458 TEST_REQUIRES_X86_AVX;
4459 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004460 for (uint32_t n = 1; n <= 4; n++) {
4461 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004462 GemmMicrokernelTester()
4463 .mr(4)
4464 .nr(4)
4465 .kr(2)
4466 .sr(1)
4467 .m(m)
4468 .n(n)
4469 .k(k)
4470 .cm_stride(7)
4471 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004472 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004473 }
4474 }
4475 }
4476 }
4477
4478 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmin) {
4479 TEST_REQUIRES_X86_AVX;
4480 GemmMicrokernelTester()
4481 .mr(4)
4482 .nr(4)
4483 .kr(2)
4484 .sr(1)
4485 .m(4)
4486 .n(4)
4487 .k(8)
4488 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004489 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004490 }
4491
4492 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmax) {
4493 TEST_REQUIRES_X86_AVX;
4494 GemmMicrokernelTester()
4495 .mr(4)
4496 .nr(4)
4497 .kr(2)
4498 .sr(1)
4499 .m(4)
4500 .n(4)
4501 .k(8)
4502 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004503 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004504 }
4505
4506 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm) {
4507 TEST_REQUIRES_X86_AVX;
4508 GemmMicrokernelTester()
4509 .mr(4)
4510 .nr(4)
4511 .kr(2)
4512 .sr(1)
4513 .m(4)
4514 .n(4)
4515 .k(8)
4516 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004517 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004518 }
4519
4520 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_a_zero_point) {
4521 TEST_REQUIRES_X86_AVX;
4522 for (size_t k = 1; k <= 40; k += 9) {
4523 GemmMicrokernelTester()
4524 .mr(4)
4525 .nr(4)
4526 .kr(2)
4527 .sr(1)
4528 .m(4)
4529 .n(4)
4530 .k(k)
4531 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004532 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004533 }
4534 }
4535
4536 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_b_zero_point) {
4537 TEST_REQUIRES_X86_AVX;
4538 for (size_t k = 1; k <= 40; k += 9) {
4539 GemmMicrokernelTester()
4540 .mr(4)
4541 .nr(4)
4542 .kr(2)
4543 .sr(1)
4544 .m(4)
4545 .n(4)
4546 .k(k)
4547 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004548 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004549 }
4550 }
4551
4552 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_zero_point) {
4553 TEST_REQUIRES_X86_AVX;
4554 for (size_t k = 1; k <= 40; k += 9) {
4555 GemmMicrokernelTester()
4556 .mr(4)
4557 .nr(4)
4558 .kr(2)
4559 .sr(1)
4560 .m(4)
4561 .n(4)
4562 .k(k)
4563 .a_zero_point(0)
4564 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004565 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004566 }
4567 }
4568#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4569
4570
4571#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4572 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8) {
4573 TEST_REQUIRES_X86_XOP;
4574 GemmMicrokernelTester()
4575 .mr(2)
4576 .nr(4)
4577 .kr(2)
4578 .sr(1)
4579 .m(2)
4580 .n(4)
4581 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004582 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004583 }
4584
4585 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cn) {
4586 TEST_REQUIRES_X86_XOP;
4587 GemmMicrokernelTester()
4588 .mr(2)
4589 .nr(4)
4590 .kr(2)
4591 .sr(1)
4592 .m(2)
4593 .n(4)
4594 .k(8)
4595 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004596 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004597 }
4598
4599 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_strided_a) {
4600 TEST_REQUIRES_X86_XOP;
4601 GemmMicrokernelTester()
4602 .mr(2)
4603 .nr(4)
4604 .kr(2)
4605 .sr(1)
4606 .m(2)
4607 .n(4)
4608 .k(8)
4609 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004610 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004611 }
4612
4613 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile) {
4614 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004615 for (uint32_t n = 1; n <= 4; n++) {
4616 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004617 GemmMicrokernelTester()
4618 .mr(2)
4619 .nr(4)
4620 .kr(2)
4621 .sr(1)
4622 .m(m)
4623 .n(n)
4624 .k(8)
4625 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004626 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004627 }
4628 }
4629 }
4630
4631 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
4632 TEST_REQUIRES_X86_XOP;
4633 for (uint32_t m = 1; m <= 2; m++) {
4634 GemmMicrokernelTester()
4635 .mr(2)
4636 .nr(4)
4637 .kr(2)
4638 .sr(1)
4639 .m(m)
4640 .n(4)
4641 .k(8)
4642 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004643 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004644 }
4645 }
4646
4647 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
4648 TEST_REQUIRES_X86_XOP;
4649 for (uint32_t n = 1; n <= 4; n++) {
4650 GemmMicrokernelTester()
4651 .mr(2)
4652 .nr(4)
4653 .kr(2)
4654 .sr(1)
4655 .m(2)
4656 .n(n)
4657 .k(8)
4658 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004659 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004660 }
4661 }
4662
4663 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8) {
4664 TEST_REQUIRES_X86_XOP;
4665 for (size_t k = 1; k < 8; k++) {
4666 GemmMicrokernelTester()
4667 .mr(2)
4668 .nr(4)
4669 .kr(2)
4670 .sr(1)
4671 .m(2)
4672 .n(4)
4673 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004674 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004675 }
4676 }
4677
4678 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_strided_a) {
4679 TEST_REQUIRES_X86_XOP;
4680 for (size_t k = 1; k < 8; k++) {
4681 GemmMicrokernelTester()
4682 .mr(2)
4683 .nr(4)
4684 .kr(2)
4685 .sr(1)
4686 .m(2)
4687 .n(4)
4688 .k(k)
4689 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004690 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004691 }
4692 }
4693
4694 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_subtile) {
4695 TEST_REQUIRES_X86_XOP;
4696 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004697 for (uint32_t n = 1; n <= 4; n++) {
4698 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004699 GemmMicrokernelTester()
4700 .mr(2)
4701 .nr(4)
4702 .kr(2)
4703 .sr(1)
4704 .m(m)
4705 .n(n)
4706 .k(k)
4707 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004708 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004709 }
4710 }
4711 }
4712 }
4713
4714 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8) {
4715 TEST_REQUIRES_X86_XOP;
4716 for (size_t k = 9; k < 16; k++) {
4717 GemmMicrokernelTester()
4718 .mr(2)
4719 .nr(4)
4720 .kr(2)
4721 .sr(1)
4722 .m(2)
4723 .n(4)
4724 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004725 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004726 }
4727 }
4728
4729 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_strided_a) {
4730 TEST_REQUIRES_X86_XOP;
4731 for (size_t k = 9; k < 16; k++) {
4732 GemmMicrokernelTester()
4733 .mr(2)
4734 .nr(4)
4735 .kr(2)
4736 .sr(1)
4737 .m(2)
4738 .n(4)
4739 .k(k)
4740 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004741 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004742 }
4743 }
4744
4745 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_subtile) {
4746 TEST_REQUIRES_X86_XOP;
4747 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004748 for (uint32_t n = 1; n <= 4; n++) {
4749 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004750 GemmMicrokernelTester()
4751 .mr(2)
4752 .nr(4)
4753 .kr(2)
4754 .sr(1)
4755 .m(m)
4756 .n(n)
4757 .k(k)
4758 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004759 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004760 }
4761 }
4762 }
4763 }
4764
4765 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8) {
4766 TEST_REQUIRES_X86_XOP;
4767 for (size_t k = 16; k <= 80; k += 8) {
4768 GemmMicrokernelTester()
4769 .mr(2)
4770 .nr(4)
4771 .kr(2)
4772 .sr(1)
4773 .m(2)
4774 .n(4)
4775 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004776 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004777 }
4778 }
4779
4780 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_strided_a) {
4781 TEST_REQUIRES_X86_XOP;
4782 for (size_t k = 16; k <= 80; k += 8) {
4783 GemmMicrokernelTester()
4784 .mr(2)
4785 .nr(4)
4786 .kr(2)
4787 .sr(1)
4788 .m(2)
4789 .n(4)
4790 .k(k)
4791 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004792 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004793 }
4794 }
4795
4796 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_subtile) {
4797 TEST_REQUIRES_X86_XOP;
4798 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004799 for (uint32_t n = 1; n <= 4; n++) {
4800 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004801 GemmMicrokernelTester()
4802 .mr(2)
4803 .nr(4)
4804 .kr(2)
4805 .sr(1)
4806 .m(m)
4807 .n(n)
4808 .k(k)
4809 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004810 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004811 }
4812 }
4813 }
4814 }
4815
4816 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4) {
4817 TEST_REQUIRES_X86_XOP;
4818 for (uint32_t n = 5; n < 8; n++) {
4819 for (size_t k = 1; k <= 40; k += 9) {
4820 GemmMicrokernelTester()
4821 .mr(2)
4822 .nr(4)
4823 .kr(2)
4824 .sr(1)
4825 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004826 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004827 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004828 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004829 }
4830 }
4831 }
4832
4833 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
4834 TEST_REQUIRES_X86_XOP;
4835 for (uint32_t n = 5; n < 8; n++) {
4836 for (size_t k = 1; k <= 40; k += 9) {
4837 GemmMicrokernelTester()
4838 .mr(2)
4839 .nr(4)
4840 .kr(2)
4841 .sr(1)
4842 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004843 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004844 .k(k)
4845 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004846 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004847 }
4848 }
4849 }
4850
4851 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_a) {
4852 TEST_REQUIRES_X86_XOP;
4853 for (uint32_t n = 5; n < 8; n++) {
4854 for (size_t k = 1; k <= 40; k += 9) {
4855 GemmMicrokernelTester()
4856 .mr(2)
4857 .nr(4)
4858 .kr(2)
4859 .sr(1)
4860 .m(2)
4861 .n(n)
4862 .k(k)
4863 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004864 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004865 }
4866 }
4867 }
4868
4869 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_subtile) {
4870 TEST_REQUIRES_X86_XOP;
4871 for (uint32_t n = 5; n < 8; n++) {
4872 for (size_t k = 1; k <= 40; k += 9) {
4873 for (uint32_t m = 1; m <= 2; m++) {
4874 GemmMicrokernelTester()
4875 .mr(2)
4876 .nr(4)
4877 .kr(2)
4878 .sr(1)
4879 .m(m)
4880 .n(n)
4881 .k(k)
4882 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004883 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004884 }
4885 }
4886 }
4887 }
4888
4889 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4) {
4890 TEST_REQUIRES_X86_XOP;
4891 for (uint32_t n = 8; n <= 12; n += 4) {
4892 for (size_t k = 1; k <= 40; k += 9) {
4893 GemmMicrokernelTester()
4894 .mr(2)
4895 .nr(4)
4896 .kr(2)
4897 .sr(1)
4898 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004899 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004900 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004901 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004902 }
4903 }
4904 }
4905
4906 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_cn) {
4907 TEST_REQUIRES_X86_XOP;
4908 for (uint32_t n = 8; n <= 12; n += 4) {
4909 for (size_t k = 1; k <= 40; k += 9) {
4910 GemmMicrokernelTester()
4911 .mr(2)
4912 .nr(4)
4913 .kr(2)
4914 .sr(1)
4915 .m(2)
4916 .n(n)
4917 .k(k)
4918 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004919 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004920 }
4921 }
4922 }
4923
4924 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_a) {
4925 TEST_REQUIRES_X86_XOP;
4926 for (uint32_t n = 8; n <= 12; n += 4) {
4927 for (size_t k = 1; k <= 40; k += 9) {
4928 GemmMicrokernelTester()
4929 .mr(2)
4930 .nr(4)
4931 .kr(2)
4932 .sr(1)
4933 .m(2)
4934 .n(n)
4935 .k(k)
4936 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004937 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004938 }
4939 }
4940 }
4941
4942 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_subtile) {
4943 TEST_REQUIRES_X86_XOP;
4944 for (uint32_t n = 8; n <= 12; n += 4) {
4945 for (size_t k = 1; k <= 40; k += 9) {
4946 for (uint32_t m = 1; m <= 2; m++) {
4947 GemmMicrokernelTester()
4948 .mr(2)
4949 .nr(4)
4950 .kr(2)
4951 .sr(1)
4952 .m(m)
4953 .n(n)
4954 .k(k)
4955 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004956 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004957 }
4958 }
4959 }
4960 }
4961
4962 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm_subtile) {
4963 TEST_REQUIRES_X86_XOP;
4964 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004965 for (uint32_t n = 1; n <= 4; n++) {
4966 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004967 GemmMicrokernelTester()
4968 .mr(2)
4969 .nr(4)
4970 .kr(2)
4971 .sr(1)
4972 .m(m)
4973 .n(n)
4974 .k(k)
4975 .cm_stride(7)
4976 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004977 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004978 }
4979 }
4980 }
4981 }
4982
4983 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmin) {
4984 TEST_REQUIRES_X86_XOP;
4985 GemmMicrokernelTester()
4986 .mr(2)
4987 .nr(4)
4988 .kr(2)
4989 .sr(1)
4990 .m(2)
4991 .n(4)
4992 .k(8)
4993 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004994 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004995 }
4996
4997 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmax) {
4998 TEST_REQUIRES_X86_XOP;
4999 GemmMicrokernelTester()
5000 .mr(2)
5001 .nr(4)
5002 .kr(2)
5003 .sr(1)
5004 .m(2)
5005 .n(4)
5006 .k(8)
5007 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005008 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005009 }
5010
5011 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm) {
5012 TEST_REQUIRES_X86_XOP;
5013 GemmMicrokernelTester()
5014 .mr(2)
5015 .nr(4)
5016 .kr(2)
5017 .sr(1)
5018 .m(2)
5019 .n(4)
5020 .k(8)
5021 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005022 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005023 }
5024
5025 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_a_zero_point) {
5026 TEST_REQUIRES_X86_XOP;
5027 for (size_t k = 1; k <= 40; k += 9) {
5028 GemmMicrokernelTester()
5029 .mr(2)
5030 .nr(4)
5031 .kr(2)
5032 .sr(1)
5033 .m(2)
5034 .n(4)
5035 .k(k)
5036 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005037 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005038 }
5039 }
5040
5041 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_b_zero_point) {
5042 TEST_REQUIRES_X86_XOP;
5043 for (size_t k = 1; k <= 40; k += 9) {
5044 GemmMicrokernelTester()
5045 .mr(2)
5046 .nr(4)
5047 .kr(2)
5048 .sr(1)
5049 .m(2)
5050 .n(4)
5051 .k(k)
5052 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005053 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005054 }
5055 }
5056
5057 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_zero_point) {
5058 TEST_REQUIRES_X86_XOP;
5059 for (size_t k = 1; k <= 40; k += 9) {
5060 GemmMicrokernelTester()
5061 .mr(2)
5062 .nr(4)
5063 .kr(2)
5064 .sr(1)
5065 .m(2)
5066 .n(4)
5067 .k(k)
5068 .a_zero_point(0)
5069 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005070 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005071 }
5072 }
5073#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5074
5075
5076#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5077 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8) {
5078 TEST_REQUIRES_X86_XOP;
5079 GemmMicrokernelTester()
5080 .mr(3)
5081 .nr(4)
5082 .kr(2)
5083 .sr(1)
5084 .m(3)
5085 .n(4)
5086 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005087 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005088 }
5089
5090 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cn) {
5091 TEST_REQUIRES_X86_XOP;
5092 GemmMicrokernelTester()
5093 .mr(3)
5094 .nr(4)
5095 .kr(2)
5096 .sr(1)
5097 .m(3)
5098 .n(4)
5099 .k(8)
5100 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005101 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005102 }
5103
5104 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_strided_a) {
5105 TEST_REQUIRES_X86_XOP;
5106 GemmMicrokernelTester()
5107 .mr(3)
5108 .nr(4)
5109 .kr(2)
5110 .sr(1)
5111 .m(3)
5112 .n(4)
5113 .k(8)
5114 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005115 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005116 }
5117
5118 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile) {
5119 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005120 for (uint32_t n = 1; n <= 4; n++) {
5121 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005122 GemmMicrokernelTester()
5123 .mr(3)
5124 .nr(4)
5125 .kr(2)
5126 .sr(1)
5127 .m(m)
5128 .n(n)
5129 .k(8)
5130 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005131 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005132 }
5133 }
5134 }
5135
5136 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
5137 TEST_REQUIRES_X86_XOP;
5138 for (uint32_t m = 1; m <= 3; m++) {
5139 GemmMicrokernelTester()
5140 .mr(3)
5141 .nr(4)
5142 .kr(2)
5143 .sr(1)
5144 .m(m)
5145 .n(4)
5146 .k(8)
5147 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005148 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005149 }
5150 }
5151
5152 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
5153 TEST_REQUIRES_X86_XOP;
5154 for (uint32_t n = 1; n <= 4; n++) {
5155 GemmMicrokernelTester()
5156 .mr(3)
5157 .nr(4)
5158 .kr(2)
5159 .sr(1)
5160 .m(3)
5161 .n(n)
5162 .k(8)
5163 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005164 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005165 }
5166 }
5167
5168 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8) {
5169 TEST_REQUIRES_X86_XOP;
5170 for (size_t k = 1; k < 8; k++) {
5171 GemmMicrokernelTester()
5172 .mr(3)
5173 .nr(4)
5174 .kr(2)
5175 .sr(1)
5176 .m(3)
5177 .n(4)
5178 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005179 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005180 }
5181 }
5182
5183 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_strided_a) {
5184 TEST_REQUIRES_X86_XOP;
5185 for (size_t k = 1; k < 8; k++) {
5186 GemmMicrokernelTester()
5187 .mr(3)
5188 .nr(4)
5189 .kr(2)
5190 .sr(1)
5191 .m(3)
5192 .n(4)
5193 .k(k)
5194 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005195 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005196 }
5197 }
5198
5199 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_subtile) {
5200 TEST_REQUIRES_X86_XOP;
5201 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005202 for (uint32_t n = 1; n <= 4; n++) {
5203 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005204 GemmMicrokernelTester()
5205 .mr(3)
5206 .nr(4)
5207 .kr(2)
5208 .sr(1)
5209 .m(m)
5210 .n(n)
5211 .k(k)
5212 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005213 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005214 }
5215 }
5216 }
5217 }
5218
5219 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8) {
5220 TEST_REQUIRES_X86_XOP;
5221 for (size_t k = 9; k < 16; k++) {
5222 GemmMicrokernelTester()
5223 .mr(3)
5224 .nr(4)
5225 .kr(2)
5226 .sr(1)
5227 .m(3)
5228 .n(4)
5229 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005230 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005231 }
5232 }
5233
5234 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_strided_a) {
5235 TEST_REQUIRES_X86_XOP;
5236 for (size_t k = 9; k < 16; k++) {
5237 GemmMicrokernelTester()
5238 .mr(3)
5239 .nr(4)
5240 .kr(2)
5241 .sr(1)
5242 .m(3)
5243 .n(4)
5244 .k(k)
5245 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005246 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005247 }
5248 }
5249
5250 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_subtile) {
5251 TEST_REQUIRES_X86_XOP;
5252 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005253 for (uint32_t n = 1; n <= 4; n++) {
5254 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005255 GemmMicrokernelTester()
5256 .mr(3)
5257 .nr(4)
5258 .kr(2)
5259 .sr(1)
5260 .m(m)
5261 .n(n)
5262 .k(k)
5263 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005264 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005265 }
5266 }
5267 }
5268 }
5269
5270 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8) {
5271 TEST_REQUIRES_X86_XOP;
5272 for (size_t k = 16; k <= 80; k += 8) {
5273 GemmMicrokernelTester()
5274 .mr(3)
5275 .nr(4)
5276 .kr(2)
5277 .sr(1)
5278 .m(3)
5279 .n(4)
5280 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005281 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005282 }
5283 }
5284
5285 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_strided_a) {
5286 TEST_REQUIRES_X86_XOP;
5287 for (size_t k = 16; k <= 80; k += 8) {
5288 GemmMicrokernelTester()
5289 .mr(3)
5290 .nr(4)
5291 .kr(2)
5292 .sr(1)
5293 .m(3)
5294 .n(4)
5295 .k(k)
5296 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005297 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005298 }
5299 }
5300
5301 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_subtile) {
5302 TEST_REQUIRES_X86_XOP;
5303 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005304 for (uint32_t n = 1; n <= 4; n++) {
5305 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005306 GemmMicrokernelTester()
5307 .mr(3)
5308 .nr(4)
5309 .kr(2)
5310 .sr(1)
5311 .m(m)
5312 .n(n)
5313 .k(k)
5314 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005315 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005316 }
5317 }
5318 }
5319 }
5320
5321 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4) {
5322 TEST_REQUIRES_X86_XOP;
5323 for (uint32_t n = 5; n < 8; n++) {
5324 for (size_t k = 1; k <= 40; k += 9) {
5325 GemmMicrokernelTester()
5326 .mr(3)
5327 .nr(4)
5328 .kr(2)
5329 .sr(1)
5330 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005331 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005332 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005333 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005334 }
5335 }
5336 }
5337
5338 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
5339 TEST_REQUIRES_X86_XOP;
5340 for (uint32_t n = 5; n < 8; n++) {
5341 for (size_t k = 1; k <= 40; k += 9) {
5342 GemmMicrokernelTester()
5343 .mr(3)
5344 .nr(4)
5345 .kr(2)
5346 .sr(1)
5347 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005348 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005349 .k(k)
5350 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005351 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005352 }
5353 }
5354 }
5355
5356 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_a) {
5357 TEST_REQUIRES_X86_XOP;
5358 for (uint32_t n = 5; n < 8; n++) {
5359 for (size_t k = 1; k <= 40; k += 9) {
5360 GemmMicrokernelTester()
5361 .mr(3)
5362 .nr(4)
5363 .kr(2)
5364 .sr(1)
5365 .m(3)
5366 .n(n)
5367 .k(k)
5368 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005369 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005370 }
5371 }
5372 }
5373
5374 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_subtile) {
5375 TEST_REQUIRES_X86_XOP;
5376 for (uint32_t n = 5; n < 8; n++) {
5377 for (size_t k = 1; k <= 40; k += 9) {
5378 for (uint32_t m = 1; m <= 3; m++) {
5379 GemmMicrokernelTester()
5380 .mr(3)
5381 .nr(4)
5382 .kr(2)
5383 .sr(1)
5384 .m(m)
5385 .n(n)
5386 .k(k)
5387 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005388 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005389 }
5390 }
5391 }
5392 }
5393
5394 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4) {
5395 TEST_REQUIRES_X86_XOP;
5396 for (uint32_t n = 8; n <= 12; n += 4) {
5397 for (size_t k = 1; k <= 40; k += 9) {
5398 GemmMicrokernelTester()
5399 .mr(3)
5400 .nr(4)
5401 .kr(2)
5402 .sr(1)
5403 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005404 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005405 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005406 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005407 }
5408 }
5409 }
5410
5411 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_cn) {
5412 TEST_REQUIRES_X86_XOP;
5413 for (uint32_t n = 8; n <= 12; n += 4) {
5414 for (size_t k = 1; k <= 40; k += 9) {
5415 GemmMicrokernelTester()
5416 .mr(3)
5417 .nr(4)
5418 .kr(2)
5419 .sr(1)
5420 .m(3)
5421 .n(n)
5422 .k(k)
5423 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005424 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005425 }
5426 }
5427 }
5428
5429 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_a) {
5430 TEST_REQUIRES_X86_XOP;
5431 for (uint32_t n = 8; n <= 12; n += 4) {
5432 for (size_t k = 1; k <= 40; k += 9) {
5433 GemmMicrokernelTester()
5434 .mr(3)
5435 .nr(4)
5436 .kr(2)
5437 .sr(1)
5438 .m(3)
5439 .n(n)
5440 .k(k)
5441 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005442 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005443 }
5444 }
5445 }
5446
5447 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_subtile) {
5448 TEST_REQUIRES_X86_XOP;
5449 for (uint32_t n = 8; n <= 12; n += 4) {
5450 for (size_t k = 1; k <= 40; k += 9) {
5451 for (uint32_t m = 1; m <= 3; m++) {
5452 GemmMicrokernelTester()
5453 .mr(3)
5454 .nr(4)
5455 .kr(2)
5456 .sr(1)
5457 .m(m)
5458 .n(n)
5459 .k(k)
5460 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005461 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005462 }
5463 }
5464 }
5465 }
5466
5467 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm_subtile) {
5468 TEST_REQUIRES_X86_XOP;
5469 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005470 for (uint32_t n = 1; n <= 4; n++) {
5471 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005472 GemmMicrokernelTester()
5473 .mr(3)
5474 .nr(4)
5475 .kr(2)
5476 .sr(1)
5477 .m(m)
5478 .n(n)
5479 .k(k)
5480 .cm_stride(7)
5481 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005482 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005483 }
5484 }
5485 }
5486 }
5487
5488 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmin) {
5489 TEST_REQUIRES_X86_XOP;
5490 GemmMicrokernelTester()
5491 .mr(3)
5492 .nr(4)
5493 .kr(2)
5494 .sr(1)
5495 .m(3)
5496 .n(4)
5497 .k(8)
5498 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005499 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005500 }
5501
5502 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmax) {
5503 TEST_REQUIRES_X86_XOP;
5504 GemmMicrokernelTester()
5505 .mr(3)
5506 .nr(4)
5507 .kr(2)
5508 .sr(1)
5509 .m(3)
5510 .n(4)
5511 .k(8)
5512 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005513 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005514 }
5515
5516 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm) {
5517 TEST_REQUIRES_X86_XOP;
5518 GemmMicrokernelTester()
5519 .mr(3)
5520 .nr(4)
5521 .kr(2)
5522 .sr(1)
5523 .m(3)
5524 .n(4)
5525 .k(8)
5526 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005527 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005528 }
5529
5530 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_a_zero_point) {
5531 TEST_REQUIRES_X86_XOP;
5532 for (size_t k = 1; k <= 40; k += 9) {
5533 GemmMicrokernelTester()
5534 .mr(3)
5535 .nr(4)
5536 .kr(2)
5537 .sr(1)
5538 .m(3)
5539 .n(4)
5540 .k(k)
5541 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005542 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005543 }
5544 }
5545
5546 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_b_zero_point) {
5547 TEST_REQUIRES_X86_XOP;
5548 for (size_t k = 1; k <= 40; k += 9) {
5549 GemmMicrokernelTester()
5550 .mr(3)
5551 .nr(4)
5552 .kr(2)
5553 .sr(1)
5554 .m(3)
5555 .n(4)
5556 .k(k)
5557 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005558 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005559 }
5560 }
5561
5562 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_zero_point) {
5563 TEST_REQUIRES_X86_XOP;
5564 for (size_t k = 1; k <= 40; k += 9) {
5565 GemmMicrokernelTester()
5566 .mr(3)
5567 .nr(4)
5568 .kr(2)
5569 .sr(1)
5570 .m(3)
5571 .n(4)
5572 .k(k)
5573 .a_zero_point(0)
5574 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005575 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005576 }
5577 }
5578#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5579
5580
5581#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5582 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8) {
5583 TEST_REQUIRES_X86_XOP;
5584 GemmMicrokernelTester()
5585 .mr(4)
5586 .nr(4)
5587 .kr(2)
5588 .sr(1)
5589 .m(4)
5590 .n(4)
5591 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005592 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005593 }
5594
5595 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cn) {
5596 TEST_REQUIRES_X86_XOP;
5597 GemmMicrokernelTester()
5598 .mr(4)
5599 .nr(4)
5600 .kr(2)
5601 .sr(1)
5602 .m(4)
5603 .n(4)
5604 .k(8)
5605 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005606 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005607 }
5608
5609 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_strided_a) {
5610 TEST_REQUIRES_X86_XOP;
5611 GemmMicrokernelTester()
5612 .mr(4)
5613 .nr(4)
5614 .kr(2)
5615 .sr(1)
5616 .m(4)
5617 .n(4)
5618 .k(8)
5619 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005620 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005621 }
5622
5623 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile) {
5624 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005625 for (uint32_t n = 1; n <= 4; n++) {
5626 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005627 GemmMicrokernelTester()
5628 .mr(4)
5629 .nr(4)
5630 .kr(2)
5631 .sr(1)
5632 .m(m)
5633 .n(n)
5634 .k(8)
5635 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005636 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005637 }
5638 }
5639 }
5640
5641 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
5642 TEST_REQUIRES_X86_XOP;
5643 for (uint32_t m = 1; m <= 4; m++) {
5644 GemmMicrokernelTester()
5645 .mr(4)
5646 .nr(4)
5647 .kr(2)
5648 .sr(1)
5649 .m(m)
5650 .n(4)
5651 .k(8)
5652 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005653 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005654 }
5655 }
5656
5657 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
5658 TEST_REQUIRES_X86_XOP;
5659 for (uint32_t n = 1; n <= 4; n++) {
5660 GemmMicrokernelTester()
5661 .mr(4)
5662 .nr(4)
5663 .kr(2)
5664 .sr(1)
5665 .m(4)
5666 .n(n)
5667 .k(8)
5668 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005669 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005670 }
5671 }
5672
5673 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8) {
5674 TEST_REQUIRES_X86_XOP;
5675 for (size_t k = 1; k < 8; k++) {
5676 GemmMicrokernelTester()
5677 .mr(4)
5678 .nr(4)
5679 .kr(2)
5680 .sr(1)
5681 .m(4)
5682 .n(4)
5683 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005684 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005685 }
5686 }
5687
5688 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_strided_a) {
5689 TEST_REQUIRES_X86_XOP;
5690 for (size_t k = 1; k < 8; k++) {
5691 GemmMicrokernelTester()
5692 .mr(4)
5693 .nr(4)
5694 .kr(2)
5695 .sr(1)
5696 .m(4)
5697 .n(4)
5698 .k(k)
5699 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005700 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005701 }
5702 }
5703
5704 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_subtile) {
5705 TEST_REQUIRES_X86_XOP;
5706 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005707 for (uint32_t n = 1; n <= 4; n++) {
5708 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005709 GemmMicrokernelTester()
5710 .mr(4)
5711 .nr(4)
5712 .kr(2)
5713 .sr(1)
5714 .m(m)
5715 .n(n)
5716 .k(k)
5717 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005718 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005719 }
5720 }
5721 }
5722 }
5723
5724 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8) {
5725 TEST_REQUIRES_X86_XOP;
5726 for (size_t k = 9; k < 16; k++) {
5727 GemmMicrokernelTester()
5728 .mr(4)
5729 .nr(4)
5730 .kr(2)
5731 .sr(1)
5732 .m(4)
5733 .n(4)
5734 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005735 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005736 }
5737 }
5738
5739 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_strided_a) {
5740 TEST_REQUIRES_X86_XOP;
5741 for (size_t k = 9; k < 16; k++) {
5742 GemmMicrokernelTester()
5743 .mr(4)
5744 .nr(4)
5745 .kr(2)
5746 .sr(1)
5747 .m(4)
5748 .n(4)
5749 .k(k)
5750 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005751 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005752 }
5753 }
5754
5755 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_subtile) {
5756 TEST_REQUIRES_X86_XOP;
5757 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005758 for (uint32_t n = 1; n <= 4; n++) {
5759 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005760 GemmMicrokernelTester()
5761 .mr(4)
5762 .nr(4)
5763 .kr(2)
5764 .sr(1)
5765 .m(m)
5766 .n(n)
5767 .k(k)
5768 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005769 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005770 }
5771 }
5772 }
5773 }
5774
5775 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8) {
5776 TEST_REQUIRES_X86_XOP;
5777 for (size_t k = 16; k <= 80; k += 8) {
5778 GemmMicrokernelTester()
5779 .mr(4)
5780 .nr(4)
5781 .kr(2)
5782 .sr(1)
5783 .m(4)
5784 .n(4)
5785 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005786 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005787 }
5788 }
5789
5790 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_strided_a) {
5791 TEST_REQUIRES_X86_XOP;
5792 for (size_t k = 16; k <= 80; k += 8) {
5793 GemmMicrokernelTester()
5794 .mr(4)
5795 .nr(4)
5796 .kr(2)
5797 .sr(1)
5798 .m(4)
5799 .n(4)
5800 .k(k)
5801 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005802 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005803 }
5804 }
5805
5806 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_subtile) {
5807 TEST_REQUIRES_X86_XOP;
5808 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005809 for (uint32_t n = 1; n <= 4; n++) {
5810 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005811 GemmMicrokernelTester()
5812 .mr(4)
5813 .nr(4)
5814 .kr(2)
5815 .sr(1)
5816 .m(m)
5817 .n(n)
5818 .k(k)
5819 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005820 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005821 }
5822 }
5823 }
5824 }
5825
5826 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4) {
5827 TEST_REQUIRES_X86_XOP;
5828 for (uint32_t n = 5; n < 8; n++) {
5829 for (size_t k = 1; k <= 40; k += 9) {
5830 GemmMicrokernelTester()
5831 .mr(4)
5832 .nr(4)
5833 .kr(2)
5834 .sr(1)
5835 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005836 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005837 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005838 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005839 }
5840 }
5841 }
5842
5843 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
5844 TEST_REQUIRES_X86_XOP;
5845 for (uint32_t n = 5; n < 8; n++) {
5846 for (size_t k = 1; k <= 40; k += 9) {
5847 GemmMicrokernelTester()
5848 .mr(4)
5849 .nr(4)
5850 .kr(2)
5851 .sr(1)
5852 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005853 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005854 .k(k)
5855 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005856 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005857 }
5858 }
5859 }
5860
5861 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_a) {
5862 TEST_REQUIRES_X86_XOP;
5863 for (uint32_t n = 5; n < 8; n++) {
5864 for (size_t k = 1; k <= 40; k += 9) {
5865 GemmMicrokernelTester()
5866 .mr(4)
5867 .nr(4)
5868 .kr(2)
5869 .sr(1)
5870 .m(4)
5871 .n(n)
5872 .k(k)
5873 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005874 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005875 }
5876 }
5877 }
5878
5879 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_subtile) {
5880 TEST_REQUIRES_X86_XOP;
5881 for (uint32_t n = 5; n < 8; n++) {
5882 for (size_t k = 1; k <= 40; k += 9) {
5883 for (uint32_t m = 1; m <= 4; m++) {
5884 GemmMicrokernelTester()
5885 .mr(4)
5886 .nr(4)
5887 .kr(2)
5888 .sr(1)
5889 .m(m)
5890 .n(n)
5891 .k(k)
5892 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005893 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005894 }
5895 }
5896 }
5897 }
5898
5899 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4) {
5900 TEST_REQUIRES_X86_XOP;
5901 for (uint32_t n = 8; n <= 12; n += 4) {
5902 for (size_t k = 1; k <= 40; k += 9) {
5903 GemmMicrokernelTester()
5904 .mr(4)
5905 .nr(4)
5906 .kr(2)
5907 .sr(1)
5908 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005909 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005910 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005911 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005912 }
5913 }
5914 }
5915
5916 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_cn) {
5917 TEST_REQUIRES_X86_XOP;
5918 for (uint32_t n = 8; n <= 12; n += 4) {
5919 for (size_t k = 1; k <= 40; k += 9) {
5920 GemmMicrokernelTester()
5921 .mr(4)
5922 .nr(4)
5923 .kr(2)
5924 .sr(1)
5925 .m(4)
5926 .n(n)
5927 .k(k)
5928 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005929 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005930 }
5931 }
5932 }
5933
5934 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_a) {
5935 TEST_REQUIRES_X86_XOP;
5936 for (uint32_t n = 8; n <= 12; n += 4) {
5937 for (size_t k = 1; k <= 40; k += 9) {
5938 GemmMicrokernelTester()
5939 .mr(4)
5940 .nr(4)
5941 .kr(2)
5942 .sr(1)
5943 .m(4)
5944 .n(n)
5945 .k(k)
5946 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005947 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005948 }
5949 }
5950 }
5951
5952 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_subtile) {
5953 TEST_REQUIRES_X86_XOP;
5954 for (uint32_t n = 8; n <= 12; n += 4) {
5955 for (size_t k = 1; k <= 40; k += 9) {
5956 for (uint32_t m = 1; m <= 4; m++) {
5957 GemmMicrokernelTester()
5958 .mr(4)
5959 .nr(4)
5960 .kr(2)
5961 .sr(1)
5962 .m(m)
5963 .n(n)
5964 .k(k)
5965 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005966 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005967 }
5968 }
5969 }
5970 }
5971
5972 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm_subtile) {
5973 TEST_REQUIRES_X86_XOP;
5974 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005975 for (uint32_t n = 1; n <= 4; n++) {
5976 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005977 GemmMicrokernelTester()
5978 .mr(4)
5979 .nr(4)
5980 .kr(2)
5981 .sr(1)
5982 .m(m)
5983 .n(n)
5984 .k(k)
5985 .cm_stride(7)
5986 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005987 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005988 }
5989 }
5990 }
5991 }
5992
5993 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmin) {
5994 TEST_REQUIRES_X86_XOP;
5995 GemmMicrokernelTester()
5996 .mr(4)
5997 .nr(4)
5998 .kr(2)
5999 .sr(1)
6000 .m(4)
6001 .n(4)
6002 .k(8)
6003 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006004 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006005 }
6006
6007 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmax) {
6008 TEST_REQUIRES_X86_XOP;
6009 GemmMicrokernelTester()
6010 .mr(4)
6011 .nr(4)
6012 .kr(2)
6013 .sr(1)
6014 .m(4)
6015 .n(4)
6016 .k(8)
6017 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006018 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006019 }
6020
6021 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm) {
6022 TEST_REQUIRES_X86_XOP;
6023 GemmMicrokernelTester()
6024 .mr(4)
6025 .nr(4)
6026 .kr(2)
6027 .sr(1)
6028 .m(4)
6029 .n(4)
6030 .k(8)
6031 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006032 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006033 }
6034
6035 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_a_zero_point) {
6036 TEST_REQUIRES_X86_XOP;
6037 for (size_t k = 1; k <= 40; k += 9) {
6038 GemmMicrokernelTester()
6039 .mr(4)
6040 .nr(4)
6041 .kr(2)
6042 .sr(1)
6043 .m(4)
6044 .n(4)
6045 .k(k)
6046 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006047 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006048 }
6049 }
6050
6051 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_b_zero_point) {
6052 TEST_REQUIRES_X86_XOP;
6053 for (size_t k = 1; k <= 40; k += 9) {
6054 GemmMicrokernelTester()
6055 .mr(4)
6056 .nr(4)
6057 .kr(2)
6058 .sr(1)
6059 .m(4)
6060 .n(4)
6061 .k(k)
6062 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006063 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006064 }
6065 }
6066
6067 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_zero_point) {
6068 TEST_REQUIRES_X86_XOP;
6069 for (size_t k = 1; k <= 40; k += 9) {
6070 GemmMicrokernelTester()
6071 .mr(4)
6072 .nr(4)
6073 .kr(2)
6074 .sr(1)
6075 .m(4)
6076 .n(4)
6077 .k(k)
6078 .a_zero_point(0)
6079 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006080 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006081 }
6082 }
6083#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6084
6085
6086#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6087 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8) {
6088 TEST_REQUIRES_X86_SSE2;
6089 GemmMicrokernelTester()
6090 .mr(1)
6091 .nr(4)
6092 .kr(2)
6093 .sr(1)
6094 .m(1)
6095 .n(4)
6096 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006097 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006098 }
6099
6100 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cn) {
6101 TEST_REQUIRES_X86_SSE2;
6102 GemmMicrokernelTester()
6103 .mr(1)
6104 .nr(4)
6105 .kr(2)
6106 .sr(1)
6107 .m(1)
6108 .n(4)
6109 .k(8)
6110 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006111 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006112 }
6113
6114 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_strided_a) {
6115 TEST_REQUIRES_X86_SSE2;
6116 GemmMicrokernelTester()
6117 .mr(1)
6118 .nr(4)
6119 .kr(2)
6120 .sr(1)
6121 .m(1)
6122 .n(4)
6123 .k(8)
6124 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006125 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006126 }
6127
6128 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile) {
6129 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006130 for (uint32_t n = 1; n <= 4; n++) {
6131 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006132 GemmMicrokernelTester()
6133 .mr(1)
6134 .nr(4)
6135 .kr(2)
6136 .sr(1)
6137 .m(m)
6138 .n(n)
6139 .k(8)
6140 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006141 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006142 }
6143 }
6144 }
6145
6146 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
6147 TEST_REQUIRES_X86_SSE2;
6148 for (uint32_t m = 1; m <= 1; m++) {
6149 GemmMicrokernelTester()
6150 .mr(1)
6151 .nr(4)
6152 .kr(2)
6153 .sr(1)
6154 .m(m)
6155 .n(4)
6156 .k(8)
6157 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006158 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006159 }
6160 }
6161
6162 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
6163 TEST_REQUIRES_X86_SSE2;
6164 for (uint32_t n = 1; n <= 4; n++) {
6165 GemmMicrokernelTester()
6166 .mr(1)
6167 .nr(4)
6168 .kr(2)
6169 .sr(1)
6170 .m(1)
6171 .n(n)
6172 .k(8)
6173 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006174 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006175 }
6176 }
6177
6178 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8) {
6179 TEST_REQUIRES_X86_SSE2;
6180 for (size_t k = 1; k < 8; k++) {
6181 GemmMicrokernelTester()
6182 .mr(1)
6183 .nr(4)
6184 .kr(2)
6185 .sr(1)
6186 .m(1)
6187 .n(4)
6188 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006189 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006190 }
6191 }
6192
6193 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_strided_a) {
6194 TEST_REQUIRES_X86_SSE2;
6195 for (size_t k = 1; k < 8; k++) {
6196 GemmMicrokernelTester()
6197 .mr(1)
6198 .nr(4)
6199 .kr(2)
6200 .sr(1)
6201 .m(1)
6202 .n(4)
6203 .k(k)
6204 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006205 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006206 }
6207 }
6208
6209 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_subtile) {
6210 TEST_REQUIRES_X86_SSE2;
6211 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006212 for (uint32_t n = 1; n <= 4; n++) {
6213 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006214 GemmMicrokernelTester()
6215 .mr(1)
6216 .nr(4)
6217 .kr(2)
6218 .sr(1)
6219 .m(m)
6220 .n(n)
6221 .k(k)
6222 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006223 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006224 }
6225 }
6226 }
6227 }
6228
6229 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8) {
6230 TEST_REQUIRES_X86_SSE2;
6231 for (size_t k = 9; k < 16; k++) {
6232 GemmMicrokernelTester()
6233 .mr(1)
6234 .nr(4)
6235 .kr(2)
6236 .sr(1)
6237 .m(1)
6238 .n(4)
6239 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006240 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006241 }
6242 }
6243
6244 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_strided_a) {
6245 TEST_REQUIRES_X86_SSE2;
6246 for (size_t k = 9; k < 16; k++) {
6247 GemmMicrokernelTester()
6248 .mr(1)
6249 .nr(4)
6250 .kr(2)
6251 .sr(1)
6252 .m(1)
6253 .n(4)
6254 .k(k)
6255 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006256 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006257 }
6258 }
6259
6260 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_subtile) {
6261 TEST_REQUIRES_X86_SSE2;
6262 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006263 for (uint32_t n = 1; n <= 4; n++) {
6264 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006265 GemmMicrokernelTester()
6266 .mr(1)
6267 .nr(4)
6268 .kr(2)
6269 .sr(1)
6270 .m(m)
6271 .n(n)
6272 .k(k)
6273 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006274 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006275 }
6276 }
6277 }
6278 }
6279
6280 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8) {
6281 TEST_REQUIRES_X86_SSE2;
6282 for (size_t k = 16; k <= 80; k += 8) {
6283 GemmMicrokernelTester()
6284 .mr(1)
6285 .nr(4)
6286 .kr(2)
6287 .sr(1)
6288 .m(1)
6289 .n(4)
6290 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006291 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006292 }
6293 }
6294
6295 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_strided_a) {
6296 TEST_REQUIRES_X86_SSE2;
6297 for (size_t k = 16; k <= 80; k += 8) {
6298 GemmMicrokernelTester()
6299 .mr(1)
6300 .nr(4)
6301 .kr(2)
6302 .sr(1)
6303 .m(1)
6304 .n(4)
6305 .k(k)
6306 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006307 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006308 }
6309 }
6310
6311 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_subtile) {
6312 TEST_REQUIRES_X86_SSE2;
6313 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006314 for (uint32_t n = 1; n <= 4; n++) {
6315 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006316 GemmMicrokernelTester()
6317 .mr(1)
6318 .nr(4)
6319 .kr(2)
6320 .sr(1)
6321 .m(m)
6322 .n(n)
6323 .k(k)
6324 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006325 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006326 }
6327 }
6328 }
6329 }
6330
6331 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4) {
6332 TEST_REQUIRES_X86_SSE2;
6333 for (uint32_t n = 5; n < 8; n++) {
6334 for (size_t k = 1; k <= 40; k += 9) {
6335 GemmMicrokernelTester()
6336 .mr(1)
6337 .nr(4)
6338 .kr(2)
6339 .sr(1)
6340 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006341 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006342 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006343 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006344 }
6345 }
6346 }
6347
6348 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
6349 TEST_REQUIRES_X86_SSE2;
6350 for (uint32_t n = 5; n < 8; n++) {
6351 for (size_t k = 1; k <= 40; k += 9) {
6352 GemmMicrokernelTester()
6353 .mr(1)
6354 .nr(4)
6355 .kr(2)
6356 .sr(1)
6357 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006358 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006359 .k(k)
6360 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006361 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006362 }
6363 }
6364 }
6365
6366 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_a) {
6367 TEST_REQUIRES_X86_SSE2;
6368 for (uint32_t n = 5; n < 8; n++) {
6369 for (size_t k = 1; k <= 40; k += 9) {
6370 GemmMicrokernelTester()
6371 .mr(1)
6372 .nr(4)
6373 .kr(2)
6374 .sr(1)
6375 .m(1)
6376 .n(n)
6377 .k(k)
6378 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08006379 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006380 }
6381 }
6382 }
6383
6384 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_subtile) {
6385 TEST_REQUIRES_X86_SSE2;
6386 for (uint32_t n = 5; n < 8; n++) {
6387 for (size_t k = 1; k <= 40; k += 9) {
6388 for (uint32_t m = 1; m <= 1; m++) {
6389 GemmMicrokernelTester()
6390 .mr(1)
6391 .nr(4)
6392 .kr(2)
6393 .sr(1)
6394 .m(m)
6395 .n(n)
6396 .k(k)
6397 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006398 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006399 }
6400 }
6401 }
6402 }
6403
6404 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4) {
6405 TEST_REQUIRES_X86_SSE2;
6406 for (uint32_t n = 8; n <= 12; n += 4) {
6407 for (size_t k = 1; k <= 40; k += 9) {
6408 GemmMicrokernelTester()
6409 .mr(1)
6410 .nr(4)
6411 .kr(2)
6412 .sr(1)
6413 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006414 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006415 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006416 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006417 }
6418 }
6419 }
6420
6421 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
6422 TEST_REQUIRES_X86_SSE2;
6423 for (uint32_t n = 8; n <= 12; n += 4) {
6424 for (size_t k = 1; k <= 40; k += 9) {
6425 GemmMicrokernelTester()
6426 .mr(1)
6427 .nr(4)
6428 .kr(2)
6429 .sr(1)
6430 .m(1)
6431 .n(n)
6432 .k(k)
6433 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006434 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006435 }
6436 }
6437 }
6438
6439 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_a) {
6440 TEST_REQUIRES_X86_SSE2;
6441 for (uint32_t n = 8; n <= 12; n += 4) {
6442 for (size_t k = 1; k <= 40; k += 9) {
6443 GemmMicrokernelTester()
6444 .mr(1)
6445 .nr(4)
6446 .kr(2)
6447 .sr(1)
6448 .m(1)
6449 .n(n)
6450 .k(k)
6451 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08006452 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006453 }
6454 }
6455 }
6456
6457 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_subtile) {
6458 TEST_REQUIRES_X86_SSE2;
6459 for (uint32_t n = 8; n <= 12; n += 4) {
6460 for (size_t k = 1; k <= 40; k += 9) {
6461 for (uint32_t m = 1; m <= 1; m++) {
6462 GemmMicrokernelTester()
6463 .mr(1)
6464 .nr(4)
6465 .kr(2)
6466 .sr(1)
6467 .m(m)
6468 .n(n)
6469 .k(k)
6470 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006471 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006472 }
6473 }
6474 }
6475 }
6476
6477 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm_subtile) {
6478 TEST_REQUIRES_X86_SSE2;
6479 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006480 for (uint32_t n = 1; n <= 4; n++) {
6481 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006482 GemmMicrokernelTester()
6483 .mr(1)
6484 .nr(4)
6485 .kr(2)
6486 .sr(1)
6487 .m(m)
6488 .n(n)
6489 .k(k)
6490 .cm_stride(7)
6491 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006492 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006493 }
6494 }
6495 }
6496 }
6497
6498 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmin) {
6499 TEST_REQUIRES_X86_SSE2;
6500 GemmMicrokernelTester()
6501 .mr(1)
6502 .nr(4)
6503 .kr(2)
6504 .sr(1)
6505 .m(1)
6506 .n(4)
6507 .k(8)
6508 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006509 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006510 }
6511
6512 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmax) {
6513 TEST_REQUIRES_X86_SSE2;
6514 GemmMicrokernelTester()
6515 .mr(1)
6516 .nr(4)
6517 .kr(2)
6518 .sr(1)
6519 .m(1)
6520 .n(4)
6521 .k(8)
6522 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006523 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006524 }
6525
6526 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm) {
6527 TEST_REQUIRES_X86_SSE2;
6528 GemmMicrokernelTester()
6529 .mr(1)
6530 .nr(4)
6531 .kr(2)
6532 .sr(1)
6533 .m(1)
6534 .n(4)
6535 .k(8)
6536 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006537 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006538 }
6539
6540 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_a_zero_point) {
6541 TEST_REQUIRES_X86_SSE2;
6542 for (size_t k = 1; k <= 40; k += 9) {
6543 GemmMicrokernelTester()
6544 .mr(1)
6545 .nr(4)
6546 .kr(2)
6547 .sr(1)
6548 .m(1)
6549 .n(4)
6550 .k(k)
6551 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006552 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006553 }
6554 }
6555
6556 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_b_zero_point) {
6557 TEST_REQUIRES_X86_SSE2;
6558 for (size_t k = 1; k <= 40; k += 9) {
6559 GemmMicrokernelTester()
6560 .mr(1)
6561 .nr(4)
6562 .kr(2)
6563 .sr(1)
6564 .m(1)
6565 .n(4)
6566 .k(k)
6567 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006568 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006569 }
6570 }
6571
6572 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_zero_point) {
6573 TEST_REQUIRES_X86_SSE2;
6574 for (size_t k = 1; k <= 40; k += 9) {
6575 GemmMicrokernelTester()
6576 .mr(1)
6577 .nr(4)
6578 .kr(2)
6579 .sr(1)
6580 .m(1)
6581 .n(4)
6582 .k(k)
6583 .a_zero_point(0)
6584 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006585 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006586 }
6587 }
6588#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6589
6590
6591#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6592 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8) {
6593 TEST_REQUIRES_X86_SSE2;
6594 GemmMicrokernelTester()
6595 .mr(2)
6596 .nr(4)
6597 .kr(2)
6598 .sr(1)
6599 .m(2)
6600 .n(4)
6601 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006602 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006603 }
6604
6605 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cn) {
6606 TEST_REQUIRES_X86_SSE2;
6607 GemmMicrokernelTester()
6608 .mr(2)
6609 .nr(4)
6610 .kr(2)
6611 .sr(1)
6612 .m(2)
6613 .n(4)
6614 .k(8)
6615 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006616 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006617 }
6618
6619 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_strided_a) {
6620 TEST_REQUIRES_X86_SSE2;
6621 GemmMicrokernelTester()
6622 .mr(2)
6623 .nr(4)
6624 .kr(2)
6625 .sr(1)
6626 .m(2)
6627 .n(4)
6628 .k(8)
6629 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006630 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006631 }
6632
6633 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile) {
6634 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006635 for (uint32_t n = 1; n <= 4; n++) {
6636 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006637 GemmMicrokernelTester()
6638 .mr(2)
6639 .nr(4)
6640 .kr(2)
6641 .sr(1)
6642 .m(m)
6643 .n(n)
6644 .k(8)
6645 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006646 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006647 }
6648 }
6649 }
6650
6651 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
6652 TEST_REQUIRES_X86_SSE2;
6653 for (uint32_t m = 1; m <= 2; m++) {
6654 GemmMicrokernelTester()
6655 .mr(2)
6656 .nr(4)
6657 .kr(2)
6658 .sr(1)
6659 .m(m)
6660 .n(4)
6661 .k(8)
6662 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006663 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006664 }
6665 }
6666
6667 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
6668 TEST_REQUIRES_X86_SSE2;
6669 for (uint32_t n = 1; n <= 4; n++) {
6670 GemmMicrokernelTester()
6671 .mr(2)
6672 .nr(4)
6673 .kr(2)
6674 .sr(1)
6675 .m(2)
6676 .n(n)
6677 .k(8)
6678 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006679 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006680 }
6681 }
6682
6683 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8) {
6684 TEST_REQUIRES_X86_SSE2;
6685 for (size_t k = 1; k < 8; k++) {
6686 GemmMicrokernelTester()
6687 .mr(2)
6688 .nr(4)
6689 .kr(2)
6690 .sr(1)
6691 .m(2)
6692 .n(4)
6693 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006694 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006695 }
6696 }
6697
6698 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8_strided_a) {
6699 TEST_REQUIRES_X86_SSE2;
6700 for (size_t k = 1; k < 8; k++) {
6701 GemmMicrokernelTester()
6702 .mr(2)
6703 .nr(4)
6704 .kr(2)
6705 .sr(1)
6706 .m(2)
6707 .n(4)
6708 .k(k)
6709 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006710 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006711 }
6712 }
6713
6714 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8_subtile) {
6715 TEST_REQUIRES_X86_SSE2;
6716 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006717 for (uint32_t n = 1; n <= 4; n++) {
6718 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006719 GemmMicrokernelTester()
6720 .mr(2)
6721 .nr(4)
6722 .kr(2)
6723 .sr(1)
6724 .m(m)
6725 .n(n)
6726 .k(k)
6727 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006728 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006729 }
6730 }
6731 }
6732 }
6733
6734 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8) {
6735 TEST_REQUIRES_X86_SSE2;
6736 for (size_t k = 9; k < 16; k++) {
6737 GemmMicrokernelTester()
6738 .mr(2)
6739 .nr(4)
6740 .kr(2)
6741 .sr(1)
6742 .m(2)
6743 .n(4)
6744 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006745 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006746 }
6747 }
6748
6749 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8_strided_a) {
6750 TEST_REQUIRES_X86_SSE2;
6751 for (size_t k = 9; k < 16; k++) {
6752 GemmMicrokernelTester()
6753 .mr(2)
6754 .nr(4)
6755 .kr(2)
6756 .sr(1)
6757 .m(2)
6758 .n(4)
6759 .k(k)
6760 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006761 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006762 }
6763 }
6764
6765 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8_subtile) {
6766 TEST_REQUIRES_X86_SSE2;
6767 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006768 for (uint32_t n = 1; n <= 4; n++) {
6769 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006770 GemmMicrokernelTester()
6771 .mr(2)
6772 .nr(4)
6773 .kr(2)
6774 .sr(1)
6775 .m(m)
6776 .n(n)
6777 .k(k)
6778 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006779 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006780 }
6781 }
6782 }
6783 }
6784
6785 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8) {
6786 TEST_REQUIRES_X86_SSE2;
6787 for (size_t k = 16; k <= 80; k += 8) {
6788 GemmMicrokernelTester()
6789 .mr(2)
6790 .nr(4)
6791 .kr(2)
6792 .sr(1)
6793 .m(2)
6794 .n(4)
6795 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006796 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006797 }
6798 }
6799
6800 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8_strided_a) {
6801 TEST_REQUIRES_X86_SSE2;
6802 for (size_t k = 16; k <= 80; k += 8) {
6803 GemmMicrokernelTester()
6804 .mr(2)
6805 .nr(4)
6806 .kr(2)
6807 .sr(1)
6808 .m(2)
6809 .n(4)
6810 .k(k)
6811 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006812 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006813 }
6814 }
6815
6816 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8_subtile) {
6817 TEST_REQUIRES_X86_SSE2;
6818 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006819 for (uint32_t n = 1; n <= 4; n++) {
6820 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006821 GemmMicrokernelTester()
6822 .mr(2)
6823 .nr(4)
6824 .kr(2)
6825 .sr(1)
6826 .m(m)
6827 .n(n)
6828 .k(k)
6829 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006830 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006831 }
6832 }
6833 }
6834 }
6835
6836 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4) {
6837 TEST_REQUIRES_X86_SSE2;
6838 for (uint32_t n = 5; n < 8; n++) {
6839 for (size_t k = 1; k <= 40; k += 9) {
6840 GemmMicrokernelTester()
6841 .mr(2)
6842 .nr(4)
6843 .kr(2)
6844 .sr(1)
6845 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006846 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006847 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006848 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006849 }
6850 }
6851 }
6852
6853 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
6854 TEST_REQUIRES_X86_SSE2;
6855 for (uint32_t n = 5; n < 8; n++) {
6856 for (size_t k = 1; k <= 40; k += 9) {
6857 GemmMicrokernelTester()
6858 .mr(2)
6859 .nr(4)
6860 .kr(2)
6861 .sr(1)
6862 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006863 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006864 .k(k)
6865 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006866 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006867 }
6868 }
6869 }
6870
6871 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_strided_a) {
6872 TEST_REQUIRES_X86_SSE2;
6873 for (uint32_t n = 5; n < 8; n++) {
6874 for (size_t k = 1; k <= 40; k += 9) {
6875 GemmMicrokernelTester()
6876 .mr(2)
6877 .nr(4)
6878 .kr(2)
6879 .sr(1)
6880 .m(2)
6881 .n(n)
6882 .k(k)
6883 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08006884 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006885 }
6886 }
6887 }
6888
6889 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_subtile) {
6890 TEST_REQUIRES_X86_SSE2;
6891 for (uint32_t n = 5; n < 8; n++) {
6892 for (size_t k = 1; k <= 40; k += 9) {
6893 for (uint32_t m = 1; m <= 2; m++) {
6894 GemmMicrokernelTester()
6895 .mr(2)
6896 .nr(4)
6897 .kr(2)
6898 .sr(1)
6899 .m(m)
6900 .n(n)
6901 .k(k)
6902 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006903 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006904 }
6905 }
6906 }
6907 }
6908
6909 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4) {
6910 TEST_REQUIRES_X86_SSE2;
6911 for (uint32_t n = 8; n <= 12; n += 4) {
6912 for (size_t k = 1; k <= 40; k += 9) {
6913 GemmMicrokernelTester()
6914 .mr(2)
6915 .nr(4)
6916 .kr(2)
6917 .sr(1)
6918 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006919 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006920 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006921 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006922 }
6923 }
6924 }
6925
6926 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
6927 TEST_REQUIRES_X86_SSE2;
6928 for (uint32_t n = 8; n <= 12; n += 4) {
6929 for (size_t k = 1; k <= 40; k += 9) {
6930 GemmMicrokernelTester()
6931 .mr(2)
6932 .nr(4)
6933 .kr(2)
6934 .sr(1)
6935 .m(2)
6936 .n(n)
6937 .k(k)
6938 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006939 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006940 }
6941 }
6942 }
6943
6944 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_strided_a) {
6945 TEST_REQUIRES_X86_SSE2;
6946 for (uint32_t n = 8; n <= 12; n += 4) {
6947 for (size_t k = 1; k <= 40; k += 9) {
6948 GemmMicrokernelTester()
6949 .mr(2)
6950 .nr(4)
6951 .kr(2)
6952 .sr(1)
6953 .m(2)
6954 .n(n)
6955 .k(k)
6956 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08006957 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006958 }
6959 }
6960 }
6961
6962 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_subtile) {
6963 TEST_REQUIRES_X86_SSE2;
6964 for (uint32_t n = 8; n <= 12; n += 4) {
6965 for (size_t k = 1; k <= 40; k += 9) {
6966 for (uint32_t m = 1; m <= 2; m++) {
6967 GemmMicrokernelTester()
6968 .mr(2)
6969 .nr(4)
6970 .kr(2)
6971 .sr(1)
6972 .m(m)
6973 .n(n)
6974 .k(k)
6975 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006976 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006977 }
6978 }
6979 }
6980 }
6981
6982 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm_subtile) {
6983 TEST_REQUIRES_X86_SSE2;
6984 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006985 for (uint32_t n = 1; n <= 4; n++) {
6986 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006987 GemmMicrokernelTester()
6988 .mr(2)
6989 .nr(4)
6990 .kr(2)
6991 .sr(1)
6992 .m(m)
6993 .n(n)
6994 .k(k)
6995 .cm_stride(7)
6996 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006997 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006998 }
6999 }
7000 }
7001 }
7002
7003 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmin) {
7004 TEST_REQUIRES_X86_SSE2;
7005 GemmMicrokernelTester()
7006 .mr(2)
7007 .nr(4)
7008 .kr(2)
7009 .sr(1)
7010 .m(2)
7011 .n(4)
7012 .k(8)
7013 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007014 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007015 }
7016
7017 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmax) {
7018 TEST_REQUIRES_X86_SSE2;
7019 GemmMicrokernelTester()
7020 .mr(2)
7021 .nr(4)
7022 .kr(2)
7023 .sr(1)
7024 .m(2)
7025 .n(4)
7026 .k(8)
7027 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007028 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007029 }
7030
7031 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm) {
7032 TEST_REQUIRES_X86_SSE2;
7033 GemmMicrokernelTester()
7034 .mr(2)
7035 .nr(4)
7036 .kr(2)
7037 .sr(1)
7038 .m(2)
7039 .n(4)
7040 .k(8)
7041 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007042 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007043 }
7044
7045 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_a_zero_point) {
7046 TEST_REQUIRES_X86_SSE2;
7047 for (size_t k = 1; k <= 40; k += 9) {
7048 GemmMicrokernelTester()
7049 .mr(2)
7050 .nr(4)
7051 .kr(2)
7052 .sr(1)
7053 .m(2)
7054 .n(4)
7055 .k(k)
7056 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007057 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007058 }
7059 }
7060
7061 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_b_zero_point) {
7062 TEST_REQUIRES_X86_SSE2;
7063 for (size_t k = 1; k <= 40; k += 9) {
7064 GemmMicrokernelTester()
7065 .mr(2)
7066 .nr(4)
7067 .kr(2)
7068 .sr(1)
7069 .m(2)
7070 .n(4)
7071 .k(k)
7072 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007073 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007074 }
7075 }
7076
7077 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_zero_point) {
7078 TEST_REQUIRES_X86_SSE2;
7079 for (size_t k = 1; k <= 40; k += 9) {
7080 GemmMicrokernelTester()
7081 .mr(2)
7082 .nr(4)
7083 .kr(2)
7084 .sr(1)
7085 .m(2)
7086 .n(4)
7087 .k(k)
7088 .a_zero_point(0)
7089 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007090 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007091 }
7092 }
7093#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7094
7095
7096#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7097 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8) {
7098 TEST_REQUIRES_X86_SSE2;
7099 GemmMicrokernelTester()
7100 .mr(4)
7101 .nr(4)
7102 .kr(2)
7103 .sr(1)
7104 .m(4)
7105 .n(4)
7106 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007107 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007108 }
7109
7110 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cn) {
7111 TEST_REQUIRES_X86_SSE2;
7112 GemmMicrokernelTester()
7113 .mr(4)
7114 .nr(4)
7115 .kr(2)
7116 .sr(1)
7117 .m(4)
7118 .n(4)
7119 .k(8)
7120 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007121 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007122 }
7123
7124 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_strided_a) {
7125 TEST_REQUIRES_X86_SSE2;
7126 GemmMicrokernelTester()
7127 .mr(4)
7128 .nr(4)
7129 .kr(2)
7130 .sr(1)
7131 .m(4)
7132 .n(4)
7133 .k(8)
7134 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007135 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007136 }
7137
7138 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile) {
7139 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007140 for (uint32_t n = 1; n <= 4; n++) {
7141 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007142 GemmMicrokernelTester()
7143 .mr(4)
7144 .nr(4)
7145 .kr(2)
7146 .sr(1)
7147 .m(m)
7148 .n(n)
7149 .k(8)
7150 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007151 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007152 }
7153 }
7154 }
7155
7156 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
7157 TEST_REQUIRES_X86_SSE2;
7158 for (uint32_t m = 1; m <= 4; m++) {
7159 GemmMicrokernelTester()
7160 .mr(4)
7161 .nr(4)
7162 .kr(2)
7163 .sr(1)
7164 .m(m)
7165 .n(4)
7166 .k(8)
7167 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007168 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007169 }
7170 }
7171
7172 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
7173 TEST_REQUIRES_X86_SSE2;
7174 for (uint32_t n = 1; n <= 4; n++) {
7175 GemmMicrokernelTester()
7176 .mr(4)
7177 .nr(4)
7178 .kr(2)
7179 .sr(1)
7180 .m(4)
7181 .n(n)
7182 .k(8)
7183 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007184 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007185 }
7186 }
7187
7188 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8) {
7189 TEST_REQUIRES_X86_SSE2;
7190 for (size_t k = 1; k < 8; k++) {
7191 GemmMicrokernelTester()
7192 .mr(4)
7193 .nr(4)
7194 .kr(2)
7195 .sr(1)
7196 .m(4)
7197 .n(4)
7198 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007199 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007200 }
7201 }
7202
7203 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_strided_a) {
7204 TEST_REQUIRES_X86_SSE2;
7205 for (size_t k = 1; k < 8; k++) {
7206 GemmMicrokernelTester()
7207 .mr(4)
7208 .nr(4)
7209 .kr(2)
7210 .sr(1)
7211 .m(4)
7212 .n(4)
7213 .k(k)
7214 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007215 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007216 }
7217 }
7218
7219 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_subtile) {
7220 TEST_REQUIRES_X86_SSE2;
7221 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007222 for (uint32_t n = 1; n <= 4; n++) {
7223 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007224 GemmMicrokernelTester()
7225 .mr(4)
7226 .nr(4)
7227 .kr(2)
7228 .sr(1)
7229 .m(m)
7230 .n(n)
7231 .k(k)
7232 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007233 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007234 }
7235 }
7236 }
7237 }
7238
7239 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8) {
7240 TEST_REQUIRES_X86_SSE2;
7241 for (size_t k = 9; k < 16; k++) {
7242 GemmMicrokernelTester()
7243 .mr(4)
7244 .nr(4)
7245 .kr(2)
7246 .sr(1)
7247 .m(4)
7248 .n(4)
7249 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007250 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007251 }
7252 }
7253
7254 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_strided_a) {
7255 TEST_REQUIRES_X86_SSE2;
7256 for (size_t k = 9; k < 16; k++) {
7257 GemmMicrokernelTester()
7258 .mr(4)
7259 .nr(4)
7260 .kr(2)
7261 .sr(1)
7262 .m(4)
7263 .n(4)
7264 .k(k)
7265 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007266 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007267 }
7268 }
7269
7270 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_subtile) {
7271 TEST_REQUIRES_X86_SSE2;
7272 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007273 for (uint32_t n = 1; n <= 4; n++) {
7274 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007275 GemmMicrokernelTester()
7276 .mr(4)
7277 .nr(4)
7278 .kr(2)
7279 .sr(1)
7280 .m(m)
7281 .n(n)
7282 .k(k)
7283 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007284 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007285 }
7286 }
7287 }
7288 }
7289
7290 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8) {
7291 TEST_REQUIRES_X86_SSE2;
7292 for (size_t k = 16; k <= 80; k += 8) {
7293 GemmMicrokernelTester()
7294 .mr(4)
7295 .nr(4)
7296 .kr(2)
7297 .sr(1)
7298 .m(4)
7299 .n(4)
7300 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007301 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007302 }
7303 }
7304
7305 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_strided_a) {
7306 TEST_REQUIRES_X86_SSE2;
7307 for (size_t k = 16; k <= 80; k += 8) {
7308 GemmMicrokernelTester()
7309 .mr(4)
7310 .nr(4)
7311 .kr(2)
7312 .sr(1)
7313 .m(4)
7314 .n(4)
7315 .k(k)
7316 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007317 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007318 }
7319 }
7320
7321 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_subtile) {
7322 TEST_REQUIRES_X86_SSE2;
7323 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007324 for (uint32_t n = 1; n <= 4; n++) {
7325 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007326 GemmMicrokernelTester()
7327 .mr(4)
7328 .nr(4)
7329 .kr(2)
7330 .sr(1)
7331 .m(m)
7332 .n(n)
7333 .k(k)
7334 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007335 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007336 }
7337 }
7338 }
7339 }
7340
7341 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4) {
7342 TEST_REQUIRES_X86_SSE2;
7343 for (uint32_t n = 5; n < 8; n++) {
7344 for (size_t k = 1; k <= 40; k += 9) {
7345 GemmMicrokernelTester()
7346 .mr(4)
7347 .nr(4)
7348 .kr(2)
7349 .sr(1)
7350 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007351 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007352 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007353 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007354 }
7355 }
7356 }
7357
7358 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
7359 TEST_REQUIRES_X86_SSE2;
7360 for (uint32_t n = 5; n < 8; n++) {
7361 for (size_t k = 1; k <= 40; k += 9) {
7362 GemmMicrokernelTester()
7363 .mr(4)
7364 .nr(4)
7365 .kr(2)
7366 .sr(1)
7367 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007368 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007369 .k(k)
7370 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007371 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007372 }
7373 }
7374 }
7375
7376 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_a) {
7377 TEST_REQUIRES_X86_SSE2;
7378 for (uint32_t n = 5; n < 8; n++) {
7379 for (size_t k = 1; k <= 40; k += 9) {
7380 GemmMicrokernelTester()
7381 .mr(4)
7382 .nr(4)
7383 .kr(2)
7384 .sr(1)
7385 .m(4)
7386 .n(n)
7387 .k(k)
7388 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007389 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007390 }
7391 }
7392 }
7393
7394 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_subtile) {
7395 TEST_REQUIRES_X86_SSE2;
7396 for (uint32_t n = 5; n < 8; n++) {
7397 for (size_t k = 1; k <= 40; k += 9) {
7398 for (uint32_t m = 1; m <= 4; m++) {
7399 GemmMicrokernelTester()
7400 .mr(4)
7401 .nr(4)
7402 .kr(2)
7403 .sr(1)
7404 .m(m)
7405 .n(n)
7406 .k(k)
7407 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007408 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007409 }
7410 }
7411 }
7412 }
7413
7414 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4) {
7415 TEST_REQUIRES_X86_SSE2;
7416 for (uint32_t n = 8; n <= 12; n += 4) {
7417 for (size_t k = 1; k <= 40; k += 9) {
7418 GemmMicrokernelTester()
7419 .mr(4)
7420 .nr(4)
7421 .kr(2)
7422 .sr(1)
7423 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007424 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007425 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007426 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007427 }
7428 }
7429 }
7430
7431 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
7432 TEST_REQUIRES_X86_SSE2;
7433 for (uint32_t n = 8; n <= 12; n += 4) {
7434 for (size_t k = 1; k <= 40; k += 9) {
7435 GemmMicrokernelTester()
7436 .mr(4)
7437 .nr(4)
7438 .kr(2)
7439 .sr(1)
7440 .m(4)
7441 .n(n)
7442 .k(k)
7443 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007444 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007445 }
7446 }
7447 }
7448
7449 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_a) {
7450 TEST_REQUIRES_X86_SSE2;
7451 for (uint32_t n = 8; n <= 12; n += 4) {
7452 for (size_t k = 1; k <= 40; k += 9) {
7453 GemmMicrokernelTester()
7454 .mr(4)
7455 .nr(4)
7456 .kr(2)
7457 .sr(1)
7458 .m(4)
7459 .n(n)
7460 .k(k)
7461 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007462 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007463 }
7464 }
7465 }
7466
7467 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_subtile) {
7468 TEST_REQUIRES_X86_SSE2;
7469 for (uint32_t n = 8; n <= 12; n += 4) {
7470 for (size_t k = 1; k <= 40; k += 9) {
7471 for (uint32_t m = 1; m <= 4; m++) {
7472 GemmMicrokernelTester()
7473 .mr(4)
7474 .nr(4)
7475 .kr(2)
7476 .sr(1)
7477 .m(m)
7478 .n(n)
7479 .k(k)
7480 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007481 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007482 }
7483 }
7484 }
7485 }
7486
7487 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm_subtile) {
7488 TEST_REQUIRES_X86_SSE2;
7489 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007490 for (uint32_t n = 1; n <= 4; n++) {
7491 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007492 GemmMicrokernelTester()
7493 .mr(4)
7494 .nr(4)
7495 .kr(2)
7496 .sr(1)
7497 .m(m)
7498 .n(n)
7499 .k(k)
7500 .cm_stride(7)
7501 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007502 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007503 }
7504 }
7505 }
7506 }
7507
7508 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmin) {
7509 TEST_REQUIRES_X86_SSE2;
7510 GemmMicrokernelTester()
7511 .mr(4)
7512 .nr(4)
7513 .kr(2)
7514 .sr(1)
7515 .m(4)
7516 .n(4)
7517 .k(8)
7518 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007519 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007520 }
7521
7522 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmax) {
7523 TEST_REQUIRES_X86_SSE2;
7524 GemmMicrokernelTester()
7525 .mr(4)
7526 .nr(4)
7527 .kr(2)
7528 .sr(1)
7529 .m(4)
7530 .n(4)
7531 .k(8)
7532 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007533 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007534 }
7535
7536 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm) {
7537 TEST_REQUIRES_X86_SSE2;
7538 GemmMicrokernelTester()
7539 .mr(4)
7540 .nr(4)
7541 .kr(2)
7542 .sr(1)
7543 .m(4)
7544 .n(4)
7545 .k(8)
7546 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007547 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007548 }
7549
7550 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_a_zero_point) {
7551 TEST_REQUIRES_X86_SSE2;
7552 for (size_t k = 1; k <= 40; k += 9) {
7553 GemmMicrokernelTester()
7554 .mr(4)
7555 .nr(4)
7556 .kr(2)
7557 .sr(1)
7558 .m(4)
7559 .n(4)
7560 .k(k)
7561 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007562 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007563 }
7564 }
7565
7566 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_b_zero_point) {
7567 TEST_REQUIRES_X86_SSE2;
7568 for (size_t k = 1; k <= 40; k += 9) {
7569 GemmMicrokernelTester()
7570 .mr(4)
7571 .nr(4)
7572 .kr(2)
7573 .sr(1)
7574 .m(4)
7575 .n(4)
7576 .k(k)
7577 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007578 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007579 }
7580 }
7581
7582 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_zero_point) {
7583 TEST_REQUIRES_X86_SSE2;
7584 for (size_t k = 1; k <= 40; k += 9) {
7585 GemmMicrokernelTester()
7586 .mr(4)
7587 .nr(4)
7588 .kr(2)
7589 .sr(1)
7590 .m(4)
7591 .n(4)
7592 .k(k)
7593 .a_zero_point(0)
7594 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007595 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007596 }
7597 }
7598#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7599
7600
7601#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7602 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8) {
7603 TEST_REQUIRES_X86_SSE41;
7604 GemmMicrokernelTester()
7605 .mr(3)
7606 .nr(4)
7607 .kr(2)
7608 .sr(1)
7609 .m(3)
7610 .n(4)
7611 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007612 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007613 }
7614
7615 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cn) {
7616 TEST_REQUIRES_X86_SSE41;
7617 GemmMicrokernelTester()
7618 .mr(3)
7619 .nr(4)
7620 .kr(2)
7621 .sr(1)
7622 .m(3)
7623 .n(4)
7624 .k(8)
7625 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007626 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007627 }
7628
7629 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_strided_a) {
7630 TEST_REQUIRES_X86_SSE41;
7631 GemmMicrokernelTester()
7632 .mr(3)
7633 .nr(4)
7634 .kr(2)
7635 .sr(1)
7636 .m(3)
7637 .n(4)
7638 .k(8)
7639 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007640 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007641 }
7642
7643 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile) {
7644 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007645 for (uint32_t n = 1; n <= 4; n++) {
7646 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007647 GemmMicrokernelTester()
7648 .mr(3)
7649 .nr(4)
7650 .kr(2)
7651 .sr(1)
7652 .m(m)
7653 .n(n)
7654 .k(8)
7655 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007656 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007657 }
7658 }
7659 }
7660
7661 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
7662 TEST_REQUIRES_X86_SSE41;
7663 for (uint32_t m = 1; m <= 3; m++) {
7664 GemmMicrokernelTester()
7665 .mr(3)
7666 .nr(4)
7667 .kr(2)
7668 .sr(1)
7669 .m(m)
7670 .n(4)
7671 .k(8)
7672 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007673 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007674 }
7675 }
7676
7677 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
7678 TEST_REQUIRES_X86_SSE41;
7679 for (uint32_t n = 1; n <= 4; n++) {
7680 GemmMicrokernelTester()
7681 .mr(3)
7682 .nr(4)
7683 .kr(2)
7684 .sr(1)
7685 .m(3)
7686 .n(n)
7687 .k(8)
7688 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007689 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007690 }
7691 }
7692
7693 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8) {
7694 TEST_REQUIRES_X86_SSE41;
7695 for (size_t k = 1; k < 8; k++) {
7696 GemmMicrokernelTester()
7697 .mr(3)
7698 .nr(4)
7699 .kr(2)
7700 .sr(1)
7701 .m(3)
7702 .n(4)
7703 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007704 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007705 }
7706 }
7707
7708 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_strided_a) {
7709 TEST_REQUIRES_X86_SSE41;
7710 for (size_t k = 1; k < 8; k++) {
7711 GemmMicrokernelTester()
7712 .mr(3)
7713 .nr(4)
7714 .kr(2)
7715 .sr(1)
7716 .m(3)
7717 .n(4)
7718 .k(k)
7719 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007720 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007721 }
7722 }
7723
7724 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_subtile) {
7725 TEST_REQUIRES_X86_SSE41;
7726 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007727 for (uint32_t n = 1; n <= 4; n++) {
7728 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007729 GemmMicrokernelTester()
7730 .mr(3)
7731 .nr(4)
7732 .kr(2)
7733 .sr(1)
7734 .m(m)
7735 .n(n)
7736 .k(k)
7737 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007738 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007739 }
7740 }
7741 }
7742 }
7743
7744 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8) {
7745 TEST_REQUIRES_X86_SSE41;
7746 for (size_t k = 9; k < 16; k++) {
7747 GemmMicrokernelTester()
7748 .mr(3)
7749 .nr(4)
7750 .kr(2)
7751 .sr(1)
7752 .m(3)
7753 .n(4)
7754 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007755 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007756 }
7757 }
7758
7759 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_strided_a) {
7760 TEST_REQUIRES_X86_SSE41;
7761 for (size_t k = 9; k < 16; k++) {
7762 GemmMicrokernelTester()
7763 .mr(3)
7764 .nr(4)
7765 .kr(2)
7766 .sr(1)
7767 .m(3)
7768 .n(4)
7769 .k(k)
7770 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007771 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007772 }
7773 }
7774
7775 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_subtile) {
7776 TEST_REQUIRES_X86_SSE41;
7777 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007778 for (uint32_t n = 1; n <= 4; n++) {
7779 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007780 GemmMicrokernelTester()
7781 .mr(3)
7782 .nr(4)
7783 .kr(2)
7784 .sr(1)
7785 .m(m)
7786 .n(n)
7787 .k(k)
7788 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007789 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007790 }
7791 }
7792 }
7793 }
7794
7795 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8) {
7796 TEST_REQUIRES_X86_SSE41;
7797 for (size_t k = 16; k <= 80; k += 8) {
7798 GemmMicrokernelTester()
7799 .mr(3)
7800 .nr(4)
7801 .kr(2)
7802 .sr(1)
7803 .m(3)
7804 .n(4)
7805 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007806 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007807 }
7808 }
7809
7810 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_strided_a) {
7811 TEST_REQUIRES_X86_SSE41;
7812 for (size_t k = 16; k <= 80; k += 8) {
7813 GemmMicrokernelTester()
7814 .mr(3)
7815 .nr(4)
7816 .kr(2)
7817 .sr(1)
7818 .m(3)
7819 .n(4)
7820 .k(k)
7821 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007822 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007823 }
7824 }
7825
7826 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_subtile) {
7827 TEST_REQUIRES_X86_SSE41;
7828 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007829 for (uint32_t n = 1; n <= 4; n++) {
7830 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007831 GemmMicrokernelTester()
7832 .mr(3)
7833 .nr(4)
7834 .kr(2)
7835 .sr(1)
7836 .m(m)
7837 .n(n)
7838 .k(k)
7839 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007840 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007841 }
7842 }
7843 }
7844 }
7845
7846 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4) {
7847 TEST_REQUIRES_X86_SSE41;
7848 for (uint32_t n = 5; n < 8; n++) {
7849 for (size_t k = 1; k <= 40; k += 9) {
7850 GemmMicrokernelTester()
7851 .mr(3)
7852 .nr(4)
7853 .kr(2)
7854 .sr(1)
7855 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007856 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007857 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007858 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007859 }
7860 }
7861 }
7862
7863 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
7864 TEST_REQUIRES_X86_SSE41;
7865 for (uint32_t n = 5; n < 8; n++) {
7866 for (size_t k = 1; k <= 40; k += 9) {
7867 GemmMicrokernelTester()
7868 .mr(3)
7869 .nr(4)
7870 .kr(2)
7871 .sr(1)
7872 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007873 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007874 .k(k)
7875 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007876 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007877 }
7878 }
7879 }
7880
7881 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_a) {
7882 TEST_REQUIRES_X86_SSE41;
7883 for (uint32_t n = 5; n < 8; n++) {
7884 for (size_t k = 1; k <= 40; k += 9) {
7885 GemmMicrokernelTester()
7886 .mr(3)
7887 .nr(4)
7888 .kr(2)
7889 .sr(1)
7890 .m(3)
7891 .n(n)
7892 .k(k)
7893 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007894 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007895 }
7896 }
7897 }
7898
7899 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_subtile) {
7900 TEST_REQUIRES_X86_SSE41;
7901 for (uint32_t n = 5; n < 8; n++) {
7902 for (size_t k = 1; k <= 40; k += 9) {
7903 for (uint32_t m = 1; m <= 3; m++) {
7904 GemmMicrokernelTester()
7905 .mr(3)
7906 .nr(4)
7907 .kr(2)
7908 .sr(1)
7909 .m(m)
7910 .n(n)
7911 .k(k)
7912 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007913 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007914 }
7915 }
7916 }
7917 }
7918
7919 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4) {
7920 TEST_REQUIRES_X86_SSE41;
7921 for (uint32_t n = 8; n <= 12; n += 4) {
7922 for (size_t k = 1; k <= 40; k += 9) {
7923 GemmMicrokernelTester()
7924 .mr(3)
7925 .nr(4)
7926 .kr(2)
7927 .sr(1)
7928 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007929 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007930 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007931 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007932 }
7933 }
7934 }
7935
7936 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
7937 TEST_REQUIRES_X86_SSE41;
7938 for (uint32_t n = 8; n <= 12; n += 4) {
7939 for (size_t k = 1; k <= 40; k += 9) {
7940 GemmMicrokernelTester()
7941 .mr(3)
7942 .nr(4)
7943 .kr(2)
7944 .sr(1)
7945 .m(3)
7946 .n(n)
7947 .k(k)
7948 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007949 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007950 }
7951 }
7952 }
7953
7954 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_a) {
7955 TEST_REQUIRES_X86_SSE41;
7956 for (uint32_t n = 8; n <= 12; n += 4) {
7957 for (size_t k = 1; k <= 40; k += 9) {
7958 GemmMicrokernelTester()
7959 .mr(3)
7960 .nr(4)
7961 .kr(2)
7962 .sr(1)
7963 .m(3)
7964 .n(n)
7965 .k(k)
7966 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007967 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007968 }
7969 }
7970 }
7971
7972 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_subtile) {
7973 TEST_REQUIRES_X86_SSE41;
7974 for (uint32_t n = 8; n <= 12; n += 4) {
7975 for (size_t k = 1; k <= 40; k += 9) {
7976 for (uint32_t m = 1; m <= 3; m++) {
7977 GemmMicrokernelTester()
7978 .mr(3)
7979 .nr(4)
7980 .kr(2)
7981 .sr(1)
7982 .m(m)
7983 .n(n)
7984 .k(k)
7985 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007986 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007987 }
7988 }
7989 }
7990 }
7991
7992 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm_subtile) {
7993 TEST_REQUIRES_X86_SSE41;
7994 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007995 for (uint32_t n = 1; n <= 4; n++) {
7996 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007997 GemmMicrokernelTester()
7998 .mr(3)
7999 .nr(4)
8000 .kr(2)
8001 .sr(1)
8002 .m(m)
8003 .n(n)
8004 .k(k)
8005 .cm_stride(7)
8006 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008007 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008008 }
8009 }
8010 }
8011 }
8012
8013 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmin) {
8014 TEST_REQUIRES_X86_SSE41;
8015 GemmMicrokernelTester()
8016 .mr(3)
8017 .nr(4)
8018 .kr(2)
8019 .sr(1)
8020 .m(3)
8021 .n(4)
8022 .k(8)
8023 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008024 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008025 }
8026
8027 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmax) {
8028 TEST_REQUIRES_X86_SSE41;
8029 GemmMicrokernelTester()
8030 .mr(3)
8031 .nr(4)
8032 .kr(2)
8033 .sr(1)
8034 .m(3)
8035 .n(4)
8036 .k(8)
8037 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008038 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008039 }
8040
8041 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm) {
8042 TEST_REQUIRES_X86_SSE41;
8043 GemmMicrokernelTester()
8044 .mr(3)
8045 .nr(4)
8046 .kr(2)
8047 .sr(1)
8048 .m(3)
8049 .n(4)
8050 .k(8)
8051 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008052 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008053 }
8054
8055 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_a_zero_point) {
8056 TEST_REQUIRES_X86_SSE41;
8057 for (size_t k = 1; k <= 40; k += 9) {
8058 GemmMicrokernelTester()
8059 .mr(3)
8060 .nr(4)
8061 .kr(2)
8062 .sr(1)
8063 .m(3)
8064 .n(4)
8065 .k(k)
8066 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008067 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008068 }
8069 }
8070
8071 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_b_zero_point) {
8072 TEST_REQUIRES_X86_SSE41;
8073 for (size_t k = 1; k <= 40; k += 9) {
8074 GemmMicrokernelTester()
8075 .mr(3)
8076 .nr(4)
8077 .kr(2)
8078 .sr(1)
8079 .m(3)
8080 .n(4)
8081 .k(k)
8082 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008083 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008084 }
8085 }
8086
8087 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_zero_point) {
8088 TEST_REQUIRES_X86_SSE41;
8089 for (size_t k = 1; k <= 40; k += 9) {
8090 GemmMicrokernelTester()
8091 .mr(3)
8092 .nr(4)
8093 .kr(2)
8094 .sr(1)
8095 .m(3)
8096 .n(4)
8097 .k(k)
8098 .a_zero_point(0)
8099 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008100 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008101 }
8102 }
8103#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8104
8105
8106#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8107 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8) {
8108 TEST_REQUIRES_X86_SSE41;
8109 GemmMicrokernelTester()
8110 .mr(4)
8111 .nr(4)
8112 .kr(2)
8113 .sr(1)
8114 .m(4)
8115 .n(4)
8116 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008117 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008118 }
8119
8120 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cn) {
8121 TEST_REQUIRES_X86_SSE41;
8122 GemmMicrokernelTester()
8123 .mr(4)
8124 .nr(4)
8125 .kr(2)
8126 .sr(1)
8127 .m(4)
8128 .n(4)
8129 .k(8)
8130 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008131 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008132 }
8133
8134 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_strided_a) {
8135 TEST_REQUIRES_X86_SSE41;
8136 GemmMicrokernelTester()
8137 .mr(4)
8138 .nr(4)
8139 .kr(2)
8140 .sr(1)
8141 .m(4)
8142 .n(4)
8143 .k(8)
8144 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008145 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008146 }
8147
8148 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile) {
8149 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008150 for (uint32_t n = 1; n <= 4; n++) {
8151 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008152 GemmMicrokernelTester()
8153 .mr(4)
8154 .nr(4)
8155 .kr(2)
8156 .sr(1)
8157 .m(m)
8158 .n(n)
8159 .k(8)
8160 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008161 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008162 }
8163 }
8164 }
8165
8166 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
8167 TEST_REQUIRES_X86_SSE41;
8168 for (uint32_t m = 1; m <= 4; m++) {
8169 GemmMicrokernelTester()
8170 .mr(4)
8171 .nr(4)
8172 .kr(2)
8173 .sr(1)
8174 .m(m)
8175 .n(4)
8176 .k(8)
8177 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008178 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008179 }
8180 }
8181
8182 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
8183 TEST_REQUIRES_X86_SSE41;
8184 for (uint32_t n = 1; n <= 4; n++) {
8185 GemmMicrokernelTester()
8186 .mr(4)
8187 .nr(4)
8188 .kr(2)
8189 .sr(1)
8190 .m(4)
8191 .n(n)
8192 .k(8)
8193 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008194 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008195 }
8196 }
8197
8198 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8) {
8199 TEST_REQUIRES_X86_SSE41;
8200 for (size_t k = 1; k < 8; k++) {
8201 GemmMicrokernelTester()
8202 .mr(4)
8203 .nr(4)
8204 .kr(2)
8205 .sr(1)
8206 .m(4)
8207 .n(4)
8208 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008209 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008210 }
8211 }
8212
8213 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_strided_a) {
8214 TEST_REQUIRES_X86_SSE41;
8215 for (size_t k = 1; k < 8; k++) {
8216 GemmMicrokernelTester()
8217 .mr(4)
8218 .nr(4)
8219 .kr(2)
8220 .sr(1)
8221 .m(4)
8222 .n(4)
8223 .k(k)
8224 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008225 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008226 }
8227 }
8228
8229 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_subtile) {
8230 TEST_REQUIRES_X86_SSE41;
8231 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008232 for (uint32_t n = 1; n <= 4; n++) {
8233 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008234 GemmMicrokernelTester()
8235 .mr(4)
8236 .nr(4)
8237 .kr(2)
8238 .sr(1)
8239 .m(m)
8240 .n(n)
8241 .k(k)
8242 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008243 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008244 }
8245 }
8246 }
8247 }
8248
8249 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8) {
8250 TEST_REQUIRES_X86_SSE41;
8251 for (size_t k = 9; k < 16; k++) {
8252 GemmMicrokernelTester()
8253 .mr(4)
8254 .nr(4)
8255 .kr(2)
8256 .sr(1)
8257 .m(4)
8258 .n(4)
8259 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008260 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008261 }
8262 }
8263
8264 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_strided_a) {
8265 TEST_REQUIRES_X86_SSE41;
8266 for (size_t k = 9; k < 16; k++) {
8267 GemmMicrokernelTester()
8268 .mr(4)
8269 .nr(4)
8270 .kr(2)
8271 .sr(1)
8272 .m(4)
8273 .n(4)
8274 .k(k)
8275 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008276 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008277 }
8278 }
8279
8280 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_subtile) {
8281 TEST_REQUIRES_X86_SSE41;
8282 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008283 for (uint32_t n = 1; n <= 4; n++) {
8284 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008285 GemmMicrokernelTester()
8286 .mr(4)
8287 .nr(4)
8288 .kr(2)
8289 .sr(1)
8290 .m(m)
8291 .n(n)
8292 .k(k)
8293 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008294 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008295 }
8296 }
8297 }
8298 }
8299
8300 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8) {
8301 TEST_REQUIRES_X86_SSE41;
8302 for (size_t k = 16; k <= 80; k += 8) {
8303 GemmMicrokernelTester()
8304 .mr(4)
8305 .nr(4)
8306 .kr(2)
8307 .sr(1)
8308 .m(4)
8309 .n(4)
8310 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008311 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008312 }
8313 }
8314
8315 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_strided_a) {
8316 TEST_REQUIRES_X86_SSE41;
8317 for (size_t k = 16; k <= 80; k += 8) {
8318 GemmMicrokernelTester()
8319 .mr(4)
8320 .nr(4)
8321 .kr(2)
8322 .sr(1)
8323 .m(4)
8324 .n(4)
8325 .k(k)
8326 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008327 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008328 }
8329 }
8330
8331 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_subtile) {
8332 TEST_REQUIRES_X86_SSE41;
8333 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008334 for (uint32_t n = 1; n <= 4; n++) {
8335 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008336 GemmMicrokernelTester()
8337 .mr(4)
8338 .nr(4)
8339 .kr(2)
8340 .sr(1)
8341 .m(m)
8342 .n(n)
8343 .k(k)
8344 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008345 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008346 }
8347 }
8348 }
8349 }
8350
8351 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4) {
8352 TEST_REQUIRES_X86_SSE41;
8353 for (uint32_t n = 5; n < 8; n++) {
8354 for (size_t k = 1; k <= 40; k += 9) {
8355 GemmMicrokernelTester()
8356 .mr(4)
8357 .nr(4)
8358 .kr(2)
8359 .sr(1)
8360 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008361 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008362 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008363 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008364 }
8365 }
8366 }
8367
8368 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
8369 TEST_REQUIRES_X86_SSE41;
8370 for (uint32_t n = 5; n < 8; n++) {
8371 for (size_t k = 1; k <= 40; k += 9) {
8372 GemmMicrokernelTester()
8373 .mr(4)
8374 .nr(4)
8375 .kr(2)
8376 .sr(1)
8377 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008378 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008379 .k(k)
8380 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008381 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008382 }
8383 }
8384 }
8385
8386 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_a) {
8387 TEST_REQUIRES_X86_SSE41;
8388 for (uint32_t n = 5; n < 8; n++) {
8389 for (size_t k = 1; k <= 40; k += 9) {
8390 GemmMicrokernelTester()
8391 .mr(4)
8392 .nr(4)
8393 .kr(2)
8394 .sr(1)
8395 .m(4)
8396 .n(n)
8397 .k(k)
8398 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008399 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008400 }
8401 }
8402 }
8403
8404 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_subtile) {
8405 TEST_REQUIRES_X86_SSE41;
8406 for (uint32_t n = 5; n < 8; n++) {
8407 for (size_t k = 1; k <= 40; k += 9) {
8408 for (uint32_t m = 1; m <= 4; m++) {
8409 GemmMicrokernelTester()
8410 .mr(4)
8411 .nr(4)
8412 .kr(2)
8413 .sr(1)
8414 .m(m)
8415 .n(n)
8416 .k(k)
8417 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008418 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008419 }
8420 }
8421 }
8422 }
8423
8424 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4) {
8425 TEST_REQUIRES_X86_SSE41;
8426 for (uint32_t n = 8; n <= 12; n += 4) {
8427 for (size_t k = 1; k <= 40; k += 9) {
8428 GemmMicrokernelTester()
8429 .mr(4)
8430 .nr(4)
8431 .kr(2)
8432 .sr(1)
8433 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008434 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008435 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008436 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008437 }
8438 }
8439 }
8440
8441 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
8442 TEST_REQUIRES_X86_SSE41;
8443 for (uint32_t n = 8; n <= 12; n += 4) {
8444 for (size_t k = 1; k <= 40; k += 9) {
8445 GemmMicrokernelTester()
8446 .mr(4)
8447 .nr(4)
8448 .kr(2)
8449 .sr(1)
8450 .m(4)
8451 .n(n)
8452 .k(k)
8453 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008454 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008455 }
8456 }
8457 }
8458
8459 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_a) {
8460 TEST_REQUIRES_X86_SSE41;
8461 for (uint32_t n = 8; n <= 12; n += 4) {
8462 for (size_t k = 1; k <= 40; k += 9) {
8463 GemmMicrokernelTester()
8464 .mr(4)
8465 .nr(4)
8466 .kr(2)
8467 .sr(1)
8468 .m(4)
8469 .n(n)
8470 .k(k)
8471 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008472 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008473 }
8474 }
8475 }
8476
8477 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_subtile) {
8478 TEST_REQUIRES_X86_SSE41;
8479 for (uint32_t n = 8; n <= 12; n += 4) {
8480 for (size_t k = 1; k <= 40; k += 9) {
8481 for (uint32_t m = 1; m <= 4; m++) {
8482 GemmMicrokernelTester()
8483 .mr(4)
8484 .nr(4)
8485 .kr(2)
8486 .sr(1)
8487 .m(m)
8488 .n(n)
8489 .k(k)
8490 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008491 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008492 }
8493 }
8494 }
8495 }
8496
8497 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm_subtile) {
8498 TEST_REQUIRES_X86_SSE41;
8499 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008500 for (uint32_t n = 1; n <= 4; n++) {
8501 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008502 GemmMicrokernelTester()
8503 .mr(4)
8504 .nr(4)
8505 .kr(2)
8506 .sr(1)
8507 .m(m)
8508 .n(n)
8509 .k(k)
8510 .cm_stride(7)
8511 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008512 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008513 }
8514 }
8515 }
8516 }
8517
8518 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmin) {
8519 TEST_REQUIRES_X86_SSE41;
8520 GemmMicrokernelTester()
8521 .mr(4)
8522 .nr(4)
8523 .kr(2)
8524 .sr(1)
8525 .m(4)
8526 .n(4)
8527 .k(8)
8528 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008529 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008530 }
8531
8532 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmax) {
8533 TEST_REQUIRES_X86_SSE41;
8534 GemmMicrokernelTester()
8535 .mr(4)
8536 .nr(4)
8537 .kr(2)
8538 .sr(1)
8539 .m(4)
8540 .n(4)
8541 .k(8)
8542 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008543 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008544 }
8545
8546 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm) {
8547 TEST_REQUIRES_X86_SSE41;
8548 GemmMicrokernelTester()
8549 .mr(4)
8550 .nr(4)
8551 .kr(2)
8552 .sr(1)
8553 .m(4)
8554 .n(4)
8555 .k(8)
8556 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008557 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008558 }
8559
8560 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_a_zero_point) {
8561 TEST_REQUIRES_X86_SSE41;
8562 for (size_t k = 1; k <= 40; k += 9) {
8563 GemmMicrokernelTester()
8564 .mr(4)
8565 .nr(4)
8566 .kr(2)
8567 .sr(1)
8568 .m(4)
8569 .n(4)
8570 .k(k)
8571 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008572 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008573 }
8574 }
8575
8576 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_b_zero_point) {
8577 TEST_REQUIRES_X86_SSE41;
8578 for (size_t k = 1; k <= 40; k += 9) {
8579 GemmMicrokernelTester()
8580 .mr(4)
8581 .nr(4)
8582 .kr(2)
8583 .sr(1)
8584 .m(4)
8585 .n(4)
8586 .k(k)
8587 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008588 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008589 }
8590 }
8591
8592 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_zero_point) {
8593 TEST_REQUIRES_X86_SSE41;
8594 for (size_t k = 1; k <= 40; k += 9) {
8595 GemmMicrokernelTester()
8596 .mr(4)
8597 .nr(4)
8598 .kr(2)
8599 .sr(1)
8600 .m(4)
8601 .n(4)
8602 .k(k)
8603 .a_zero_point(0)
8604 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008605 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008606 }
8607 }
8608#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8609
8610
8611#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8612 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8) {
8613 TEST_REQUIRES_X86_AVX;
8614 GemmMicrokernelTester()
8615 .mr(3)
8616 .nr(4)
8617 .kr(2)
8618 .sr(1)
8619 .m(3)
8620 .n(4)
8621 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008622 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008623 }
8624
8625 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cn) {
8626 TEST_REQUIRES_X86_AVX;
8627 GemmMicrokernelTester()
8628 .mr(3)
8629 .nr(4)
8630 .kr(2)
8631 .sr(1)
8632 .m(3)
8633 .n(4)
8634 .k(8)
8635 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008636 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008637 }
8638
8639 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_strided_a) {
8640 TEST_REQUIRES_X86_AVX;
8641 GemmMicrokernelTester()
8642 .mr(3)
8643 .nr(4)
8644 .kr(2)
8645 .sr(1)
8646 .m(3)
8647 .n(4)
8648 .k(8)
8649 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008650 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008651 }
8652
8653 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile) {
8654 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008655 for (uint32_t n = 1; n <= 4; n++) {
8656 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008657 GemmMicrokernelTester()
8658 .mr(3)
8659 .nr(4)
8660 .kr(2)
8661 .sr(1)
8662 .m(m)
8663 .n(n)
8664 .k(8)
8665 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008666 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008667 }
8668 }
8669 }
8670
8671 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
8672 TEST_REQUIRES_X86_AVX;
8673 for (uint32_t m = 1; m <= 3; m++) {
8674 GemmMicrokernelTester()
8675 .mr(3)
8676 .nr(4)
8677 .kr(2)
8678 .sr(1)
8679 .m(m)
8680 .n(4)
8681 .k(8)
8682 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008683 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008684 }
8685 }
8686
8687 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
8688 TEST_REQUIRES_X86_AVX;
8689 for (uint32_t n = 1; n <= 4; n++) {
8690 GemmMicrokernelTester()
8691 .mr(3)
8692 .nr(4)
8693 .kr(2)
8694 .sr(1)
8695 .m(3)
8696 .n(n)
8697 .k(8)
8698 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008699 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008700 }
8701 }
8702
8703 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8) {
8704 TEST_REQUIRES_X86_AVX;
8705 for (size_t k = 1; k < 8; k++) {
8706 GemmMicrokernelTester()
8707 .mr(3)
8708 .nr(4)
8709 .kr(2)
8710 .sr(1)
8711 .m(3)
8712 .n(4)
8713 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008714 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008715 }
8716 }
8717
8718 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_strided_a) {
8719 TEST_REQUIRES_X86_AVX;
8720 for (size_t k = 1; k < 8; k++) {
8721 GemmMicrokernelTester()
8722 .mr(3)
8723 .nr(4)
8724 .kr(2)
8725 .sr(1)
8726 .m(3)
8727 .n(4)
8728 .k(k)
8729 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008730 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008731 }
8732 }
8733
8734 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_subtile) {
8735 TEST_REQUIRES_X86_AVX;
8736 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008737 for (uint32_t n = 1; n <= 4; n++) {
8738 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008739 GemmMicrokernelTester()
8740 .mr(3)
8741 .nr(4)
8742 .kr(2)
8743 .sr(1)
8744 .m(m)
8745 .n(n)
8746 .k(k)
8747 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008748 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008749 }
8750 }
8751 }
8752 }
8753
8754 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8) {
8755 TEST_REQUIRES_X86_AVX;
8756 for (size_t k = 9; k < 16; k++) {
8757 GemmMicrokernelTester()
8758 .mr(3)
8759 .nr(4)
8760 .kr(2)
8761 .sr(1)
8762 .m(3)
8763 .n(4)
8764 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008765 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008766 }
8767 }
8768
8769 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_strided_a) {
8770 TEST_REQUIRES_X86_AVX;
8771 for (size_t k = 9; k < 16; k++) {
8772 GemmMicrokernelTester()
8773 .mr(3)
8774 .nr(4)
8775 .kr(2)
8776 .sr(1)
8777 .m(3)
8778 .n(4)
8779 .k(k)
8780 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008781 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008782 }
8783 }
8784
8785 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_subtile) {
8786 TEST_REQUIRES_X86_AVX;
8787 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008788 for (uint32_t n = 1; n <= 4; n++) {
8789 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008790 GemmMicrokernelTester()
8791 .mr(3)
8792 .nr(4)
8793 .kr(2)
8794 .sr(1)
8795 .m(m)
8796 .n(n)
8797 .k(k)
8798 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008799 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008800 }
8801 }
8802 }
8803 }
8804
8805 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8) {
8806 TEST_REQUIRES_X86_AVX;
8807 for (size_t k = 16; k <= 80; k += 8) {
8808 GemmMicrokernelTester()
8809 .mr(3)
8810 .nr(4)
8811 .kr(2)
8812 .sr(1)
8813 .m(3)
8814 .n(4)
8815 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008816 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008817 }
8818 }
8819
8820 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_strided_a) {
8821 TEST_REQUIRES_X86_AVX;
8822 for (size_t k = 16; k <= 80; k += 8) {
8823 GemmMicrokernelTester()
8824 .mr(3)
8825 .nr(4)
8826 .kr(2)
8827 .sr(1)
8828 .m(3)
8829 .n(4)
8830 .k(k)
8831 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008832 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008833 }
8834 }
8835
8836 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_subtile) {
8837 TEST_REQUIRES_X86_AVX;
8838 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008839 for (uint32_t n = 1; n <= 4; n++) {
8840 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008841 GemmMicrokernelTester()
8842 .mr(3)
8843 .nr(4)
8844 .kr(2)
8845 .sr(1)
8846 .m(m)
8847 .n(n)
8848 .k(k)
8849 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008850 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008851 }
8852 }
8853 }
8854 }
8855
8856 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4) {
8857 TEST_REQUIRES_X86_AVX;
8858 for (uint32_t n = 5; n < 8; n++) {
8859 for (size_t k = 1; k <= 40; k += 9) {
8860 GemmMicrokernelTester()
8861 .mr(3)
8862 .nr(4)
8863 .kr(2)
8864 .sr(1)
8865 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008866 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008867 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008868 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008869 }
8870 }
8871 }
8872
8873 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
8874 TEST_REQUIRES_X86_AVX;
8875 for (uint32_t n = 5; n < 8; n++) {
8876 for (size_t k = 1; k <= 40; k += 9) {
8877 GemmMicrokernelTester()
8878 .mr(3)
8879 .nr(4)
8880 .kr(2)
8881 .sr(1)
8882 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008883 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008884 .k(k)
8885 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008886 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008887 }
8888 }
8889 }
8890
8891 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_a) {
8892 TEST_REQUIRES_X86_AVX;
8893 for (uint32_t n = 5; n < 8; n++) {
8894 for (size_t k = 1; k <= 40; k += 9) {
8895 GemmMicrokernelTester()
8896 .mr(3)
8897 .nr(4)
8898 .kr(2)
8899 .sr(1)
8900 .m(3)
8901 .n(n)
8902 .k(k)
8903 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008904 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008905 }
8906 }
8907 }
8908
8909 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_subtile) {
8910 TEST_REQUIRES_X86_AVX;
8911 for (uint32_t n = 5; n < 8; n++) {
8912 for (size_t k = 1; k <= 40; k += 9) {
8913 for (uint32_t m = 1; m <= 3; m++) {
8914 GemmMicrokernelTester()
8915 .mr(3)
8916 .nr(4)
8917 .kr(2)
8918 .sr(1)
8919 .m(m)
8920 .n(n)
8921 .k(k)
8922 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008923 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008924 }
8925 }
8926 }
8927 }
8928
8929 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4) {
8930 TEST_REQUIRES_X86_AVX;
8931 for (uint32_t n = 8; n <= 12; n += 4) {
8932 for (size_t k = 1; k <= 40; k += 9) {
8933 GemmMicrokernelTester()
8934 .mr(3)
8935 .nr(4)
8936 .kr(2)
8937 .sr(1)
8938 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008939 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008940 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008941 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008942 }
8943 }
8944 }
8945
8946 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_cn) {
8947 TEST_REQUIRES_X86_AVX;
8948 for (uint32_t n = 8; n <= 12; n += 4) {
8949 for (size_t k = 1; k <= 40; k += 9) {
8950 GemmMicrokernelTester()
8951 .mr(3)
8952 .nr(4)
8953 .kr(2)
8954 .sr(1)
8955 .m(3)
8956 .n(n)
8957 .k(k)
8958 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008959 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008960 }
8961 }
8962 }
8963
8964 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_a) {
8965 TEST_REQUIRES_X86_AVX;
8966 for (uint32_t n = 8; n <= 12; n += 4) {
8967 for (size_t k = 1; k <= 40; k += 9) {
8968 GemmMicrokernelTester()
8969 .mr(3)
8970 .nr(4)
8971 .kr(2)
8972 .sr(1)
8973 .m(3)
8974 .n(n)
8975 .k(k)
8976 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008977 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008978 }
8979 }
8980 }
8981
8982 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_subtile) {
8983 TEST_REQUIRES_X86_AVX;
8984 for (uint32_t n = 8; n <= 12; n += 4) {
8985 for (size_t k = 1; k <= 40; k += 9) {
8986 for (uint32_t m = 1; m <= 3; m++) {
8987 GemmMicrokernelTester()
8988 .mr(3)
8989 .nr(4)
8990 .kr(2)
8991 .sr(1)
8992 .m(m)
8993 .n(n)
8994 .k(k)
8995 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008996 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008997 }
8998 }
8999 }
9000 }
9001
9002 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm_subtile) {
9003 TEST_REQUIRES_X86_AVX;
9004 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009005 for (uint32_t n = 1; n <= 4; n++) {
9006 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009007 GemmMicrokernelTester()
9008 .mr(3)
9009 .nr(4)
9010 .kr(2)
9011 .sr(1)
9012 .m(m)
9013 .n(n)
9014 .k(k)
9015 .cm_stride(7)
9016 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009017 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009018 }
9019 }
9020 }
9021 }
9022
9023 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmin) {
9024 TEST_REQUIRES_X86_AVX;
9025 GemmMicrokernelTester()
9026 .mr(3)
9027 .nr(4)
9028 .kr(2)
9029 .sr(1)
9030 .m(3)
9031 .n(4)
9032 .k(8)
9033 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009034 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009035 }
9036
9037 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmax) {
9038 TEST_REQUIRES_X86_AVX;
9039 GemmMicrokernelTester()
9040 .mr(3)
9041 .nr(4)
9042 .kr(2)
9043 .sr(1)
9044 .m(3)
9045 .n(4)
9046 .k(8)
9047 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009048 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009049 }
9050
9051 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm) {
9052 TEST_REQUIRES_X86_AVX;
9053 GemmMicrokernelTester()
9054 .mr(3)
9055 .nr(4)
9056 .kr(2)
9057 .sr(1)
9058 .m(3)
9059 .n(4)
9060 .k(8)
9061 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009062 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009063 }
9064
9065 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_a_zero_point) {
9066 TEST_REQUIRES_X86_AVX;
9067 for (size_t k = 1; k <= 40; k += 9) {
9068 GemmMicrokernelTester()
9069 .mr(3)
9070 .nr(4)
9071 .kr(2)
9072 .sr(1)
9073 .m(3)
9074 .n(4)
9075 .k(k)
9076 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009077 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009078 }
9079 }
9080
9081 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_b_zero_point) {
9082 TEST_REQUIRES_X86_AVX;
9083 for (size_t k = 1; k <= 40; k += 9) {
9084 GemmMicrokernelTester()
9085 .mr(3)
9086 .nr(4)
9087 .kr(2)
9088 .sr(1)
9089 .m(3)
9090 .n(4)
9091 .k(k)
9092 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009093 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009094 }
9095 }
9096
9097 TEST(QU8_GEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_zero_point) {
9098 TEST_REQUIRES_X86_AVX;
9099 for (size_t k = 1; k <= 40; k += 9) {
9100 GemmMicrokernelTester()
9101 .mr(3)
9102 .nr(4)
9103 .kr(2)
9104 .sr(1)
9105 .m(3)
9106 .n(4)
9107 .k(k)
9108 .a_zero_point(0)
9109 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009110 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009111 }
9112 }
9113#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9114
9115
9116#if XNN_ARCH_X86 || XNN_ARCH_X86_64
9117 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8) {
9118 TEST_REQUIRES_X86_XOP;
9119 GemmMicrokernelTester()
9120 .mr(1)
9121 .nr(4)
9122 .kr(2)
9123 .sr(1)
9124 .m(1)
9125 .n(4)
9126 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009127 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009128 }
9129
9130 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cn) {
9131 TEST_REQUIRES_X86_XOP;
9132 GemmMicrokernelTester()
9133 .mr(1)
9134 .nr(4)
9135 .kr(2)
9136 .sr(1)
9137 .m(1)
9138 .n(4)
9139 .k(8)
9140 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009141 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009142 }
9143
9144 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_strided_a) {
9145 TEST_REQUIRES_X86_XOP;
9146 GemmMicrokernelTester()
9147 .mr(1)
9148 .nr(4)
9149 .kr(2)
9150 .sr(1)
9151 .m(1)
9152 .n(4)
9153 .k(8)
9154 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009155 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009156 }
9157
9158 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile) {
9159 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009160 for (uint32_t n = 1; n <= 4; n++) {
9161 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009162 GemmMicrokernelTester()
9163 .mr(1)
9164 .nr(4)
9165 .kr(2)
9166 .sr(1)
9167 .m(m)
9168 .n(n)
9169 .k(8)
9170 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009171 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009172 }
9173 }
9174 }
9175
9176 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
9177 TEST_REQUIRES_X86_XOP;
9178 for (uint32_t m = 1; m <= 1; m++) {
9179 GemmMicrokernelTester()
9180 .mr(1)
9181 .nr(4)
9182 .kr(2)
9183 .sr(1)
9184 .m(m)
9185 .n(4)
9186 .k(8)
9187 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009188 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009189 }
9190 }
9191
9192 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
9193 TEST_REQUIRES_X86_XOP;
9194 for (uint32_t n = 1; n <= 4; n++) {
9195 GemmMicrokernelTester()
9196 .mr(1)
9197 .nr(4)
9198 .kr(2)
9199 .sr(1)
9200 .m(1)
9201 .n(n)
9202 .k(8)
9203 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009204 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009205 }
9206 }
9207
9208 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8) {
9209 TEST_REQUIRES_X86_XOP;
9210 for (size_t k = 1; k < 8; k++) {
9211 GemmMicrokernelTester()
9212 .mr(1)
9213 .nr(4)
9214 .kr(2)
9215 .sr(1)
9216 .m(1)
9217 .n(4)
9218 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009219 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009220 }
9221 }
9222
9223 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_strided_a) {
9224 TEST_REQUIRES_X86_XOP;
9225 for (size_t k = 1; k < 8; k++) {
9226 GemmMicrokernelTester()
9227 .mr(1)
9228 .nr(4)
9229 .kr(2)
9230 .sr(1)
9231 .m(1)
9232 .n(4)
9233 .k(k)
9234 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009235 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009236 }
9237 }
9238
9239 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_subtile) {
9240 TEST_REQUIRES_X86_XOP;
9241 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009242 for (uint32_t n = 1; n <= 4; n++) {
9243 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009244 GemmMicrokernelTester()
9245 .mr(1)
9246 .nr(4)
9247 .kr(2)
9248 .sr(1)
9249 .m(m)
9250 .n(n)
9251 .k(k)
9252 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009253 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009254 }
9255 }
9256 }
9257 }
9258
9259 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8) {
9260 TEST_REQUIRES_X86_XOP;
9261 for (size_t k = 9; k < 16; k++) {
9262 GemmMicrokernelTester()
9263 .mr(1)
9264 .nr(4)
9265 .kr(2)
9266 .sr(1)
9267 .m(1)
9268 .n(4)
9269 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009270 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009271 }
9272 }
9273
9274 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_strided_a) {
9275 TEST_REQUIRES_X86_XOP;
9276 for (size_t k = 9; k < 16; k++) {
9277 GemmMicrokernelTester()
9278 .mr(1)
9279 .nr(4)
9280 .kr(2)
9281 .sr(1)
9282 .m(1)
9283 .n(4)
9284 .k(k)
9285 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009286 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009287 }
9288 }
9289
9290 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_subtile) {
9291 TEST_REQUIRES_X86_XOP;
9292 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009293 for (uint32_t n = 1; n <= 4; n++) {
9294 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009295 GemmMicrokernelTester()
9296 .mr(1)
9297 .nr(4)
9298 .kr(2)
9299 .sr(1)
9300 .m(m)
9301 .n(n)
9302 .k(k)
9303 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009304 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009305 }
9306 }
9307 }
9308 }
9309
9310 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8) {
9311 TEST_REQUIRES_X86_XOP;
9312 for (size_t k = 16; k <= 80; k += 8) {
9313 GemmMicrokernelTester()
9314 .mr(1)
9315 .nr(4)
9316 .kr(2)
9317 .sr(1)
9318 .m(1)
9319 .n(4)
9320 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009321 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009322 }
9323 }
9324
9325 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_strided_a) {
9326 TEST_REQUIRES_X86_XOP;
9327 for (size_t k = 16; k <= 80; k += 8) {
9328 GemmMicrokernelTester()
9329 .mr(1)
9330 .nr(4)
9331 .kr(2)
9332 .sr(1)
9333 .m(1)
9334 .n(4)
9335 .k(k)
9336 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009337 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009338 }
9339 }
9340
9341 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_subtile) {
9342 TEST_REQUIRES_X86_XOP;
9343 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009344 for (uint32_t n = 1; n <= 4; n++) {
9345 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009346 GemmMicrokernelTester()
9347 .mr(1)
9348 .nr(4)
9349 .kr(2)
9350 .sr(1)
9351 .m(m)
9352 .n(n)
9353 .k(k)
9354 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009355 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009356 }
9357 }
9358 }
9359 }
9360
9361 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4) {
9362 TEST_REQUIRES_X86_XOP;
9363 for (uint32_t n = 5; n < 8; n++) {
9364 for (size_t k = 1; k <= 40; k += 9) {
9365 GemmMicrokernelTester()
9366 .mr(1)
9367 .nr(4)
9368 .kr(2)
9369 .sr(1)
9370 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009371 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009372 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009373 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009374 }
9375 }
9376 }
9377
9378 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
9379 TEST_REQUIRES_X86_XOP;
9380 for (uint32_t n = 5; n < 8; n++) {
9381 for (size_t k = 1; k <= 40; k += 9) {
9382 GemmMicrokernelTester()
9383 .mr(1)
9384 .nr(4)
9385 .kr(2)
9386 .sr(1)
9387 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009388 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009389 .k(k)
9390 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009391 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009392 }
9393 }
9394 }
9395
9396 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_a) {
9397 TEST_REQUIRES_X86_XOP;
9398 for (uint32_t n = 5; n < 8; n++) {
9399 for (size_t k = 1; k <= 40; k += 9) {
9400 GemmMicrokernelTester()
9401 .mr(1)
9402 .nr(4)
9403 .kr(2)
9404 .sr(1)
9405 .m(1)
9406 .n(n)
9407 .k(k)
9408 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009409 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009410 }
9411 }
9412 }
9413
9414 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_subtile) {
9415 TEST_REQUIRES_X86_XOP;
9416 for (uint32_t n = 5; n < 8; n++) {
9417 for (size_t k = 1; k <= 40; k += 9) {
9418 for (uint32_t m = 1; m <= 1; m++) {
9419 GemmMicrokernelTester()
9420 .mr(1)
9421 .nr(4)
9422 .kr(2)
9423 .sr(1)
9424 .m(m)
9425 .n(n)
9426 .k(k)
9427 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009428 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009429 }
9430 }
9431 }
9432 }
9433
9434 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4) {
9435 TEST_REQUIRES_X86_XOP;
9436 for (uint32_t n = 8; n <= 12; n += 4) {
9437 for (size_t k = 1; k <= 40; k += 9) {
9438 GemmMicrokernelTester()
9439 .mr(1)
9440 .nr(4)
9441 .kr(2)
9442 .sr(1)
9443 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009444 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009445 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009446 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009447 }
9448 }
9449 }
9450
9451 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_cn) {
9452 TEST_REQUIRES_X86_XOP;
9453 for (uint32_t n = 8; n <= 12; n += 4) {
9454 for (size_t k = 1; k <= 40; k += 9) {
9455 GemmMicrokernelTester()
9456 .mr(1)
9457 .nr(4)
9458 .kr(2)
9459 .sr(1)
9460 .m(1)
9461 .n(n)
9462 .k(k)
9463 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009464 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009465 }
9466 }
9467 }
9468
9469 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_a) {
9470 TEST_REQUIRES_X86_XOP;
9471 for (uint32_t n = 8; n <= 12; n += 4) {
9472 for (size_t k = 1; k <= 40; k += 9) {
9473 GemmMicrokernelTester()
9474 .mr(1)
9475 .nr(4)
9476 .kr(2)
9477 .sr(1)
9478 .m(1)
9479 .n(n)
9480 .k(k)
9481 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009482 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009483 }
9484 }
9485 }
9486
9487 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_subtile) {
9488 TEST_REQUIRES_X86_XOP;
9489 for (uint32_t n = 8; n <= 12; n += 4) {
9490 for (size_t k = 1; k <= 40; k += 9) {
9491 for (uint32_t m = 1; m <= 1; m++) {
9492 GemmMicrokernelTester()
9493 .mr(1)
9494 .nr(4)
9495 .kr(2)
9496 .sr(1)
9497 .m(m)
9498 .n(n)
9499 .k(k)
9500 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009501 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009502 }
9503 }
9504 }
9505 }
9506
9507 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm_subtile) {
9508 TEST_REQUIRES_X86_XOP;
9509 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009510 for (uint32_t n = 1; n <= 4; n++) {
9511 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009512 GemmMicrokernelTester()
9513 .mr(1)
9514 .nr(4)
9515 .kr(2)
9516 .sr(1)
9517 .m(m)
9518 .n(n)
9519 .k(k)
9520 .cm_stride(7)
9521 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009522 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009523 }
9524 }
9525 }
9526 }
9527
9528 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmin) {
9529 TEST_REQUIRES_X86_XOP;
9530 GemmMicrokernelTester()
9531 .mr(1)
9532 .nr(4)
9533 .kr(2)
9534 .sr(1)
9535 .m(1)
9536 .n(4)
9537 .k(8)
9538 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009539 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009540 }
9541
9542 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmax) {
9543 TEST_REQUIRES_X86_XOP;
9544 GemmMicrokernelTester()
9545 .mr(1)
9546 .nr(4)
9547 .kr(2)
9548 .sr(1)
9549 .m(1)
9550 .n(4)
9551 .k(8)
9552 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009553 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009554 }
9555
9556 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm) {
9557 TEST_REQUIRES_X86_XOP;
9558 GemmMicrokernelTester()
9559 .mr(1)
9560 .nr(4)
9561 .kr(2)
9562 .sr(1)
9563 .m(1)
9564 .n(4)
9565 .k(8)
9566 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009567 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009568 }
9569
9570 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_a_zero_point) {
9571 TEST_REQUIRES_X86_XOP;
9572 for (size_t k = 1; k <= 40; k += 9) {
9573 GemmMicrokernelTester()
9574 .mr(1)
9575 .nr(4)
9576 .kr(2)
9577 .sr(1)
9578 .m(1)
9579 .n(4)
9580 .k(k)
9581 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009582 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009583 }
9584 }
9585
9586 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_b_zero_point) {
9587 TEST_REQUIRES_X86_XOP;
9588 for (size_t k = 1; k <= 40; k += 9) {
9589 GemmMicrokernelTester()
9590 .mr(1)
9591 .nr(4)
9592 .kr(2)
9593 .sr(1)
9594 .m(1)
9595 .n(4)
9596 .k(k)
9597 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009598 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009599 }
9600 }
9601
9602 TEST(QU8_GEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_zero_point) {
9603 TEST_REQUIRES_X86_XOP;
9604 for (size_t k = 1; k <= 40; k += 9) {
9605 GemmMicrokernelTester()
9606 .mr(1)
9607 .nr(4)
9608 .kr(2)
9609 .sr(1)
9610 .m(1)
9611 .n(4)
9612 .k(k)
9613 .a_zero_point(0)
9614 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009615 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009616 }
9617 }
9618#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9619
9620
9621#if XNN_ARCH_X86 || XNN_ARCH_X86_64
9622 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8) {
9623 TEST_REQUIRES_X86_XOP;
9624 GemmMicrokernelTester()
9625 .mr(2)
9626 .nr(4)
9627 .kr(2)
9628 .sr(1)
9629 .m(2)
9630 .n(4)
9631 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009632 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009633 }
9634
9635 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cn) {
9636 TEST_REQUIRES_X86_XOP;
9637 GemmMicrokernelTester()
9638 .mr(2)
9639 .nr(4)
9640 .kr(2)
9641 .sr(1)
9642 .m(2)
9643 .n(4)
9644 .k(8)
9645 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009646 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009647 }
9648
9649 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_strided_a) {
9650 TEST_REQUIRES_X86_XOP;
9651 GemmMicrokernelTester()
9652 .mr(2)
9653 .nr(4)
9654 .kr(2)
9655 .sr(1)
9656 .m(2)
9657 .n(4)
9658 .k(8)
9659 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009660 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009661 }
9662
9663 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile) {
9664 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009665 for (uint32_t n = 1; n <= 4; n++) {
9666 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009667 GemmMicrokernelTester()
9668 .mr(2)
9669 .nr(4)
9670 .kr(2)
9671 .sr(1)
9672 .m(m)
9673 .n(n)
9674 .k(8)
9675 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009676 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009677 }
9678 }
9679 }
9680
9681 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
9682 TEST_REQUIRES_X86_XOP;
9683 for (uint32_t m = 1; m <= 2; m++) {
9684 GemmMicrokernelTester()
9685 .mr(2)
9686 .nr(4)
9687 .kr(2)
9688 .sr(1)
9689 .m(m)
9690 .n(4)
9691 .k(8)
9692 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009693 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009694 }
9695 }
9696
9697 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
9698 TEST_REQUIRES_X86_XOP;
9699 for (uint32_t n = 1; n <= 4; n++) {
9700 GemmMicrokernelTester()
9701 .mr(2)
9702 .nr(4)
9703 .kr(2)
9704 .sr(1)
9705 .m(2)
9706 .n(n)
9707 .k(8)
9708 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009709 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009710 }
9711 }
9712
9713 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8) {
9714 TEST_REQUIRES_X86_XOP;
9715 for (size_t k = 1; k < 8; k++) {
9716 GemmMicrokernelTester()
9717 .mr(2)
9718 .nr(4)
9719 .kr(2)
9720 .sr(1)
9721 .m(2)
9722 .n(4)
9723 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009724 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009725 }
9726 }
9727
9728 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_strided_a) {
9729 TEST_REQUIRES_X86_XOP;
9730 for (size_t k = 1; k < 8; k++) {
9731 GemmMicrokernelTester()
9732 .mr(2)
9733 .nr(4)
9734 .kr(2)
9735 .sr(1)
9736 .m(2)
9737 .n(4)
9738 .k(k)
9739 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009740 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009741 }
9742 }
9743
9744 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_subtile) {
9745 TEST_REQUIRES_X86_XOP;
9746 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009747 for (uint32_t n = 1; n <= 4; n++) {
9748 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009749 GemmMicrokernelTester()
9750 .mr(2)
9751 .nr(4)
9752 .kr(2)
9753 .sr(1)
9754 .m(m)
9755 .n(n)
9756 .k(k)
9757 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009758 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009759 }
9760 }
9761 }
9762 }
9763
9764 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8) {
9765 TEST_REQUIRES_X86_XOP;
9766 for (size_t k = 9; k < 16; k++) {
9767 GemmMicrokernelTester()
9768 .mr(2)
9769 .nr(4)
9770 .kr(2)
9771 .sr(1)
9772 .m(2)
9773 .n(4)
9774 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009775 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009776 }
9777 }
9778
9779 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_strided_a) {
9780 TEST_REQUIRES_X86_XOP;
9781 for (size_t k = 9; k < 16; k++) {
9782 GemmMicrokernelTester()
9783 .mr(2)
9784 .nr(4)
9785 .kr(2)
9786 .sr(1)
9787 .m(2)
9788 .n(4)
9789 .k(k)
9790 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009791 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009792 }
9793 }
9794
9795 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_subtile) {
9796 TEST_REQUIRES_X86_XOP;
9797 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009798 for (uint32_t n = 1; n <= 4; n++) {
9799 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009800 GemmMicrokernelTester()
9801 .mr(2)
9802 .nr(4)
9803 .kr(2)
9804 .sr(1)
9805 .m(m)
9806 .n(n)
9807 .k(k)
9808 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009809 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009810 }
9811 }
9812 }
9813 }
9814
9815 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8) {
9816 TEST_REQUIRES_X86_XOP;
9817 for (size_t k = 16; k <= 80; k += 8) {
9818 GemmMicrokernelTester()
9819 .mr(2)
9820 .nr(4)
9821 .kr(2)
9822 .sr(1)
9823 .m(2)
9824 .n(4)
9825 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009826 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009827 }
9828 }
9829
9830 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_strided_a) {
9831 TEST_REQUIRES_X86_XOP;
9832 for (size_t k = 16; k <= 80; k += 8) {
9833 GemmMicrokernelTester()
9834 .mr(2)
9835 .nr(4)
9836 .kr(2)
9837 .sr(1)
9838 .m(2)
9839 .n(4)
9840 .k(k)
9841 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009842 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009843 }
9844 }
9845
9846 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_subtile) {
9847 TEST_REQUIRES_X86_XOP;
9848 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009849 for (uint32_t n = 1; n <= 4; n++) {
9850 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009851 GemmMicrokernelTester()
9852 .mr(2)
9853 .nr(4)
9854 .kr(2)
9855 .sr(1)
9856 .m(m)
9857 .n(n)
9858 .k(k)
9859 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009860 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009861 }
9862 }
9863 }
9864 }
9865
9866 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4) {
9867 TEST_REQUIRES_X86_XOP;
9868 for (uint32_t n = 5; n < 8; n++) {
9869 for (size_t k = 1; k <= 40; k += 9) {
9870 GemmMicrokernelTester()
9871 .mr(2)
9872 .nr(4)
9873 .kr(2)
9874 .sr(1)
9875 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009876 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009877 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009878 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009879 }
9880 }
9881 }
9882
9883 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
9884 TEST_REQUIRES_X86_XOP;
9885 for (uint32_t n = 5; n < 8; n++) {
9886 for (size_t k = 1; k <= 40; k += 9) {
9887 GemmMicrokernelTester()
9888 .mr(2)
9889 .nr(4)
9890 .kr(2)
9891 .sr(1)
9892 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009893 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009894 .k(k)
9895 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009896 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009897 }
9898 }
9899 }
9900
9901 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_a) {
9902 TEST_REQUIRES_X86_XOP;
9903 for (uint32_t n = 5; n < 8; n++) {
9904 for (size_t k = 1; k <= 40; k += 9) {
9905 GemmMicrokernelTester()
9906 .mr(2)
9907 .nr(4)
9908 .kr(2)
9909 .sr(1)
9910 .m(2)
9911 .n(n)
9912 .k(k)
9913 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009914 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009915 }
9916 }
9917 }
9918
9919 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_subtile) {
9920 TEST_REQUIRES_X86_XOP;
9921 for (uint32_t n = 5; n < 8; n++) {
9922 for (size_t k = 1; k <= 40; k += 9) {
9923 for (uint32_t m = 1; m <= 2; m++) {
9924 GemmMicrokernelTester()
9925 .mr(2)
9926 .nr(4)
9927 .kr(2)
9928 .sr(1)
9929 .m(m)
9930 .n(n)
9931 .k(k)
9932 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009933 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009934 }
9935 }
9936 }
9937 }
9938
9939 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4) {
9940 TEST_REQUIRES_X86_XOP;
9941 for (uint32_t n = 8; n <= 12; n += 4) {
9942 for (size_t k = 1; k <= 40; k += 9) {
9943 GemmMicrokernelTester()
9944 .mr(2)
9945 .nr(4)
9946 .kr(2)
9947 .sr(1)
9948 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009949 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009950 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009951 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009952 }
9953 }
9954 }
9955
9956 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_cn) {
9957 TEST_REQUIRES_X86_XOP;
9958 for (uint32_t n = 8; n <= 12; n += 4) {
9959 for (size_t k = 1; k <= 40; k += 9) {
9960 GemmMicrokernelTester()
9961 .mr(2)
9962 .nr(4)
9963 .kr(2)
9964 .sr(1)
9965 .m(2)
9966 .n(n)
9967 .k(k)
9968 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009969 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009970 }
9971 }
9972 }
9973
9974 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_a) {
9975 TEST_REQUIRES_X86_XOP;
9976 for (uint32_t n = 8; n <= 12; n += 4) {
9977 for (size_t k = 1; k <= 40; k += 9) {
9978 GemmMicrokernelTester()
9979 .mr(2)
9980 .nr(4)
9981 .kr(2)
9982 .sr(1)
9983 .m(2)
9984 .n(n)
9985 .k(k)
9986 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009987 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009988 }
9989 }
9990 }
9991
9992 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_subtile) {
9993 TEST_REQUIRES_X86_XOP;
9994 for (uint32_t n = 8; n <= 12; n += 4) {
9995 for (size_t k = 1; k <= 40; k += 9) {
9996 for (uint32_t m = 1; m <= 2; m++) {
9997 GemmMicrokernelTester()
9998 .mr(2)
9999 .nr(4)
10000 .kr(2)
10001 .sr(1)
10002 .m(m)
10003 .n(n)
10004 .k(k)
10005 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010006 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010007 }
10008 }
10009 }
10010 }
10011
10012 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm_subtile) {
10013 TEST_REQUIRES_X86_XOP;
10014 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010015 for (uint32_t n = 1; n <= 4; n++) {
10016 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010017 GemmMicrokernelTester()
10018 .mr(2)
10019 .nr(4)
10020 .kr(2)
10021 .sr(1)
10022 .m(m)
10023 .n(n)
10024 .k(k)
10025 .cm_stride(7)
10026 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010027 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010028 }
10029 }
10030 }
10031 }
10032
10033 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmin) {
10034 TEST_REQUIRES_X86_XOP;
10035 GemmMicrokernelTester()
10036 .mr(2)
10037 .nr(4)
10038 .kr(2)
10039 .sr(1)
10040 .m(2)
10041 .n(4)
10042 .k(8)
10043 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010044 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010045 }
10046
10047 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmax) {
10048 TEST_REQUIRES_X86_XOP;
10049 GemmMicrokernelTester()
10050 .mr(2)
10051 .nr(4)
10052 .kr(2)
10053 .sr(1)
10054 .m(2)
10055 .n(4)
10056 .k(8)
10057 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010058 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010059 }
10060
10061 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm) {
10062 TEST_REQUIRES_X86_XOP;
10063 GemmMicrokernelTester()
10064 .mr(2)
10065 .nr(4)
10066 .kr(2)
10067 .sr(1)
10068 .m(2)
10069 .n(4)
10070 .k(8)
10071 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010072 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010073 }
10074
10075 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_a_zero_point) {
10076 TEST_REQUIRES_X86_XOP;
10077 for (size_t k = 1; k <= 40; k += 9) {
10078 GemmMicrokernelTester()
10079 .mr(2)
10080 .nr(4)
10081 .kr(2)
10082 .sr(1)
10083 .m(2)
10084 .n(4)
10085 .k(k)
10086 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010087 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010088 }
10089 }
10090
10091 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_b_zero_point) {
10092 TEST_REQUIRES_X86_XOP;
10093 for (size_t k = 1; k <= 40; k += 9) {
10094 GemmMicrokernelTester()
10095 .mr(2)
10096 .nr(4)
10097 .kr(2)
10098 .sr(1)
10099 .m(2)
10100 .n(4)
10101 .k(k)
10102 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010103 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010104 }
10105 }
10106
10107 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_zero_point) {
10108 TEST_REQUIRES_X86_XOP;
10109 for (size_t k = 1; k <= 40; k += 9) {
10110 GemmMicrokernelTester()
10111 .mr(2)
10112 .nr(4)
10113 .kr(2)
10114 .sr(1)
10115 .m(2)
10116 .n(4)
10117 .k(k)
10118 .a_zero_point(0)
10119 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010120 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010121 }
10122 }
10123#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10124
10125
10126#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10127 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8) {
10128 TEST_REQUIRES_X86_XOP;
10129 GemmMicrokernelTester()
10130 .mr(4)
10131 .nr(4)
10132 .kr(2)
10133 .sr(1)
10134 .m(4)
10135 .n(4)
10136 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010137 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010138 }
10139
10140 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cn) {
10141 TEST_REQUIRES_X86_XOP;
10142 GemmMicrokernelTester()
10143 .mr(4)
10144 .nr(4)
10145 .kr(2)
10146 .sr(1)
10147 .m(4)
10148 .n(4)
10149 .k(8)
10150 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010151 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010152 }
10153
10154 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_strided_a) {
10155 TEST_REQUIRES_X86_XOP;
10156 GemmMicrokernelTester()
10157 .mr(4)
10158 .nr(4)
10159 .kr(2)
10160 .sr(1)
10161 .m(4)
10162 .n(4)
10163 .k(8)
10164 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010165 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010166 }
10167
10168 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile) {
10169 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010170 for (uint32_t n = 1; n <= 4; n++) {
10171 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010172 GemmMicrokernelTester()
10173 .mr(4)
10174 .nr(4)
10175 .kr(2)
10176 .sr(1)
10177 .m(m)
10178 .n(n)
10179 .k(8)
10180 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010181 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010182 }
10183 }
10184 }
10185
10186 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
10187 TEST_REQUIRES_X86_XOP;
10188 for (uint32_t m = 1; m <= 4; m++) {
10189 GemmMicrokernelTester()
10190 .mr(4)
10191 .nr(4)
10192 .kr(2)
10193 .sr(1)
10194 .m(m)
10195 .n(4)
10196 .k(8)
10197 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010198 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010199 }
10200 }
10201
10202 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
10203 TEST_REQUIRES_X86_XOP;
10204 for (uint32_t n = 1; n <= 4; n++) {
10205 GemmMicrokernelTester()
10206 .mr(4)
10207 .nr(4)
10208 .kr(2)
10209 .sr(1)
10210 .m(4)
10211 .n(n)
10212 .k(8)
10213 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010214 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010215 }
10216 }
10217
10218 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8) {
10219 TEST_REQUIRES_X86_XOP;
10220 for (size_t k = 1; k < 8; k++) {
10221 GemmMicrokernelTester()
10222 .mr(4)
10223 .nr(4)
10224 .kr(2)
10225 .sr(1)
10226 .m(4)
10227 .n(4)
10228 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010229 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010230 }
10231 }
10232
10233 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_strided_a) {
10234 TEST_REQUIRES_X86_XOP;
10235 for (size_t k = 1; k < 8; k++) {
10236 GemmMicrokernelTester()
10237 .mr(4)
10238 .nr(4)
10239 .kr(2)
10240 .sr(1)
10241 .m(4)
10242 .n(4)
10243 .k(k)
10244 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010245 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010246 }
10247 }
10248
10249 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_subtile) {
10250 TEST_REQUIRES_X86_XOP;
10251 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010252 for (uint32_t n = 1; n <= 4; n++) {
10253 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010254 GemmMicrokernelTester()
10255 .mr(4)
10256 .nr(4)
10257 .kr(2)
10258 .sr(1)
10259 .m(m)
10260 .n(n)
10261 .k(k)
10262 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010263 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010264 }
10265 }
10266 }
10267 }
10268
10269 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8) {
10270 TEST_REQUIRES_X86_XOP;
10271 for (size_t k = 9; k < 16; k++) {
10272 GemmMicrokernelTester()
10273 .mr(4)
10274 .nr(4)
10275 .kr(2)
10276 .sr(1)
10277 .m(4)
10278 .n(4)
10279 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010280 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010281 }
10282 }
10283
10284 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_strided_a) {
10285 TEST_REQUIRES_X86_XOP;
10286 for (size_t k = 9; k < 16; k++) {
10287 GemmMicrokernelTester()
10288 .mr(4)
10289 .nr(4)
10290 .kr(2)
10291 .sr(1)
10292 .m(4)
10293 .n(4)
10294 .k(k)
10295 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010296 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010297 }
10298 }
10299
10300 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_subtile) {
10301 TEST_REQUIRES_X86_XOP;
10302 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010303 for (uint32_t n = 1; n <= 4; n++) {
10304 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010305 GemmMicrokernelTester()
10306 .mr(4)
10307 .nr(4)
10308 .kr(2)
10309 .sr(1)
10310 .m(m)
10311 .n(n)
10312 .k(k)
10313 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010314 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010315 }
10316 }
10317 }
10318 }
10319
10320 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8) {
10321 TEST_REQUIRES_X86_XOP;
10322 for (size_t k = 16; k <= 80; k += 8) {
10323 GemmMicrokernelTester()
10324 .mr(4)
10325 .nr(4)
10326 .kr(2)
10327 .sr(1)
10328 .m(4)
10329 .n(4)
10330 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010331 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010332 }
10333 }
10334
10335 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_strided_a) {
10336 TEST_REQUIRES_X86_XOP;
10337 for (size_t k = 16; k <= 80; k += 8) {
10338 GemmMicrokernelTester()
10339 .mr(4)
10340 .nr(4)
10341 .kr(2)
10342 .sr(1)
10343 .m(4)
10344 .n(4)
10345 .k(k)
10346 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010347 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010348 }
10349 }
10350
10351 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_subtile) {
10352 TEST_REQUIRES_X86_XOP;
10353 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010354 for (uint32_t n = 1; n <= 4; n++) {
10355 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010356 GemmMicrokernelTester()
10357 .mr(4)
10358 .nr(4)
10359 .kr(2)
10360 .sr(1)
10361 .m(m)
10362 .n(n)
10363 .k(k)
10364 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010365 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010366 }
10367 }
10368 }
10369 }
10370
10371 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4) {
10372 TEST_REQUIRES_X86_XOP;
10373 for (uint32_t n = 5; n < 8; n++) {
10374 for (size_t k = 1; k <= 40; k += 9) {
10375 GemmMicrokernelTester()
10376 .mr(4)
10377 .nr(4)
10378 .kr(2)
10379 .sr(1)
10380 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010381 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010382 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010383 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010384 }
10385 }
10386 }
10387
10388 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
10389 TEST_REQUIRES_X86_XOP;
10390 for (uint32_t n = 5; n < 8; n++) {
10391 for (size_t k = 1; k <= 40; k += 9) {
10392 GemmMicrokernelTester()
10393 .mr(4)
10394 .nr(4)
10395 .kr(2)
10396 .sr(1)
10397 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010398 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010399 .k(k)
10400 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010401 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010402 }
10403 }
10404 }
10405
10406 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_a) {
10407 TEST_REQUIRES_X86_XOP;
10408 for (uint32_t n = 5; n < 8; n++) {
10409 for (size_t k = 1; k <= 40; k += 9) {
10410 GemmMicrokernelTester()
10411 .mr(4)
10412 .nr(4)
10413 .kr(2)
10414 .sr(1)
10415 .m(4)
10416 .n(n)
10417 .k(k)
10418 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010419 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010420 }
10421 }
10422 }
10423
10424 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_subtile) {
10425 TEST_REQUIRES_X86_XOP;
10426 for (uint32_t n = 5; n < 8; n++) {
10427 for (size_t k = 1; k <= 40; k += 9) {
10428 for (uint32_t m = 1; m <= 4; m++) {
10429 GemmMicrokernelTester()
10430 .mr(4)
10431 .nr(4)
10432 .kr(2)
10433 .sr(1)
10434 .m(m)
10435 .n(n)
10436 .k(k)
10437 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010438 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010439 }
10440 }
10441 }
10442 }
10443
10444 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4) {
10445 TEST_REQUIRES_X86_XOP;
10446 for (uint32_t n = 8; n <= 12; n += 4) {
10447 for (size_t k = 1; k <= 40; k += 9) {
10448 GemmMicrokernelTester()
10449 .mr(4)
10450 .nr(4)
10451 .kr(2)
10452 .sr(1)
10453 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010454 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010455 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010456 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010457 }
10458 }
10459 }
10460
10461 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_cn) {
10462 TEST_REQUIRES_X86_XOP;
10463 for (uint32_t n = 8; n <= 12; n += 4) {
10464 for (size_t k = 1; k <= 40; k += 9) {
10465 GemmMicrokernelTester()
10466 .mr(4)
10467 .nr(4)
10468 .kr(2)
10469 .sr(1)
10470 .m(4)
10471 .n(n)
10472 .k(k)
10473 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010474 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010475 }
10476 }
10477 }
10478
10479 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_a) {
10480 TEST_REQUIRES_X86_XOP;
10481 for (uint32_t n = 8; n <= 12; n += 4) {
10482 for (size_t k = 1; k <= 40; k += 9) {
10483 GemmMicrokernelTester()
10484 .mr(4)
10485 .nr(4)
10486 .kr(2)
10487 .sr(1)
10488 .m(4)
10489 .n(n)
10490 .k(k)
10491 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010492 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010493 }
10494 }
10495 }
10496
10497 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_subtile) {
10498 TEST_REQUIRES_X86_XOP;
10499 for (uint32_t n = 8; n <= 12; n += 4) {
10500 for (size_t k = 1; k <= 40; k += 9) {
10501 for (uint32_t m = 1; m <= 4; m++) {
10502 GemmMicrokernelTester()
10503 .mr(4)
10504 .nr(4)
10505 .kr(2)
10506 .sr(1)
10507 .m(m)
10508 .n(n)
10509 .k(k)
10510 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010511 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010512 }
10513 }
10514 }
10515 }
10516
10517 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm_subtile) {
10518 TEST_REQUIRES_X86_XOP;
10519 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010520 for (uint32_t n = 1; n <= 4; n++) {
10521 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010522 GemmMicrokernelTester()
10523 .mr(4)
10524 .nr(4)
10525 .kr(2)
10526 .sr(1)
10527 .m(m)
10528 .n(n)
10529 .k(k)
10530 .cm_stride(7)
10531 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010532 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010533 }
10534 }
10535 }
10536 }
10537
10538 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmin) {
10539 TEST_REQUIRES_X86_XOP;
10540 GemmMicrokernelTester()
10541 .mr(4)
10542 .nr(4)
10543 .kr(2)
10544 .sr(1)
10545 .m(4)
10546 .n(4)
10547 .k(8)
10548 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010549 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010550 }
10551
10552 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmax) {
10553 TEST_REQUIRES_X86_XOP;
10554 GemmMicrokernelTester()
10555 .mr(4)
10556 .nr(4)
10557 .kr(2)
10558 .sr(1)
10559 .m(4)
10560 .n(4)
10561 .k(8)
10562 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010563 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010564 }
10565
10566 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm) {
10567 TEST_REQUIRES_X86_XOP;
10568 GemmMicrokernelTester()
10569 .mr(4)
10570 .nr(4)
10571 .kr(2)
10572 .sr(1)
10573 .m(4)
10574 .n(4)
10575 .k(8)
10576 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010577 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010578 }
10579
10580 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_a_zero_point) {
10581 TEST_REQUIRES_X86_XOP;
10582 for (size_t k = 1; k <= 40; k += 9) {
10583 GemmMicrokernelTester()
10584 .mr(4)
10585 .nr(4)
10586 .kr(2)
10587 .sr(1)
10588 .m(4)
10589 .n(4)
10590 .k(k)
10591 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010592 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010593 }
10594 }
10595
10596 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_b_zero_point) {
10597 TEST_REQUIRES_X86_XOP;
10598 for (size_t k = 1; k <= 40; k += 9) {
10599 GemmMicrokernelTester()
10600 .mr(4)
10601 .nr(4)
10602 .kr(2)
10603 .sr(1)
10604 .m(4)
10605 .n(4)
10606 .k(k)
10607 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010608 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010609 }
10610 }
10611
10612 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_zero_point) {
10613 TEST_REQUIRES_X86_XOP;
10614 for (size_t k = 1; k <= 40; k += 9) {
10615 GemmMicrokernelTester()
10616 .mr(4)
10617 .nr(4)
10618 .kr(2)
10619 .sr(1)
10620 .m(4)
10621 .n(4)
10622 .k(k)
10623 .a_zero_point(0)
10624 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010625 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010626 }
10627 }
10628#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10629
10630
10631#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10632 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8) {
10633 TEST_REQUIRES_X86_SSE2;
10634 GemmMicrokernelTester()
10635 .mr(1)
10636 .nr(4)
10637 .kr(8)
10638 .sr(1)
10639 .m(1)
10640 .n(4)
10641 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010642 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010643 }
10644
10645 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cn) {
10646 TEST_REQUIRES_X86_SSE2;
10647 GemmMicrokernelTester()
10648 .mr(1)
10649 .nr(4)
10650 .kr(8)
10651 .sr(1)
10652 .m(1)
10653 .n(4)
10654 .k(8)
10655 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010656 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010657 }
10658
10659 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_strided_a) {
10660 TEST_REQUIRES_X86_SSE2;
10661 GemmMicrokernelTester()
10662 .mr(1)
10663 .nr(4)
10664 .kr(8)
10665 .sr(1)
10666 .m(1)
10667 .n(4)
10668 .k(8)
10669 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010670 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010671 }
10672
10673 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile) {
10674 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010675 for (uint32_t n = 1; n <= 4; n++) {
10676 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010677 GemmMicrokernelTester()
10678 .mr(1)
10679 .nr(4)
10680 .kr(8)
10681 .sr(1)
10682 .m(m)
10683 .n(n)
10684 .k(8)
10685 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010686 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010687 }
10688 }
10689 }
10690
10691 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
10692 TEST_REQUIRES_X86_SSE2;
10693 for (uint32_t m = 1; m <= 1; m++) {
10694 GemmMicrokernelTester()
10695 .mr(1)
10696 .nr(4)
10697 .kr(8)
10698 .sr(1)
10699 .m(m)
10700 .n(4)
10701 .k(8)
10702 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010703 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010704 }
10705 }
10706
10707 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
10708 TEST_REQUIRES_X86_SSE2;
10709 for (uint32_t n = 1; n <= 4; n++) {
10710 GemmMicrokernelTester()
10711 .mr(1)
10712 .nr(4)
10713 .kr(8)
10714 .sr(1)
10715 .m(1)
10716 .n(n)
10717 .k(8)
10718 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010719 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010720 }
10721 }
10722
10723 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8) {
10724 TEST_REQUIRES_X86_SSE2;
10725 for (size_t k = 1; k < 8; k++) {
10726 GemmMicrokernelTester()
10727 .mr(1)
10728 .nr(4)
10729 .kr(8)
10730 .sr(1)
10731 .m(1)
10732 .n(4)
10733 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010734 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010735 }
10736 }
10737
10738 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_strided_a) {
10739 TEST_REQUIRES_X86_SSE2;
10740 for (size_t k = 1; k < 8; k++) {
10741 GemmMicrokernelTester()
10742 .mr(1)
10743 .nr(4)
10744 .kr(8)
10745 .sr(1)
10746 .m(1)
10747 .n(4)
10748 .k(k)
10749 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010750 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010751 }
10752 }
10753
10754 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_subtile) {
10755 TEST_REQUIRES_X86_SSE2;
10756 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010757 for (uint32_t n = 1; n <= 4; n++) {
10758 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010759 GemmMicrokernelTester()
10760 .mr(1)
10761 .nr(4)
10762 .kr(8)
10763 .sr(1)
10764 .m(m)
10765 .n(n)
10766 .k(k)
10767 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010768 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010769 }
10770 }
10771 }
10772 }
10773
10774 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8) {
10775 TEST_REQUIRES_X86_SSE2;
10776 for (size_t k = 9; k < 16; k++) {
10777 GemmMicrokernelTester()
10778 .mr(1)
10779 .nr(4)
10780 .kr(8)
10781 .sr(1)
10782 .m(1)
10783 .n(4)
10784 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010785 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010786 }
10787 }
10788
10789 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_strided_a) {
10790 TEST_REQUIRES_X86_SSE2;
10791 for (size_t k = 9; k < 16; k++) {
10792 GemmMicrokernelTester()
10793 .mr(1)
10794 .nr(4)
10795 .kr(8)
10796 .sr(1)
10797 .m(1)
10798 .n(4)
10799 .k(k)
10800 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010801 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010802 }
10803 }
10804
10805 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_subtile) {
10806 TEST_REQUIRES_X86_SSE2;
10807 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010808 for (uint32_t n = 1; n <= 4; n++) {
10809 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010810 GemmMicrokernelTester()
10811 .mr(1)
10812 .nr(4)
10813 .kr(8)
10814 .sr(1)
10815 .m(m)
10816 .n(n)
10817 .k(k)
10818 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010819 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010820 }
10821 }
10822 }
10823 }
10824
10825 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8) {
10826 TEST_REQUIRES_X86_SSE2;
10827 for (size_t k = 16; k <= 80; k += 8) {
10828 GemmMicrokernelTester()
10829 .mr(1)
10830 .nr(4)
10831 .kr(8)
10832 .sr(1)
10833 .m(1)
10834 .n(4)
10835 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010836 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010837 }
10838 }
10839
10840 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_strided_a) {
10841 TEST_REQUIRES_X86_SSE2;
10842 for (size_t k = 16; k <= 80; k += 8) {
10843 GemmMicrokernelTester()
10844 .mr(1)
10845 .nr(4)
10846 .kr(8)
10847 .sr(1)
10848 .m(1)
10849 .n(4)
10850 .k(k)
10851 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010852 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010853 }
10854 }
10855
10856 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_subtile) {
10857 TEST_REQUIRES_X86_SSE2;
10858 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010859 for (uint32_t n = 1; n <= 4; n++) {
10860 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010861 GemmMicrokernelTester()
10862 .mr(1)
10863 .nr(4)
10864 .kr(8)
10865 .sr(1)
10866 .m(m)
10867 .n(n)
10868 .k(k)
10869 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010870 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010871 }
10872 }
10873 }
10874 }
10875
10876 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4) {
10877 TEST_REQUIRES_X86_SSE2;
10878 for (uint32_t n = 5; n < 8; n++) {
10879 for (size_t k = 1; k <= 40; k += 9) {
10880 GemmMicrokernelTester()
10881 .mr(1)
10882 .nr(4)
10883 .kr(8)
10884 .sr(1)
10885 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010886 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010887 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010888 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010889 }
10890 }
10891 }
10892
10893 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
10894 TEST_REQUIRES_X86_SSE2;
10895 for (uint32_t n = 5; n < 8; n++) {
10896 for (size_t k = 1; k <= 40; k += 9) {
10897 GemmMicrokernelTester()
10898 .mr(1)
10899 .nr(4)
10900 .kr(8)
10901 .sr(1)
10902 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010903 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010904 .k(k)
10905 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010906 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010907 }
10908 }
10909 }
10910
10911 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_a) {
10912 TEST_REQUIRES_X86_SSE2;
10913 for (uint32_t n = 5; n < 8; n++) {
10914 for (size_t k = 1; k <= 40; k += 9) {
10915 GemmMicrokernelTester()
10916 .mr(1)
10917 .nr(4)
10918 .kr(8)
10919 .sr(1)
10920 .m(1)
10921 .n(n)
10922 .k(k)
10923 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010924 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010925 }
10926 }
10927 }
10928
10929 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_subtile) {
10930 TEST_REQUIRES_X86_SSE2;
10931 for (uint32_t n = 5; n < 8; n++) {
10932 for (size_t k = 1; k <= 40; k += 9) {
10933 for (uint32_t m = 1; m <= 1; m++) {
10934 GemmMicrokernelTester()
10935 .mr(1)
10936 .nr(4)
10937 .kr(8)
10938 .sr(1)
10939 .m(m)
10940 .n(n)
10941 .k(k)
10942 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010943 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010944 }
10945 }
10946 }
10947 }
10948
10949 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4) {
10950 TEST_REQUIRES_X86_SSE2;
10951 for (uint32_t n = 8; n <= 12; n += 4) {
10952 for (size_t k = 1; k <= 40; k += 9) {
10953 GemmMicrokernelTester()
10954 .mr(1)
10955 .nr(4)
10956 .kr(8)
10957 .sr(1)
10958 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010959 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010960 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010961 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010962 }
10963 }
10964 }
10965
10966 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
10967 TEST_REQUIRES_X86_SSE2;
10968 for (uint32_t n = 8; n <= 12; n += 4) {
10969 for (size_t k = 1; k <= 40; k += 9) {
10970 GemmMicrokernelTester()
10971 .mr(1)
10972 .nr(4)
10973 .kr(8)
10974 .sr(1)
10975 .m(1)
10976 .n(n)
10977 .k(k)
10978 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010979 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010980 }
10981 }
10982 }
10983
10984 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_a) {
10985 TEST_REQUIRES_X86_SSE2;
10986 for (uint32_t n = 8; n <= 12; n += 4) {
10987 for (size_t k = 1; k <= 40; k += 9) {
10988 GemmMicrokernelTester()
10989 .mr(1)
10990 .nr(4)
10991 .kr(8)
10992 .sr(1)
10993 .m(1)
10994 .n(n)
10995 .k(k)
10996 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010997 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010998 }
10999 }
11000 }
11001
11002 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_subtile) {
11003 TEST_REQUIRES_X86_SSE2;
11004 for (uint32_t n = 8; n <= 12; n += 4) {
11005 for (size_t k = 1; k <= 40; k += 9) {
11006 for (uint32_t m = 1; m <= 1; m++) {
11007 GemmMicrokernelTester()
11008 .mr(1)
11009 .nr(4)
11010 .kr(8)
11011 .sr(1)
11012 .m(m)
11013 .n(n)
11014 .k(k)
11015 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011016 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011017 }
11018 }
11019 }
11020 }
11021
11022 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm_subtile) {
11023 TEST_REQUIRES_X86_SSE2;
11024 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011025 for (uint32_t n = 1; n <= 4; n++) {
11026 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011027 GemmMicrokernelTester()
11028 .mr(1)
11029 .nr(4)
11030 .kr(8)
11031 .sr(1)
11032 .m(m)
11033 .n(n)
11034 .k(k)
11035 .cm_stride(7)
11036 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011037 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011038 }
11039 }
11040 }
11041 }
11042
11043 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmin) {
11044 TEST_REQUIRES_X86_SSE2;
11045 GemmMicrokernelTester()
11046 .mr(1)
11047 .nr(4)
11048 .kr(8)
11049 .sr(1)
11050 .m(1)
11051 .n(4)
11052 .k(8)
11053 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011054 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011055 }
11056
11057 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmax) {
11058 TEST_REQUIRES_X86_SSE2;
11059 GemmMicrokernelTester()
11060 .mr(1)
11061 .nr(4)
11062 .kr(8)
11063 .sr(1)
11064 .m(1)
11065 .n(4)
11066 .k(8)
11067 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011068 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011069 }
11070
11071 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm) {
11072 TEST_REQUIRES_X86_SSE2;
11073 GemmMicrokernelTester()
11074 .mr(1)
11075 .nr(4)
11076 .kr(8)
11077 .sr(1)
11078 .m(1)
11079 .n(4)
11080 .k(8)
11081 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011082 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011083 }
11084
11085 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_a_zero_point) {
11086 TEST_REQUIRES_X86_SSE2;
11087 for (size_t k = 1; k <= 40; k += 9) {
11088 GemmMicrokernelTester()
11089 .mr(1)
11090 .nr(4)
11091 .kr(8)
11092 .sr(1)
11093 .m(1)
11094 .n(4)
11095 .k(k)
11096 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011097 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011098 }
11099 }
11100
11101 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_b_zero_point) {
11102 TEST_REQUIRES_X86_SSE2;
11103 for (size_t k = 1; k <= 40; k += 9) {
11104 GemmMicrokernelTester()
11105 .mr(1)
11106 .nr(4)
11107 .kr(8)
11108 .sr(1)
11109 .m(1)
11110 .n(4)
11111 .k(k)
11112 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011113 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011114 }
11115 }
11116
11117 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_zero_point) {
11118 TEST_REQUIRES_X86_SSE2;
11119 for (size_t k = 1; k <= 40; k += 9) {
11120 GemmMicrokernelTester()
11121 .mr(1)
11122 .nr(4)
11123 .kr(8)
11124 .sr(1)
11125 .m(1)
11126 .n(4)
11127 .k(k)
11128 .a_zero_point(0)
11129 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011130 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011131 }
11132 }
11133#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11134
11135
11136#if XNN_ARCH_X86 || XNN_ARCH_X86_64
11137 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8) {
11138 TEST_REQUIRES_X86_SSE2;
11139 GemmMicrokernelTester()
11140 .mr(2)
11141 .nr(4)
11142 .kr(8)
11143 .sr(1)
11144 .m(2)
11145 .n(4)
11146 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011147 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011148 }
11149
11150 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cn) {
11151 TEST_REQUIRES_X86_SSE2;
11152 GemmMicrokernelTester()
11153 .mr(2)
11154 .nr(4)
11155 .kr(8)
11156 .sr(1)
11157 .m(2)
11158 .n(4)
11159 .k(8)
11160 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011161 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011162 }
11163
11164 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_strided_a) {
11165 TEST_REQUIRES_X86_SSE2;
11166 GemmMicrokernelTester()
11167 .mr(2)
11168 .nr(4)
11169 .kr(8)
11170 .sr(1)
11171 .m(2)
11172 .n(4)
11173 .k(8)
11174 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011175 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011176 }
11177
11178 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile) {
11179 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011180 for (uint32_t n = 1; n <= 4; n++) {
11181 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011182 GemmMicrokernelTester()
11183 .mr(2)
11184 .nr(4)
11185 .kr(8)
11186 .sr(1)
11187 .m(m)
11188 .n(n)
11189 .k(8)
11190 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011191 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011192 }
11193 }
11194 }
11195
11196 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
11197 TEST_REQUIRES_X86_SSE2;
11198 for (uint32_t m = 1; m <= 2; m++) {
11199 GemmMicrokernelTester()
11200 .mr(2)
11201 .nr(4)
11202 .kr(8)
11203 .sr(1)
11204 .m(m)
11205 .n(4)
11206 .k(8)
11207 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011208 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011209 }
11210 }
11211
11212 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
11213 TEST_REQUIRES_X86_SSE2;
11214 for (uint32_t n = 1; n <= 4; n++) {
11215 GemmMicrokernelTester()
11216 .mr(2)
11217 .nr(4)
11218 .kr(8)
11219 .sr(1)
11220 .m(2)
11221 .n(n)
11222 .k(8)
11223 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011224 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011225 }
11226 }
11227
11228 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8) {
11229 TEST_REQUIRES_X86_SSE2;
11230 for (size_t k = 1; k < 8; k++) {
11231 GemmMicrokernelTester()
11232 .mr(2)
11233 .nr(4)
11234 .kr(8)
11235 .sr(1)
11236 .m(2)
11237 .n(4)
11238 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011239 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011240 }
11241 }
11242
11243 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_strided_a) {
11244 TEST_REQUIRES_X86_SSE2;
11245 for (size_t k = 1; k < 8; k++) {
11246 GemmMicrokernelTester()
11247 .mr(2)
11248 .nr(4)
11249 .kr(8)
11250 .sr(1)
11251 .m(2)
11252 .n(4)
11253 .k(k)
11254 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011255 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011256 }
11257 }
11258
11259 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_subtile) {
11260 TEST_REQUIRES_X86_SSE2;
11261 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011262 for (uint32_t n = 1; n <= 4; n++) {
11263 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011264 GemmMicrokernelTester()
11265 .mr(2)
11266 .nr(4)
11267 .kr(8)
11268 .sr(1)
11269 .m(m)
11270 .n(n)
11271 .k(k)
11272 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011273 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011274 }
11275 }
11276 }
11277 }
11278
11279 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8) {
11280 TEST_REQUIRES_X86_SSE2;
11281 for (size_t k = 9; k < 16; k++) {
11282 GemmMicrokernelTester()
11283 .mr(2)
11284 .nr(4)
11285 .kr(8)
11286 .sr(1)
11287 .m(2)
11288 .n(4)
11289 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011290 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011291 }
11292 }
11293
11294 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_strided_a) {
11295 TEST_REQUIRES_X86_SSE2;
11296 for (size_t k = 9; k < 16; k++) {
11297 GemmMicrokernelTester()
11298 .mr(2)
11299 .nr(4)
11300 .kr(8)
11301 .sr(1)
11302 .m(2)
11303 .n(4)
11304 .k(k)
11305 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011306 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011307 }
11308 }
11309
11310 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_subtile) {
11311 TEST_REQUIRES_X86_SSE2;
11312 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011313 for (uint32_t n = 1; n <= 4; n++) {
11314 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011315 GemmMicrokernelTester()
11316 .mr(2)
11317 .nr(4)
11318 .kr(8)
11319 .sr(1)
11320 .m(m)
11321 .n(n)
11322 .k(k)
11323 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011324 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011325 }
11326 }
11327 }
11328 }
11329
11330 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8) {
11331 TEST_REQUIRES_X86_SSE2;
11332 for (size_t k = 16; k <= 80; k += 8) {
11333 GemmMicrokernelTester()
11334 .mr(2)
11335 .nr(4)
11336 .kr(8)
11337 .sr(1)
11338 .m(2)
11339 .n(4)
11340 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011341 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011342 }
11343 }
11344
11345 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_strided_a) {
11346 TEST_REQUIRES_X86_SSE2;
11347 for (size_t k = 16; k <= 80; k += 8) {
11348 GemmMicrokernelTester()
11349 .mr(2)
11350 .nr(4)
11351 .kr(8)
11352 .sr(1)
11353 .m(2)
11354 .n(4)
11355 .k(k)
11356 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011357 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011358 }
11359 }
11360
11361 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_subtile) {
11362 TEST_REQUIRES_X86_SSE2;
11363 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011364 for (uint32_t n = 1; n <= 4; n++) {
11365 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011366 GemmMicrokernelTester()
11367 .mr(2)
11368 .nr(4)
11369 .kr(8)
11370 .sr(1)
11371 .m(m)
11372 .n(n)
11373 .k(k)
11374 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011375 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011376 }
11377 }
11378 }
11379 }
11380
11381 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4) {
11382 TEST_REQUIRES_X86_SSE2;
11383 for (uint32_t n = 5; n < 8; n++) {
11384 for (size_t k = 1; k <= 40; k += 9) {
11385 GemmMicrokernelTester()
11386 .mr(2)
11387 .nr(4)
11388 .kr(8)
11389 .sr(1)
11390 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011391 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011392 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011393 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011394 }
11395 }
11396 }
11397
11398 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
11399 TEST_REQUIRES_X86_SSE2;
11400 for (uint32_t n = 5; n < 8; n++) {
11401 for (size_t k = 1; k <= 40; k += 9) {
11402 GemmMicrokernelTester()
11403 .mr(2)
11404 .nr(4)
11405 .kr(8)
11406 .sr(1)
11407 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011408 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011409 .k(k)
11410 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011411 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011412 }
11413 }
11414 }
11415
11416 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_a) {
11417 TEST_REQUIRES_X86_SSE2;
11418 for (uint32_t n = 5; n < 8; n++) {
11419 for (size_t k = 1; k <= 40; k += 9) {
11420 GemmMicrokernelTester()
11421 .mr(2)
11422 .nr(4)
11423 .kr(8)
11424 .sr(1)
11425 .m(2)
11426 .n(n)
11427 .k(k)
11428 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011429 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011430 }
11431 }
11432 }
11433
11434 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_subtile) {
11435 TEST_REQUIRES_X86_SSE2;
11436 for (uint32_t n = 5; n < 8; n++) {
11437 for (size_t k = 1; k <= 40; k += 9) {
11438 for (uint32_t m = 1; m <= 2; m++) {
11439 GemmMicrokernelTester()
11440 .mr(2)
11441 .nr(4)
11442 .kr(8)
11443 .sr(1)
11444 .m(m)
11445 .n(n)
11446 .k(k)
11447 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011448 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011449 }
11450 }
11451 }
11452 }
11453
11454 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4) {
11455 TEST_REQUIRES_X86_SSE2;
11456 for (uint32_t n = 8; n <= 12; n += 4) {
11457 for (size_t k = 1; k <= 40; k += 9) {
11458 GemmMicrokernelTester()
11459 .mr(2)
11460 .nr(4)
11461 .kr(8)
11462 .sr(1)
11463 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011464 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011465 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011466 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011467 }
11468 }
11469 }
11470
11471 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
11472 TEST_REQUIRES_X86_SSE2;
11473 for (uint32_t n = 8; n <= 12; n += 4) {
11474 for (size_t k = 1; k <= 40; k += 9) {
11475 GemmMicrokernelTester()
11476 .mr(2)
11477 .nr(4)
11478 .kr(8)
11479 .sr(1)
11480 .m(2)
11481 .n(n)
11482 .k(k)
11483 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011484 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011485 }
11486 }
11487 }
11488
11489 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_a) {
11490 TEST_REQUIRES_X86_SSE2;
11491 for (uint32_t n = 8; n <= 12; n += 4) {
11492 for (size_t k = 1; k <= 40; k += 9) {
11493 GemmMicrokernelTester()
11494 .mr(2)
11495 .nr(4)
11496 .kr(8)
11497 .sr(1)
11498 .m(2)
11499 .n(n)
11500 .k(k)
11501 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011502 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011503 }
11504 }
11505 }
11506
11507 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_subtile) {
11508 TEST_REQUIRES_X86_SSE2;
11509 for (uint32_t n = 8; n <= 12; n += 4) {
11510 for (size_t k = 1; k <= 40; k += 9) {
11511 for (uint32_t m = 1; m <= 2; m++) {
11512 GemmMicrokernelTester()
11513 .mr(2)
11514 .nr(4)
11515 .kr(8)
11516 .sr(1)
11517 .m(m)
11518 .n(n)
11519 .k(k)
11520 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011521 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011522 }
11523 }
11524 }
11525 }
11526
11527 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm_subtile) {
11528 TEST_REQUIRES_X86_SSE2;
11529 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011530 for (uint32_t n = 1; n <= 4; n++) {
11531 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011532 GemmMicrokernelTester()
11533 .mr(2)
11534 .nr(4)
11535 .kr(8)
11536 .sr(1)
11537 .m(m)
11538 .n(n)
11539 .k(k)
11540 .cm_stride(7)
11541 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011542 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011543 }
11544 }
11545 }
11546 }
11547
11548 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmin) {
11549 TEST_REQUIRES_X86_SSE2;
11550 GemmMicrokernelTester()
11551 .mr(2)
11552 .nr(4)
11553 .kr(8)
11554 .sr(1)
11555 .m(2)
11556 .n(4)
11557 .k(8)
11558 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011559 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011560 }
11561
11562 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmax) {
11563 TEST_REQUIRES_X86_SSE2;
11564 GemmMicrokernelTester()
11565 .mr(2)
11566 .nr(4)
11567 .kr(8)
11568 .sr(1)
11569 .m(2)
11570 .n(4)
11571 .k(8)
11572 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011573 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011574 }
11575
11576 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm) {
11577 TEST_REQUIRES_X86_SSE2;
11578 GemmMicrokernelTester()
11579 .mr(2)
11580 .nr(4)
11581 .kr(8)
11582 .sr(1)
11583 .m(2)
11584 .n(4)
11585 .k(8)
11586 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011587 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011588 }
11589
11590 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_a_zero_point) {
11591 TEST_REQUIRES_X86_SSE2;
11592 for (size_t k = 1; k <= 40; k += 9) {
11593 GemmMicrokernelTester()
11594 .mr(2)
11595 .nr(4)
11596 .kr(8)
11597 .sr(1)
11598 .m(2)
11599 .n(4)
11600 .k(k)
11601 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011602 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011603 }
11604 }
11605
11606 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_b_zero_point) {
11607 TEST_REQUIRES_X86_SSE2;
11608 for (size_t k = 1; k <= 40; k += 9) {
11609 GemmMicrokernelTester()
11610 .mr(2)
11611 .nr(4)
11612 .kr(8)
11613 .sr(1)
11614 .m(2)
11615 .n(4)
11616 .k(k)
11617 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011618 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011619 }
11620 }
11621
11622 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_zero_point) {
11623 TEST_REQUIRES_X86_SSE2;
11624 for (size_t k = 1; k <= 40; k += 9) {
11625 GemmMicrokernelTester()
11626 .mr(2)
11627 .nr(4)
11628 .kr(8)
11629 .sr(1)
11630 .m(2)
11631 .n(4)
11632 .k(k)
11633 .a_zero_point(0)
11634 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011635 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011636 }
11637 }
11638#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11639
11640
11641#if XNN_ARCH_X86 || XNN_ARCH_X86_64
11642 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8) {
11643 TEST_REQUIRES_X86_SSE41;
11644 GemmMicrokernelTester()
11645 .mr(3)
11646 .nr(4)
11647 .kr(8)
11648 .sr(1)
11649 .m(3)
11650 .n(4)
11651 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011652 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011653 }
11654
11655 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cn) {
11656 TEST_REQUIRES_X86_SSE41;
11657 GemmMicrokernelTester()
11658 .mr(3)
11659 .nr(4)
11660 .kr(8)
11661 .sr(1)
11662 .m(3)
11663 .n(4)
11664 .k(8)
11665 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011666 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011667 }
11668
11669 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_strided_a) {
11670 TEST_REQUIRES_X86_SSE41;
11671 GemmMicrokernelTester()
11672 .mr(3)
11673 .nr(4)
11674 .kr(8)
11675 .sr(1)
11676 .m(3)
11677 .n(4)
11678 .k(8)
11679 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011680 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011681 }
11682
11683 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile) {
11684 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011685 for (uint32_t n = 1; n <= 4; n++) {
11686 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011687 GemmMicrokernelTester()
11688 .mr(3)
11689 .nr(4)
11690 .kr(8)
11691 .sr(1)
11692 .m(m)
11693 .n(n)
11694 .k(8)
11695 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011696 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011697 }
11698 }
11699 }
11700
11701 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
11702 TEST_REQUIRES_X86_SSE41;
11703 for (uint32_t m = 1; m <= 3; m++) {
11704 GemmMicrokernelTester()
11705 .mr(3)
11706 .nr(4)
11707 .kr(8)
11708 .sr(1)
11709 .m(m)
11710 .n(4)
11711 .k(8)
11712 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011713 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011714 }
11715 }
11716
11717 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
11718 TEST_REQUIRES_X86_SSE41;
11719 for (uint32_t n = 1; n <= 4; n++) {
11720 GemmMicrokernelTester()
11721 .mr(3)
11722 .nr(4)
11723 .kr(8)
11724 .sr(1)
11725 .m(3)
11726 .n(n)
11727 .k(8)
11728 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011729 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011730 }
11731 }
11732
11733 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8) {
11734 TEST_REQUIRES_X86_SSE41;
11735 for (size_t k = 1; k < 8; k++) {
11736 GemmMicrokernelTester()
11737 .mr(3)
11738 .nr(4)
11739 .kr(8)
11740 .sr(1)
11741 .m(3)
11742 .n(4)
11743 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011744 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011745 }
11746 }
11747
11748 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_strided_a) {
11749 TEST_REQUIRES_X86_SSE41;
11750 for (size_t k = 1; k < 8; k++) {
11751 GemmMicrokernelTester()
11752 .mr(3)
11753 .nr(4)
11754 .kr(8)
11755 .sr(1)
11756 .m(3)
11757 .n(4)
11758 .k(k)
11759 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011760 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011761 }
11762 }
11763
11764 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_subtile) {
11765 TEST_REQUIRES_X86_SSE41;
11766 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011767 for (uint32_t n = 1; n <= 4; n++) {
11768 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011769 GemmMicrokernelTester()
11770 .mr(3)
11771 .nr(4)
11772 .kr(8)
11773 .sr(1)
11774 .m(m)
11775 .n(n)
11776 .k(k)
11777 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011778 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011779 }
11780 }
11781 }
11782 }
11783
11784 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8) {
11785 TEST_REQUIRES_X86_SSE41;
11786 for (size_t k = 9; k < 16; k++) {
11787 GemmMicrokernelTester()
11788 .mr(3)
11789 .nr(4)
11790 .kr(8)
11791 .sr(1)
11792 .m(3)
11793 .n(4)
11794 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011795 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011796 }
11797 }
11798
11799 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_strided_a) {
11800 TEST_REQUIRES_X86_SSE41;
11801 for (size_t k = 9; k < 16; k++) {
11802 GemmMicrokernelTester()
11803 .mr(3)
11804 .nr(4)
11805 .kr(8)
11806 .sr(1)
11807 .m(3)
11808 .n(4)
11809 .k(k)
11810 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011811 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011812 }
11813 }
11814
11815 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_subtile) {
11816 TEST_REQUIRES_X86_SSE41;
11817 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011818 for (uint32_t n = 1; n <= 4; n++) {
11819 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011820 GemmMicrokernelTester()
11821 .mr(3)
11822 .nr(4)
11823 .kr(8)
11824 .sr(1)
11825 .m(m)
11826 .n(n)
11827 .k(k)
11828 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011829 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011830 }
11831 }
11832 }
11833 }
11834
11835 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8) {
11836 TEST_REQUIRES_X86_SSE41;
11837 for (size_t k = 16; k <= 80; k += 8) {
11838 GemmMicrokernelTester()
11839 .mr(3)
11840 .nr(4)
11841 .kr(8)
11842 .sr(1)
11843 .m(3)
11844 .n(4)
11845 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011846 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011847 }
11848 }
11849
11850 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_strided_a) {
11851 TEST_REQUIRES_X86_SSE41;
11852 for (size_t k = 16; k <= 80; k += 8) {
11853 GemmMicrokernelTester()
11854 .mr(3)
11855 .nr(4)
11856 .kr(8)
11857 .sr(1)
11858 .m(3)
11859 .n(4)
11860 .k(k)
11861 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011862 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011863 }
11864 }
11865
11866 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_subtile) {
11867 TEST_REQUIRES_X86_SSE41;
11868 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011869 for (uint32_t n = 1; n <= 4; n++) {
11870 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011871 GemmMicrokernelTester()
11872 .mr(3)
11873 .nr(4)
11874 .kr(8)
11875 .sr(1)
11876 .m(m)
11877 .n(n)
11878 .k(k)
11879 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011880 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011881 }
11882 }
11883 }
11884 }
11885
11886 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4) {
11887 TEST_REQUIRES_X86_SSE41;
11888 for (uint32_t n = 5; n < 8; n++) {
11889 for (size_t k = 1; k <= 40; k += 9) {
11890 GemmMicrokernelTester()
11891 .mr(3)
11892 .nr(4)
11893 .kr(8)
11894 .sr(1)
11895 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011896 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011897 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011898 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011899 }
11900 }
11901 }
11902
11903 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
11904 TEST_REQUIRES_X86_SSE41;
11905 for (uint32_t n = 5; n < 8; n++) {
11906 for (size_t k = 1; k <= 40; k += 9) {
11907 GemmMicrokernelTester()
11908 .mr(3)
11909 .nr(4)
11910 .kr(8)
11911 .sr(1)
11912 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011913 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011914 .k(k)
11915 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011916 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011917 }
11918 }
11919 }
11920
11921 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_a) {
11922 TEST_REQUIRES_X86_SSE41;
11923 for (uint32_t n = 5; n < 8; n++) {
11924 for (size_t k = 1; k <= 40; k += 9) {
11925 GemmMicrokernelTester()
11926 .mr(3)
11927 .nr(4)
11928 .kr(8)
11929 .sr(1)
11930 .m(3)
11931 .n(n)
11932 .k(k)
11933 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011934 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011935 }
11936 }
11937 }
11938
11939 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_subtile) {
11940 TEST_REQUIRES_X86_SSE41;
11941 for (uint32_t n = 5; n < 8; n++) {
11942 for (size_t k = 1; k <= 40; k += 9) {
11943 for (uint32_t m = 1; m <= 3; m++) {
11944 GemmMicrokernelTester()
11945 .mr(3)
11946 .nr(4)
11947 .kr(8)
11948 .sr(1)
11949 .m(m)
11950 .n(n)
11951 .k(k)
11952 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011953 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011954 }
11955 }
11956 }
11957 }
11958
11959 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4) {
11960 TEST_REQUIRES_X86_SSE41;
11961 for (uint32_t n = 8; n <= 12; n += 4) {
11962 for (size_t k = 1; k <= 40; k += 9) {
11963 GemmMicrokernelTester()
11964 .mr(3)
11965 .nr(4)
11966 .kr(8)
11967 .sr(1)
11968 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011969 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011970 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011971 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011972 }
11973 }
11974 }
11975
11976 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
11977 TEST_REQUIRES_X86_SSE41;
11978 for (uint32_t n = 8; n <= 12; n += 4) {
11979 for (size_t k = 1; k <= 40; k += 9) {
11980 GemmMicrokernelTester()
11981 .mr(3)
11982 .nr(4)
11983 .kr(8)
11984 .sr(1)
11985 .m(3)
11986 .n(n)
11987 .k(k)
11988 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011989 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011990 }
11991 }
11992 }
11993
11994 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_a) {
11995 TEST_REQUIRES_X86_SSE41;
11996 for (uint32_t n = 8; n <= 12; n += 4) {
11997 for (size_t k = 1; k <= 40; k += 9) {
11998 GemmMicrokernelTester()
11999 .mr(3)
12000 .nr(4)
12001 .kr(8)
12002 .sr(1)
12003 .m(3)
12004 .n(n)
12005 .k(k)
12006 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012007 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012008 }
12009 }
12010 }
12011
12012 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_subtile) {
12013 TEST_REQUIRES_X86_SSE41;
12014 for (uint32_t n = 8; n <= 12; n += 4) {
12015 for (size_t k = 1; k <= 40; k += 9) {
12016 for (uint32_t m = 1; m <= 3; m++) {
12017 GemmMicrokernelTester()
12018 .mr(3)
12019 .nr(4)
12020 .kr(8)
12021 .sr(1)
12022 .m(m)
12023 .n(n)
12024 .k(k)
12025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012026 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012027 }
12028 }
12029 }
12030 }
12031
12032 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm_subtile) {
12033 TEST_REQUIRES_X86_SSE41;
12034 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012035 for (uint32_t n = 1; n <= 4; n++) {
12036 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012037 GemmMicrokernelTester()
12038 .mr(3)
12039 .nr(4)
12040 .kr(8)
12041 .sr(1)
12042 .m(m)
12043 .n(n)
12044 .k(k)
12045 .cm_stride(7)
12046 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012047 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012048 }
12049 }
12050 }
12051 }
12052
12053 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmin) {
12054 TEST_REQUIRES_X86_SSE41;
12055 GemmMicrokernelTester()
12056 .mr(3)
12057 .nr(4)
12058 .kr(8)
12059 .sr(1)
12060 .m(3)
12061 .n(4)
12062 .k(8)
12063 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012064 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012065 }
12066
12067 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmax) {
12068 TEST_REQUIRES_X86_SSE41;
12069 GemmMicrokernelTester()
12070 .mr(3)
12071 .nr(4)
12072 .kr(8)
12073 .sr(1)
12074 .m(3)
12075 .n(4)
12076 .k(8)
12077 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012078 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012079 }
12080
12081 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm) {
12082 TEST_REQUIRES_X86_SSE41;
12083 GemmMicrokernelTester()
12084 .mr(3)
12085 .nr(4)
12086 .kr(8)
12087 .sr(1)
12088 .m(3)
12089 .n(4)
12090 .k(8)
12091 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012092 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012093 }
12094
12095 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_a_zero_point) {
12096 TEST_REQUIRES_X86_SSE41;
12097 for (size_t k = 1; k <= 40; k += 9) {
12098 GemmMicrokernelTester()
12099 .mr(3)
12100 .nr(4)
12101 .kr(8)
12102 .sr(1)
12103 .m(3)
12104 .n(4)
12105 .k(k)
12106 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012107 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012108 }
12109 }
12110
12111 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_b_zero_point) {
12112 TEST_REQUIRES_X86_SSE41;
12113 for (size_t k = 1; k <= 40; k += 9) {
12114 GemmMicrokernelTester()
12115 .mr(3)
12116 .nr(4)
12117 .kr(8)
12118 .sr(1)
12119 .m(3)
12120 .n(4)
12121 .k(k)
12122 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012123 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012124 }
12125 }
12126
12127 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_zero_point) {
12128 TEST_REQUIRES_X86_SSE41;
12129 for (size_t k = 1; k <= 40; k += 9) {
12130 GemmMicrokernelTester()
12131 .mr(3)
12132 .nr(4)
12133 .kr(8)
12134 .sr(1)
12135 .m(3)
12136 .n(4)
12137 .k(k)
12138 .a_zero_point(0)
12139 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012140 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012141 }
12142 }
12143#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12144
12145
12146#if XNN_ARCH_X86 || XNN_ARCH_X86_64
12147 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8) {
12148 TEST_REQUIRES_X86_AVX;
12149 GemmMicrokernelTester()
12150 .mr(2)
12151 .nr(4)
12152 .kr(8)
12153 .sr(1)
12154 .m(2)
12155 .n(4)
12156 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080012157 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012158 }
12159
12160 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cn) {
12161 TEST_REQUIRES_X86_AVX;
12162 GemmMicrokernelTester()
12163 .mr(2)
12164 .nr(4)
12165 .kr(8)
12166 .sr(1)
12167 .m(2)
12168 .n(4)
12169 .k(8)
12170 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012171 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012172 }
12173
12174 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_strided_a) {
12175 TEST_REQUIRES_X86_AVX;
12176 GemmMicrokernelTester()
12177 .mr(2)
12178 .nr(4)
12179 .kr(8)
12180 .sr(1)
12181 .m(2)
12182 .n(4)
12183 .k(8)
12184 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012185 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012186 }
12187
12188 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile) {
12189 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012190 for (uint32_t n = 1; n <= 4; n++) {
12191 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012192 GemmMicrokernelTester()
12193 .mr(2)
12194 .nr(4)
12195 .kr(8)
12196 .sr(1)
12197 .m(m)
12198 .n(n)
12199 .k(8)
12200 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012201 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012202 }
12203 }
12204 }
12205
12206 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
12207 TEST_REQUIRES_X86_AVX;
12208 for (uint32_t m = 1; m <= 2; m++) {
12209 GemmMicrokernelTester()
12210 .mr(2)
12211 .nr(4)
12212 .kr(8)
12213 .sr(1)
12214 .m(m)
12215 .n(4)
12216 .k(8)
12217 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012218 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012219 }
12220 }
12221
12222 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
12223 TEST_REQUIRES_X86_AVX;
12224 for (uint32_t n = 1; n <= 4; n++) {
12225 GemmMicrokernelTester()
12226 .mr(2)
12227 .nr(4)
12228 .kr(8)
12229 .sr(1)
12230 .m(2)
12231 .n(n)
12232 .k(8)
12233 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012234 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012235 }
12236 }
12237
12238 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8) {
12239 TEST_REQUIRES_X86_AVX;
12240 for (size_t k = 1; k < 8; k++) {
12241 GemmMicrokernelTester()
12242 .mr(2)
12243 .nr(4)
12244 .kr(8)
12245 .sr(1)
12246 .m(2)
12247 .n(4)
12248 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012249 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012250 }
12251 }
12252
12253 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_strided_a) {
12254 TEST_REQUIRES_X86_AVX;
12255 for (size_t k = 1; k < 8; k++) {
12256 GemmMicrokernelTester()
12257 .mr(2)
12258 .nr(4)
12259 .kr(8)
12260 .sr(1)
12261 .m(2)
12262 .n(4)
12263 .k(k)
12264 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012265 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012266 }
12267 }
12268
12269 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_subtile) {
12270 TEST_REQUIRES_X86_AVX;
12271 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012272 for (uint32_t n = 1; n <= 4; n++) {
12273 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012274 GemmMicrokernelTester()
12275 .mr(2)
12276 .nr(4)
12277 .kr(8)
12278 .sr(1)
12279 .m(m)
12280 .n(n)
12281 .k(k)
12282 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012283 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012284 }
12285 }
12286 }
12287 }
12288
12289 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8) {
12290 TEST_REQUIRES_X86_AVX;
12291 for (size_t k = 9; k < 16; k++) {
12292 GemmMicrokernelTester()
12293 .mr(2)
12294 .nr(4)
12295 .kr(8)
12296 .sr(1)
12297 .m(2)
12298 .n(4)
12299 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012300 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012301 }
12302 }
12303
12304 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_strided_a) {
12305 TEST_REQUIRES_X86_AVX;
12306 for (size_t k = 9; k < 16; k++) {
12307 GemmMicrokernelTester()
12308 .mr(2)
12309 .nr(4)
12310 .kr(8)
12311 .sr(1)
12312 .m(2)
12313 .n(4)
12314 .k(k)
12315 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012316 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012317 }
12318 }
12319
12320 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_subtile) {
12321 TEST_REQUIRES_X86_AVX;
12322 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012323 for (uint32_t n = 1; n <= 4; n++) {
12324 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012325 GemmMicrokernelTester()
12326 .mr(2)
12327 .nr(4)
12328 .kr(8)
12329 .sr(1)
12330 .m(m)
12331 .n(n)
12332 .k(k)
12333 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012334 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012335 }
12336 }
12337 }
12338 }
12339
12340 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8) {
12341 TEST_REQUIRES_X86_AVX;
12342 for (size_t k = 16; k <= 80; k += 8) {
12343 GemmMicrokernelTester()
12344 .mr(2)
12345 .nr(4)
12346 .kr(8)
12347 .sr(1)
12348 .m(2)
12349 .n(4)
12350 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012351 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012352 }
12353 }
12354
12355 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_strided_a) {
12356 TEST_REQUIRES_X86_AVX;
12357 for (size_t k = 16; k <= 80; k += 8) {
12358 GemmMicrokernelTester()
12359 .mr(2)
12360 .nr(4)
12361 .kr(8)
12362 .sr(1)
12363 .m(2)
12364 .n(4)
12365 .k(k)
12366 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012367 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012368 }
12369 }
12370
12371 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_subtile) {
12372 TEST_REQUIRES_X86_AVX;
12373 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012374 for (uint32_t n = 1; n <= 4; n++) {
12375 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012376 GemmMicrokernelTester()
12377 .mr(2)
12378 .nr(4)
12379 .kr(8)
12380 .sr(1)
12381 .m(m)
12382 .n(n)
12383 .k(k)
12384 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012385 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012386 }
12387 }
12388 }
12389 }
12390
12391 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4) {
12392 TEST_REQUIRES_X86_AVX;
12393 for (uint32_t n = 5; n < 8; n++) {
12394 for (size_t k = 1; k <= 40; k += 9) {
12395 GemmMicrokernelTester()
12396 .mr(2)
12397 .nr(4)
12398 .kr(8)
12399 .sr(1)
12400 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012401 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012402 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012403 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012404 }
12405 }
12406 }
12407
12408 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
12409 TEST_REQUIRES_X86_AVX;
12410 for (uint32_t n = 5; n < 8; n++) {
12411 for (size_t k = 1; k <= 40; k += 9) {
12412 GemmMicrokernelTester()
12413 .mr(2)
12414 .nr(4)
12415 .kr(8)
12416 .sr(1)
12417 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012418 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012419 .k(k)
12420 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012421 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012422 }
12423 }
12424 }
12425
12426 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_a) {
12427 TEST_REQUIRES_X86_AVX;
12428 for (uint32_t n = 5; n < 8; n++) {
12429 for (size_t k = 1; k <= 40; k += 9) {
12430 GemmMicrokernelTester()
12431 .mr(2)
12432 .nr(4)
12433 .kr(8)
12434 .sr(1)
12435 .m(2)
12436 .n(n)
12437 .k(k)
12438 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012439 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012440 }
12441 }
12442 }
12443
12444 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_subtile) {
12445 TEST_REQUIRES_X86_AVX;
12446 for (uint32_t n = 5; n < 8; n++) {
12447 for (size_t k = 1; k <= 40; k += 9) {
12448 for (uint32_t m = 1; m <= 2; m++) {
12449 GemmMicrokernelTester()
12450 .mr(2)
12451 .nr(4)
12452 .kr(8)
12453 .sr(1)
12454 .m(m)
12455 .n(n)
12456 .k(k)
12457 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012458 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012459 }
12460 }
12461 }
12462 }
12463
12464 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4) {
12465 TEST_REQUIRES_X86_AVX;
12466 for (uint32_t n = 8; n <= 12; n += 4) {
12467 for (size_t k = 1; k <= 40; k += 9) {
12468 GemmMicrokernelTester()
12469 .mr(2)
12470 .nr(4)
12471 .kr(8)
12472 .sr(1)
12473 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012474 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012475 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012476 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012477 }
12478 }
12479 }
12480
12481 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_cn) {
12482 TEST_REQUIRES_X86_AVX;
12483 for (uint32_t n = 8; n <= 12; n += 4) {
12484 for (size_t k = 1; k <= 40; k += 9) {
12485 GemmMicrokernelTester()
12486 .mr(2)
12487 .nr(4)
12488 .kr(8)
12489 .sr(1)
12490 .m(2)
12491 .n(n)
12492 .k(k)
12493 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012494 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012495 }
12496 }
12497 }
12498
12499 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_a) {
12500 TEST_REQUIRES_X86_AVX;
12501 for (uint32_t n = 8; n <= 12; n += 4) {
12502 for (size_t k = 1; k <= 40; k += 9) {
12503 GemmMicrokernelTester()
12504 .mr(2)
12505 .nr(4)
12506 .kr(8)
12507 .sr(1)
12508 .m(2)
12509 .n(n)
12510 .k(k)
12511 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012512 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012513 }
12514 }
12515 }
12516
12517 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_subtile) {
12518 TEST_REQUIRES_X86_AVX;
12519 for (uint32_t n = 8; n <= 12; n += 4) {
12520 for (size_t k = 1; k <= 40; k += 9) {
12521 for (uint32_t m = 1; m <= 2; m++) {
12522 GemmMicrokernelTester()
12523 .mr(2)
12524 .nr(4)
12525 .kr(8)
12526 .sr(1)
12527 .m(m)
12528 .n(n)
12529 .k(k)
12530 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012531 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012532 }
12533 }
12534 }
12535 }
12536
12537 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm_subtile) {
12538 TEST_REQUIRES_X86_AVX;
12539 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012540 for (uint32_t n = 1; n <= 4; n++) {
12541 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012542 GemmMicrokernelTester()
12543 .mr(2)
12544 .nr(4)
12545 .kr(8)
12546 .sr(1)
12547 .m(m)
12548 .n(n)
12549 .k(k)
12550 .cm_stride(7)
12551 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012552 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012553 }
12554 }
12555 }
12556 }
12557
12558 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmin) {
12559 TEST_REQUIRES_X86_AVX;
12560 GemmMicrokernelTester()
12561 .mr(2)
12562 .nr(4)
12563 .kr(8)
12564 .sr(1)
12565 .m(2)
12566 .n(4)
12567 .k(8)
12568 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012569 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012570 }
12571
12572 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmax) {
12573 TEST_REQUIRES_X86_AVX;
12574 GemmMicrokernelTester()
12575 .mr(2)
12576 .nr(4)
12577 .kr(8)
12578 .sr(1)
12579 .m(2)
12580 .n(4)
12581 .k(8)
12582 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012583 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012584 }
12585
12586 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm) {
12587 TEST_REQUIRES_X86_AVX;
12588 GemmMicrokernelTester()
12589 .mr(2)
12590 .nr(4)
12591 .kr(8)
12592 .sr(1)
12593 .m(2)
12594 .n(4)
12595 .k(8)
12596 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012597 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012598 }
12599
12600 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_a_zero_point) {
12601 TEST_REQUIRES_X86_AVX;
12602 for (size_t k = 1; k <= 40; k += 9) {
12603 GemmMicrokernelTester()
12604 .mr(2)
12605 .nr(4)
12606 .kr(8)
12607 .sr(1)
12608 .m(2)
12609 .n(4)
12610 .k(k)
12611 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012612 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012613 }
12614 }
12615
12616 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_b_zero_point) {
12617 TEST_REQUIRES_X86_AVX;
12618 for (size_t k = 1; k <= 40; k += 9) {
12619 GemmMicrokernelTester()
12620 .mr(2)
12621 .nr(4)
12622 .kr(8)
12623 .sr(1)
12624 .m(2)
12625 .n(4)
12626 .k(k)
12627 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012628 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012629 }
12630 }
12631
12632 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_zero_point) {
12633 TEST_REQUIRES_X86_AVX;
12634 for (size_t k = 1; k <= 40; k += 9) {
12635 GemmMicrokernelTester()
12636 .mr(2)
12637 .nr(4)
12638 .kr(8)
12639 .sr(1)
12640 .m(2)
12641 .n(4)
12642 .k(k)
12643 .a_zero_point(0)
12644 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012645 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012646 }
12647 }
12648#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12649
12650
12651#if XNN_ARCH_X86 || XNN_ARCH_X86_64
12652 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8) {
12653 TEST_REQUIRES_X86_AVX;
12654 GemmMicrokernelTester()
12655 .mr(3)
12656 .nr(4)
12657 .kr(8)
12658 .sr(1)
12659 .m(3)
12660 .n(4)
12661 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080012662 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012663 }
12664
12665 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cn) {
12666 TEST_REQUIRES_X86_AVX;
12667 GemmMicrokernelTester()
12668 .mr(3)
12669 .nr(4)
12670 .kr(8)
12671 .sr(1)
12672 .m(3)
12673 .n(4)
12674 .k(8)
12675 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012676 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012677 }
12678
12679 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_strided_a) {
12680 TEST_REQUIRES_X86_AVX;
12681 GemmMicrokernelTester()
12682 .mr(3)
12683 .nr(4)
12684 .kr(8)
12685 .sr(1)
12686 .m(3)
12687 .n(4)
12688 .k(8)
12689 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012690 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012691 }
12692
12693 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile) {
12694 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012695 for (uint32_t n = 1; n <= 4; n++) {
12696 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012697 GemmMicrokernelTester()
12698 .mr(3)
12699 .nr(4)
12700 .kr(8)
12701 .sr(1)
12702 .m(m)
12703 .n(n)
12704 .k(8)
12705 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012706 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012707 }
12708 }
12709 }
12710
12711 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
12712 TEST_REQUIRES_X86_AVX;
12713 for (uint32_t m = 1; m <= 3; m++) {
12714 GemmMicrokernelTester()
12715 .mr(3)
12716 .nr(4)
12717 .kr(8)
12718 .sr(1)
12719 .m(m)
12720 .n(4)
12721 .k(8)
12722 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012723 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012724 }
12725 }
12726
12727 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
12728 TEST_REQUIRES_X86_AVX;
12729 for (uint32_t n = 1; n <= 4; n++) {
12730 GemmMicrokernelTester()
12731 .mr(3)
12732 .nr(4)
12733 .kr(8)
12734 .sr(1)
12735 .m(3)
12736 .n(n)
12737 .k(8)
12738 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012739 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012740 }
12741 }
12742
12743 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8) {
12744 TEST_REQUIRES_X86_AVX;
12745 for (size_t k = 1; k < 8; k++) {
12746 GemmMicrokernelTester()
12747 .mr(3)
12748 .nr(4)
12749 .kr(8)
12750 .sr(1)
12751 .m(3)
12752 .n(4)
12753 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012754 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012755 }
12756 }
12757
12758 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_strided_a) {
12759 TEST_REQUIRES_X86_AVX;
12760 for (size_t k = 1; k < 8; k++) {
12761 GemmMicrokernelTester()
12762 .mr(3)
12763 .nr(4)
12764 .kr(8)
12765 .sr(1)
12766 .m(3)
12767 .n(4)
12768 .k(k)
12769 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012770 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012771 }
12772 }
12773
12774 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_subtile) {
12775 TEST_REQUIRES_X86_AVX;
12776 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012777 for (uint32_t n = 1; n <= 4; n++) {
12778 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012779 GemmMicrokernelTester()
12780 .mr(3)
12781 .nr(4)
12782 .kr(8)
12783 .sr(1)
12784 .m(m)
12785 .n(n)
12786 .k(k)
12787 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012788 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012789 }
12790 }
12791 }
12792 }
12793
12794 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8) {
12795 TEST_REQUIRES_X86_AVX;
12796 for (size_t k = 9; k < 16; k++) {
12797 GemmMicrokernelTester()
12798 .mr(3)
12799 .nr(4)
12800 .kr(8)
12801 .sr(1)
12802 .m(3)
12803 .n(4)
12804 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012805 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012806 }
12807 }
12808
12809 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_strided_a) {
12810 TEST_REQUIRES_X86_AVX;
12811 for (size_t k = 9; k < 16; k++) {
12812 GemmMicrokernelTester()
12813 .mr(3)
12814 .nr(4)
12815 .kr(8)
12816 .sr(1)
12817 .m(3)
12818 .n(4)
12819 .k(k)
12820 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012821 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012822 }
12823 }
12824
12825 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_subtile) {
12826 TEST_REQUIRES_X86_AVX;
12827 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012828 for (uint32_t n = 1; n <= 4; n++) {
12829 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012830 GemmMicrokernelTester()
12831 .mr(3)
12832 .nr(4)
12833 .kr(8)
12834 .sr(1)
12835 .m(m)
12836 .n(n)
12837 .k(k)
12838 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012839 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012840 }
12841 }
12842 }
12843 }
12844
12845 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8) {
12846 TEST_REQUIRES_X86_AVX;
12847 for (size_t k = 16; k <= 80; k += 8) {
12848 GemmMicrokernelTester()
12849 .mr(3)
12850 .nr(4)
12851 .kr(8)
12852 .sr(1)
12853 .m(3)
12854 .n(4)
12855 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012856 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012857 }
12858 }
12859
12860 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_strided_a) {
12861 TEST_REQUIRES_X86_AVX;
12862 for (size_t k = 16; k <= 80; k += 8) {
12863 GemmMicrokernelTester()
12864 .mr(3)
12865 .nr(4)
12866 .kr(8)
12867 .sr(1)
12868 .m(3)
12869 .n(4)
12870 .k(k)
12871 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012872 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012873 }
12874 }
12875
12876 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_subtile) {
12877 TEST_REQUIRES_X86_AVX;
12878 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012879 for (uint32_t n = 1; n <= 4; n++) {
12880 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012881 GemmMicrokernelTester()
12882 .mr(3)
12883 .nr(4)
12884 .kr(8)
12885 .sr(1)
12886 .m(m)
12887 .n(n)
12888 .k(k)
12889 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012890 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012891 }
12892 }
12893 }
12894 }
12895
12896 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4) {
12897 TEST_REQUIRES_X86_AVX;
12898 for (uint32_t n = 5; n < 8; n++) {
12899 for (size_t k = 1; k <= 40; k += 9) {
12900 GemmMicrokernelTester()
12901 .mr(3)
12902 .nr(4)
12903 .kr(8)
12904 .sr(1)
12905 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012906 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012907 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012908 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012909 }
12910 }
12911 }
12912
12913 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
12914 TEST_REQUIRES_X86_AVX;
12915 for (uint32_t n = 5; n < 8; n++) {
12916 for (size_t k = 1; k <= 40; k += 9) {
12917 GemmMicrokernelTester()
12918 .mr(3)
12919 .nr(4)
12920 .kr(8)
12921 .sr(1)
12922 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012923 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012924 .k(k)
12925 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012926 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012927 }
12928 }
12929 }
12930
12931 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_a) {
12932 TEST_REQUIRES_X86_AVX;
12933 for (uint32_t n = 5; n < 8; n++) {
12934 for (size_t k = 1; k <= 40; k += 9) {
12935 GemmMicrokernelTester()
12936 .mr(3)
12937 .nr(4)
12938 .kr(8)
12939 .sr(1)
12940 .m(3)
12941 .n(n)
12942 .k(k)
12943 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012944 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012945 }
12946 }
12947 }
12948
12949 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_subtile) {
12950 TEST_REQUIRES_X86_AVX;
12951 for (uint32_t n = 5; n < 8; n++) {
12952 for (size_t k = 1; k <= 40; k += 9) {
12953 for (uint32_t m = 1; m <= 3; m++) {
12954 GemmMicrokernelTester()
12955 .mr(3)
12956 .nr(4)
12957 .kr(8)
12958 .sr(1)
12959 .m(m)
12960 .n(n)
12961 .k(k)
12962 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012963 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012964 }
12965 }
12966 }
12967 }
12968
12969 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4) {
12970 TEST_REQUIRES_X86_AVX;
12971 for (uint32_t n = 8; n <= 12; n += 4) {
12972 for (size_t k = 1; k <= 40; k += 9) {
12973 GemmMicrokernelTester()
12974 .mr(3)
12975 .nr(4)
12976 .kr(8)
12977 .sr(1)
12978 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012979 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012980 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012981 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012982 }
12983 }
12984 }
12985
12986 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_cn) {
12987 TEST_REQUIRES_X86_AVX;
12988 for (uint32_t n = 8; n <= 12; n += 4) {
12989 for (size_t k = 1; k <= 40; k += 9) {
12990 GemmMicrokernelTester()
12991 .mr(3)
12992 .nr(4)
12993 .kr(8)
12994 .sr(1)
12995 .m(3)
12996 .n(n)
12997 .k(k)
12998 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012999 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013000 }
13001 }
13002 }
13003
13004 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_a) {
13005 TEST_REQUIRES_X86_AVX;
13006 for (uint32_t n = 8; n <= 12; n += 4) {
13007 for (size_t k = 1; k <= 40; k += 9) {
13008 GemmMicrokernelTester()
13009 .mr(3)
13010 .nr(4)
13011 .kr(8)
13012 .sr(1)
13013 .m(3)
13014 .n(n)
13015 .k(k)
13016 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013017 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013018 }
13019 }
13020 }
13021
13022 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_subtile) {
13023 TEST_REQUIRES_X86_AVX;
13024 for (uint32_t n = 8; n <= 12; n += 4) {
13025 for (size_t k = 1; k <= 40; k += 9) {
13026 for (uint32_t m = 1; m <= 3; m++) {
13027 GemmMicrokernelTester()
13028 .mr(3)
13029 .nr(4)
13030 .kr(8)
13031 .sr(1)
13032 .m(m)
13033 .n(n)
13034 .k(k)
13035 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013036 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013037 }
13038 }
13039 }
13040 }
13041
13042 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm_subtile) {
13043 TEST_REQUIRES_X86_AVX;
13044 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013045 for (uint32_t n = 1; n <= 4; n++) {
13046 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013047 GemmMicrokernelTester()
13048 .mr(3)
13049 .nr(4)
13050 .kr(8)
13051 .sr(1)
13052 .m(m)
13053 .n(n)
13054 .k(k)
13055 .cm_stride(7)
13056 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013057 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013058 }
13059 }
13060 }
13061 }
13062
13063 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmin) {
13064 TEST_REQUIRES_X86_AVX;
13065 GemmMicrokernelTester()
13066 .mr(3)
13067 .nr(4)
13068 .kr(8)
13069 .sr(1)
13070 .m(3)
13071 .n(4)
13072 .k(8)
13073 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013074 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013075 }
13076
13077 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmax) {
13078 TEST_REQUIRES_X86_AVX;
13079 GemmMicrokernelTester()
13080 .mr(3)
13081 .nr(4)
13082 .kr(8)
13083 .sr(1)
13084 .m(3)
13085 .n(4)
13086 .k(8)
13087 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013088 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013089 }
13090
13091 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm) {
13092 TEST_REQUIRES_X86_AVX;
13093 GemmMicrokernelTester()
13094 .mr(3)
13095 .nr(4)
13096 .kr(8)
13097 .sr(1)
13098 .m(3)
13099 .n(4)
13100 .k(8)
13101 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013102 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013103 }
13104
13105 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_a_zero_point) {
13106 TEST_REQUIRES_X86_AVX;
13107 for (size_t k = 1; k <= 40; k += 9) {
13108 GemmMicrokernelTester()
13109 .mr(3)
13110 .nr(4)
13111 .kr(8)
13112 .sr(1)
13113 .m(3)
13114 .n(4)
13115 .k(k)
13116 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013117 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013118 }
13119 }
13120
13121 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_b_zero_point) {
13122 TEST_REQUIRES_X86_AVX;
13123 for (size_t k = 1; k <= 40; k += 9) {
13124 GemmMicrokernelTester()
13125 .mr(3)
13126 .nr(4)
13127 .kr(8)
13128 .sr(1)
13129 .m(3)
13130 .n(4)
13131 .k(k)
13132 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013133 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013134 }
13135 }
13136
13137 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_zero_point) {
13138 TEST_REQUIRES_X86_AVX;
13139 for (size_t k = 1; k <= 40; k += 9) {
13140 GemmMicrokernelTester()
13141 .mr(3)
13142 .nr(4)
13143 .kr(8)
13144 .sr(1)
13145 .m(3)
13146 .n(4)
13147 .k(k)
13148 .a_zero_point(0)
13149 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013150 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013151 }
13152 }
13153#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13154
13155
13156#if XNN_ARCH_X86 || XNN_ARCH_X86_64
13157 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8) {
13158 TEST_REQUIRES_X86_XOP;
13159 GemmMicrokernelTester()
13160 .mr(2)
13161 .nr(4)
13162 .kr(8)
13163 .sr(1)
13164 .m(2)
13165 .n(4)
13166 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080013167 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013168 }
13169
13170 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cn) {
13171 TEST_REQUIRES_X86_XOP;
13172 GemmMicrokernelTester()
13173 .mr(2)
13174 .nr(4)
13175 .kr(8)
13176 .sr(1)
13177 .m(2)
13178 .n(4)
13179 .k(8)
13180 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013181 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013182 }
13183
13184 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_strided_a) {
13185 TEST_REQUIRES_X86_XOP;
13186 GemmMicrokernelTester()
13187 .mr(2)
13188 .nr(4)
13189 .kr(8)
13190 .sr(1)
13191 .m(2)
13192 .n(4)
13193 .k(8)
13194 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013195 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013196 }
13197
13198 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile) {
13199 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013200 for (uint32_t n = 1; n <= 4; n++) {
13201 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013202 GemmMicrokernelTester()
13203 .mr(2)
13204 .nr(4)
13205 .kr(8)
13206 .sr(1)
13207 .m(m)
13208 .n(n)
13209 .k(8)
13210 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013211 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013212 }
13213 }
13214 }
13215
13216 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
13217 TEST_REQUIRES_X86_XOP;
13218 for (uint32_t m = 1; m <= 2; m++) {
13219 GemmMicrokernelTester()
13220 .mr(2)
13221 .nr(4)
13222 .kr(8)
13223 .sr(1)
13224 .m(m)
13225 .n(4)
13226 .k(8)
13227 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013228 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013229 }
13230 }
13231
13232 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
13233 TEST_REQUIRES_X86_XOP;
13234 for (uint32_t n = 1; n <= 4; n++) {
13235 GemmMicrokernelTester()
13236 .mr(2)
13237 .nr(4)
13238 .kr(8)
13239 .sr(1)
13240 .m(2)
13241 .n(n)
13242 .k(8)
13243 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013244 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013245 }
13246 }
13247
13248 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8) {
13249 TEST_REQUIRES_X86_XOP;
13250 for (size_t k = 1; k < 8; k++) {
13251 GemmMicrokernelTester()
13252 .mr(2)
13253 .nr(4)
13254 .kr(8)
13255 .sr(1)
13256 .m(2)
13257 .n(4)
13258 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013259 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013260 }
13261 }
13262
13263 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_strided_a) {
13264 TEST_REQUIRES_X86_XOP;
13265 for (size_t k = 1; k < 8; k++) {
13266 GemmMicrokernelTester()
13267 .mr(2)
13268 .nr(4)
13269 .kr(8)
13270 .sr(1)
13271 .m(2)
13272 .n(4)
13273 .k(k)
13274 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013275 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013276 }
13277 }
13278
13279 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_subtile) {
13280 TEST_REQUIRES_X86_XOP;
13281 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013282 for (uint32_t n = 1; n <= 4; n++) {
13283 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013284 GemmMicrokernelTester()
13285 .mr(2)
13286 .nr(4)
13287 .kr(8)
13288 .sr(1)
13289 .m(m)
13290 .n(n)
13291 .k(k)
13292 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013293 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013294 }
13295 }
13296 }
13297 }
13298
13299 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8) {
13300 TEST_REQUIRES_X86_XOP;
13301 for (size_t k = 9; k < 16; k++) {
13302 GemmMicrokernelTester()
13303 .mr(2)
13304 .nr(4)
13305 .kr(8)
13306 .sr(1)
13307 .m(2)
13308 .n(4)
13309 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013310 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013311 }
13312 }
13313
13314 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_strided_a) {
13315 TEST_REQUIRES_X86_XOP;
13316 for (size_t k = 9; k < 16; k++) {
13317 GemmMicrokernelTester()
13318 .mr(2)
13319 .nr(4)
13320 .kr(8)
13321 .sr(1)
13322 .m(2)
13323 .n(4)
13324 .k(k)
13325 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013326 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013327 }
13328 }
13329
13330 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_subtile) {
13331 TEST_REQUIRES_X86_XOP;
13332 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013333 for (uint32_t n = 1; n <= 4; n++) {
13334 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013335 GemmMicrokernelTester()
13336 .mr(2)
13337 .nr(4)
13338 .kr(8)
13339 .sr(1)
13340 .m(m)
13341 .n(n)
13342 .k(k)
13343 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013344 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013345 }
13346 }
13347 }
13348 }
13349
13350 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8) {
13351 TEST_REQUIRES_X86_XOP;
13352 for (size_t k = 16; k <= 80; k += 8) {
13353 GemmMicrokernelTester()
13354 .mr(2)
13355 .nr(4)
13356 .kr(8)
13357 .sr(1)
13358 .m(2)
13359 .n(4)
13360 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013361 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013362 }
13363 }
13364
13365 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_strided_a) {
13366 TEST_REQUIRES_X86_XOP;
13367 for (size_t k = 16; k <= 80; k += 8) {
13368 GemmMicrokernelTester()
13369 .mr(2)
13370 .nr(4)
13371 .kr(8)
13372 .sr(1)
13373 .m(2)
13374 .n(4)
13375 .k(k)
13376 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013377 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013378 }
13379 }
13380
13381 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_subtile) {
13382 TEST_REQUIRES_X86_XOP;
13383 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013384 for (uint32_t n = 1; n <= 4; n++) {
13385 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013386 GemmMicrokernelTester()
13387 .mr(2)
13388 .nr(4)
13389 .kr(8)
13390 .sr(1)
13391 .m(m)
13392 .n(n)
13393 .k(k)
13394 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013395 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013396 }
13397 }
13398 }
13399 }
13400
13401 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4) {
13402 TEST_REQUIRES_X86_XOP;
13403 for (uint32_t n = 5; n < 8; n++) {
13404 for (size_t k = 1; k <= 40; k += 9) {
13405 GemmMicrokernelTester()
13406 .mr(2)
13407 .nr(4)
13408 .kr(8)
13409 .sr(1)
13410 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013411 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013412 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013413 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013414 }
13415 }
13416 }
13417
13418 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
13419 TEST_REQUIRES_X86_XOP;
13420 for (uint32_t n = 5; n < 8; n++) {
13421 for (size_t k = 1; k <= 40; k += 9) {
13422 GemmMicrokernelTester()
13423 .mr(2)
13424 .nr(4)
13425 .kr(8)
13426 .sr(1)
13427 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013428 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013429 .k(k)
13430 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013431 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013432 }
13433 }
13434 }
13435
13436 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_a) {
13437 TEST_REQUIRES_X86_XOP;
13438 for (uint32_t n = 5; n < 8; n++) {
13439 for (size_t k = 1; k <= 40; k += 9) {
13440 GemmMicrokernelTester()
13441 .mr(2)
13442 .nr(4)
13443 .kr(8)
13444 .sr(1)
13445 .m(2)
13446 .n(n)
13447 .k(k)
13448 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013449 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013450 }
13451 }
13452 }
13453
13454 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_subtile) {
13455 TEST_REQUIRES_X86_XOP;
13456 for (uint32_t n = 5; n < 8; n++) {
13457 for (size_t k = 1; k <= 40; k += 9) {
13458 for (uint32_t m = 1; m <= 2; m++) {
13459 GemmMicrokernelTester()
13460 .mr(2)
13461 .nr(4)
13462 .kr(8)
13463 .sr(1)
13464 .m(m)
13465 .n(n)
13466 .k(k)
13467 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013468 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013469 }
13470 }
13471 }
13472 }
13473
13474 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4) {
13475 TEST_REQUIRES_X86_XOP;
13476 for (uint32_t n = 8; n <= 12; n += 4) {
13477 for (size_t k = 1; k <= 40; k += 9) {
13478 GemmMicrokernelTester()
13479 .mr(2)
13480 .nr(4)
13481 .kr(8)
13482 .sr(1)
13483 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013484 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013485 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013486 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013487 }
13488 }
13489 }
13490
13491 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_cn) {
13492 TEST_REQUIRES_X86_XOP;
13493 for (uint32_t n = 8; n <= 12; n += 4) {
13494 for (size_t k = 1; k <= 40; k += 9) {
13495 GemmMicrokernelTester()
13496 .mr(2)
13497 .nr(4)
13498 .kr(8)
13499 .sr(1)
13500 .m(2)
13501 .n(n)
13502 .k(k)
13503 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013504 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013505 }
13506 }
13507 }
13508
13509 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_a) {
13510 TEST_REQUIRES_X86_XOP;
13511 for (uint32_t n = 8; n <= 12; n += 4) {
13512 for (size_t k = 1; k <= 40; k += 9) {
13513 GemmMicrokernelTester()
13514 .mr(2)
13515 .nr(4)
13516 .kr(8)
13517 .sr(1)
13518 .m(2)
13519 .n(n)
13520 .k(k)
13521 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013522 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013523 }
13524 }
13525 }
13526
13527 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_subtile) {
13528 TEST_REQUIRES_X86_XOP;
13529 for (uint32_t n = 8; n <= 12; n += 4) {
13530 for (size_t k = 1; k <= 40; k += 9) {
13531 for (uint32_t m = 1; m <= 2; m++) {
13532 GemmMicrokernelTester()
13533 .mr(2)
13534 .nr(4)
13535 .kr(8)
13536 .sr(1)
13537 .m(m)
13538 .n(n)
13539 .k(k)
13540 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013541 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013542 }
13543 }
13544 }
13545 }
13546
13547 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm_subtile) {
13548 TEST_REQUIRES_X86_XOP;
13549 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013550 for (uint32_t n = 1; n <= 4; n++) {
13551 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013552 GemmMicrokernelTester()
13553 .mr(2)
13554 .nr(4)
13555 .kr(8)
13556 .sr(1)
13557 .m(m)
13558 .n(n)
13559 .k(k)
13560 .cm_stride(7)
13561 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013562 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013563 }
13564 }
13565 }
13566 }
13567
13568 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmin) {
13569 TEST_REQUIRES_X86_XOP;
13570 GemmMicrokernelTester()
13571 .mr(2)
13572 .nr(4)
13573 .kr(8)
13574 .sr(1)
13575 .m(2)
13576 .n(4)
13577 .k(8)
13578 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013579 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013580 }
13581
13582 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmax) {
13583 TEST_REQUIRES_X86_XOP;
13584 GemmMicrokernelTester()
13585 .mr(2)
13586 .nr(4)
13587 .kr(8)
13588 .sr(1)
13589 .m(2)
13590 .n(4)
13591 .k(8)
13592 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013593 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013594 }
13595
13596 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm) {
13597 TEST_REQUIRES_X86_XOP;
13598 GemmMicrokernelTester()
13599 .mr(2)
13600 .nr(4)
13601 .kr(8)
13602 .sr(1)
13603 .m(2)
13604 .n(4)
13605 .k(8)
13606 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013607 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013608 }
13609
13610 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_a_zero_point) {
13611 TEST_REQUIRES_X86_XOP;
13612 for (size_t k = 1; k <= 40; k += 9) {
13613 GemmMicrokernelTester()
13614 .mr(2)
13615 .nr(4)
13616 .kr(8)
13617 .sr(1)
13618 .m(2)
13619 .n(4)
13620 .k(k)
13621 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013622 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013623 }
13624 }
13625
13626 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_b_zero_point) {
13627 TEST_REQUIRES_X86_XOP;
13628 for (size_t k = 1; k <= 40; k += 9) {
13629 GemmMicrokernelTester()
13630 .mr(2)
13631 .nr(4)
13632 .kr(8)
13633 .sr(1)
13634 .m(2)
13635 .n(4)
13636 .k(k)
13637 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013638 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013639 }
13640 }
13641
13642 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_zero_point) {
13643 TEST_REQUIRES_X86_XOP;
13644 for (size_t k = 1; k <= 40; k += 9) {
13645 GemmMicrokernelTester()
13646 .mr(2)
13647 .nr(4)
13648 .kr(8)
13649 .sr(1)
13650 .m(2)
13651 .n(4)
13652 .k(k)
13653 .a_zero_point(0)
13654 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013655 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013656 }
13657 }
13658#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13659
13660
13661#if XNN_ARCH_X86 || XNN_ARCH_X86_64
13662 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8) {
13663 TEST_REQUIRES_X86_XOP;
13664 GemmMicrokernelTester()
13665 .mr(3)
13666 .nr(4)
13667 .kr(8)
13668 .sr(1)
13669 .m(3)
13670 .n(4)
13671 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080013672 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013673 }
13674
13675 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cn) {
13676 TEST_REQUIRES_X86_XOP;
13677 GemmMicrokernelTester()
13678 .mr(3)
13679 .nr(4)
13680 .kr(8)
13681 .sr(1)
13682 .m(3)
13683 .n(4)
13684 .k(8)
13685 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013686 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013687 }
13688
13689 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_strided_a) {
13690 TEST_REQUIRES_X86_XOP;
13691 GemmMicrokernelTester()
13692 .mr(3)
13693 .nr(4)
13694 .kr(8)
13695 .sr(1)
13696 .m(3)
13697 .n(4)
13698 .k(8)
13699 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013700 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013701 }
13702
13703 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile) {
13704 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013705 for (uint32_t n = 1; n <= 4; n++) {
13706 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013707 GemmMicrokernelTester()
13708 .mr(3)
13709 .nr(4)
13710 .kr(8)
13711 .sr(1)
13712 .m(m)
13713 .n(n)
13714 .k(8)
13715 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013716 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013717 }
13718 }
13719 }
13720
13721 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
13722 TEST_REQUIRES_X86_XOP;
13723 for (uint32_t m = 1; m <= 3; m++) {
13724 GemmMicrokernelTester()
13725 .mr(3)
13726 .nr(4)
13727 .kr(8)
13728 .sr(1)
13729 .m(m)
13730 .n(4)
13731 .k(8)
13732 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013733 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013734 }
13735 }
13736
13737 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
13738 TEST_REQUIRES_X86_XOP;
13739 for (uint32_t n = 1; n <= 4; n++) {
13740 GemmMicrokernelTester()
13741 .mr(3)
13742 .nr(4)
13743 .kr(8)
13744 .sr(1)
13745 .m(3)
13746 .n(n)
13747 .k(8)
13748 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013749 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013750 }
13751 }
13752
13753 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8) {
13754 TEST_REQUIRES_X86_XOP;
13755 for (size_t k = 1; k < 8; k++) {
13756 GemmMicrokernelTester()
13757 .mr(3)
13758 .nr(4)
13759 .kr(8)
13760 .sr(1)
13761 .m(3)
13762 .n(4)
13763 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013764 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013765 }
13766 }
13767
13768 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_strided_a) {
13769 TEST_REQUIRES_X86_XOP;
13770 for (size_t k = 1; k < 8; k++) {
13771 GemmMicrokernelTester()
13772 .mr(3)
13773 .nr(4)
13774 .kr(8)
13775 .sr(1)
13776 .m(3)
13777 .n(4)
13778 .k(k)
13779 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013780 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013781 }
13782 }
13783
13784 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_subtile) {
13785 TEST_REQUIRES_X86_XOP;
13786 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013787 for (uint32_t n = 1; n <= 4; n++) {
13788 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013789 GemmMicrokernelTester()
13790 .mr(3)
13791 .nr(4)
13792 .kr(8)
13793 .sr(1)
13794 .m(m)
13795 .n(n)
13796 .k(k)
13797 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013798 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013799 }
13800 }
13801 }
13802 }
13803
13804 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8) {
13805 TEST_REQUIRES_X86_XOP;
13806 for (size_t k = 9; k < 16; k++) {
13807 GemmMicrokernelTester()
13808 .mr(3)
13809 .nr(4)
13810 .kr(8)
13811 .sr(1)
13812 .m(3)
13813 .n(4)
13814 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013815 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013816 }
13817 }
13818
13819 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_strided_a) {
13820 TEST_REQUIRES_X86_XOP;
13821 for (size_t k = 9; k < 16; k++) {
13822 GemmMicrokernelTester()
13823 .mr(3)
13824 .nr(4)
13825 .kr(8)
13826 .sr(1)
13827 .m(3)
13828 .n(4)
13829 .k(k)
13830 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013831 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013832 }
13833 }
13834
13835 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_subtile) {
13836 TEST_REQUIRES_X86_XOP;
13837 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013838 for (uint32_t n = 1; n <= 4; n++) {
13839 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013840 GemmMicrokernelTester()
13841 .mr(3)
13842 .nr(4)
13843 .kr(8)
13844 .sr(1)
13845 .m(m)
13846 .n(n)
13847 .k(k)
13848 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013849 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013850 }
13851 }
13852 }
13853 }
13854
13855 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8) {
13856 TEST_REQUIRES_X86_XOP;
13857 for (size_t k = 16; k <= 80; k += 8) {
13858 GemmMicrokernelTester()
13859 .mr(3)
13860 .nr(4)
13861 .kr(8)
13862 .sr(1)
13863 .m(3)
13864 .n(4)
13865 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013866 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013867 }
13868 }
13869
13870 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_strided_a) {
13871 TEST_REQUIRES_X86_XOP;
13872 for (size_t k = 16; k <= 80; k += 8) {
13873 GemmMicrokernelTester()
13874 .mr(3)
13875 .nr(4)
13876 .kr(8)
13877 .sr(1)
13878 .m(3)
13879 .n(4)
13880 .k(k)
13881 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013882 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013883 }
13884 }
13885
13886 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_subtile) {
13887 TEST_REQUIRES_X86_XOP;
13888 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013889 for (uint32_t n = 1; n <= 4; n++) {
13890 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013891 GemmMicrokernelTester()
13892 .mr(3)
13893 .nr(4)
13894 .kr(8)
13895 .sr(1)
13896 .m(m)
13897 .n(n)
13898 .k(k)
13899 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013900 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013901 }
13902 }
13903 }
13904 }
13905
13906 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4) {
13907 TEST_REQUIRES_X86_XOP;
13908 for (uint32_t n = 5; n < 8; n++) {
13909 for (size_t k = 1; k <= 40; k += 9) {
13910 GemmMicrokernelTester()
13911 .mr(3)
13912 .nr(4)
13913 .kr(8)
13914 .sr(1)
13915 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013916 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013917 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013918 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013919 }
13920 }
13921 }
13922
13923 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
13924 TEST_REQUIRES_X86_XOP;
13925 for (uint32_t n = 5; n < 8; n++) {
13926 for (size_t k = 1; k <= 40; k += 9) {
13927 GemmMicrokernelTester()
13928 .mr(3)
13929 .nr(4)
13930 .kr(8)
13931 .sr(1)
13932 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013933 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013934 .k(k)
13935 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013936 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013937 }
13938 }
13939 }
13940
13941 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_a) {
13942 TEST_REQUIRES_X86_XOP;
13943 for (uint32_t n = 5; n < 8; n++) {
13944 for (size_t k = 1; k <= 40; k += 9) {
13945 GemmMicrokernelTester()
13946 .mr(3)
13947 .nr(4)
13948 .kr(8)
13949 .sr(1)
13950 .m(3)
13951 .n(n)
13952 .k(k)
13953 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013954 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013955 }
13956 }
13957 }
13958
13959 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_subtile) {
13960 TEST_REQUIRES_X86_XOP;
13961 for (uint32_t n = 5; n < 8; n++) {
13962 for (size_t k = 1; k <= 40; k += 9) {
13963 for (uint32_t m = 1; m <= 3; m++) {
13964 GemmMicrokernelTester()
13965 .mr(3)
13966 .nr(4)
13967 .kr(8)
13968 .sr(1)
13969 .m(m)
13970 .n(n)
13971 .k(k)
13972 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013973 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013974 }
13975 }
13976 }
13977 }
13978
13979 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4) {
13980 TEST_REQUIRES_X86_XOP;
13981 for (uint32_t n = 8; n <= 12; n += 4) {
13982 for (size_t k = 1; k <= 40; k += 9) {
13983 GemmMicrokernelTester()
13984 .mr(3)
13985 .nr(4)
13986 .kr(8)
13987 .sr(1)
13988 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013989 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013990 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013991 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013992 }
13993 }
13994 }
13995
13996 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_cn) {
13997 TEST_REQUIRES_X86_XOP;
13998 for (uint32_t n = 8; n <= 12; n += 4) {
13999 for (size_t k = 1; k <= 40; k += 9) {
14000 GemmMicrokernelTester()
14001 .mr(3)
14002 .nr(4)
14003 .kr(8)
14004 .sr(1)
14005 .m(3)
14006 .n(n)
14007 .k(k)
14008 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014009 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014010 }
14011 }
14012 }
14013
14014 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_a) {
14015 TEST_REQUIRES_X86_XOP;
14016 for (uint32_t n = 8; n <= 12; n += 4) {
14017 for (size_t k = 1; k <= 40; k += 9) {
14018 GemmMicrokernelTester()
14019 .mr(3)
14020 .nr(4)
14021 .kr(8)
14022 .sr(1)
14023 .m(3)
14024 .n(n)
14025 .k(k)
14026 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014027 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014028 }
14029 }
14030 }
14031
14032 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_subtile) {
14033 TEST_REQUIRES_X86_XOP;
14034 for (uint32_t n = 8; n <= 12; n += 4) {
14035 for (size_t k = 1; k <= 40; k += 9) {
14036 for (uint32_t m = 1; m <= 3; m++) {
14037 GemmMicrokernelTester()
14038 .mr(3)
14039 .nr(4)
14040 .kr(8)
14041 .sr(1)
14042 .m(m)
14043 .n(n)
14044 .k(k)
14045 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014046 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014047 }
14048 }
14049 }
14050 }
14051
14052 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm_subtile) {
14053 TEST_REQUIRES_X86_XOP;
14054 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014055 for (uint32_t n = 1; n <= 4; n++) {
14056 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014057 GemmMicrokernelTester()
14058 .mr(3)
14059 .nr(4)
14060 .kr(8)
14061 .sr(1)
14062 .m(m)
14063 .n(n)
14064 .k(k)
14065 .cm_stride(7)
14066 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014067 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014068 }
14069 }
14070 }
14071 }
14072
14073 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmin) {
14074 TEST_REQUIRES_X86_XOP;
14075 GemmMicrokernelTester()
14076 .mr(3)
14077 .nr(4)
14078 .kr(8)
14079 .sr(1)
14080 .m(3)
14081 .n(4)
14082 .k(8)
14083 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014084 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014085 }
14086
14087 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmax) {
14088 TEST_REQUIRES_X86_XOP;
14089 GemmMicrokernelTester()
14090 .mr(3)
14091 .nr(4)
14092 .kr(8)
14093 .sr(1)
14094 .m(3)
14095 .n(4)
14096 .k(8)
14097 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014098 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014099 }
14100
14101 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm) {
14102 TEST_REQUIRES_X86_XOP;
14103 GemmMicrokernelTester()
14104 .mr(3)
14105 .nr(4)
14106 .kr(8)
14107 .sr(1)
14108 .m(3)
14109 .n(4)
14110 .k(8)
14111 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014112 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014113 }
14114
14115 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_a_zero_point) {
14116 TEST_REQUIRES_X86_XOP;
14117 for (size_t k = 1; k <= 40; k += 9) {
14118 GemmMicrokernelTester()
14119 .mr(3)
14120 .nr(4)
14121 .kr(8)
14122 .sr(1)
14123 .m(3)
14124 .n(4)
14125 .k(k)
14126 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014127 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014128 }
14129 }
14130
14131 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_b_zero_point) {
14132 TEST_REQUIRES_X86_XOP;
14133 for (size_t k = 1; k <= 40; k += 9) {
14134 GemmMicrokernelTester()
14135 .mr(3)
14136 .nr(4)
14137 .kr(8)
14138 .sr(1)
14139 .m(3)
14140 .n(4)
14141 .k(k)
14142 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014143 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014144 }
14145 }
14146
14147 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_zero_point) {
14148 TEST_REQUIRES_X86_XOP;
14149 for (size_t k = 1; k <= 40; k += 9) {
14150 GemmMicrokernelTester()
14151 .mr(3)
14152 .nr(4)
14153 .kr(8)
14154 .sr(1)
14155 .m(3)
14156 .n(4)
14157 .k(k)
14158 .a_zero_point(0)
14159 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014160 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014161 }
14162 }
14163#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14164
14165
14166#if XNN_ARCH_X86 || XNN_ARCH_X86_64
14167 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8) {
14168 TEST_REQUIRES_X86_SSE2;
14169 GemmMicrokernelTester()
14170 .mr(1)
14171 .nr(4)
14172 .kr(8)
14173 .sr(1)
14174 .m(1)
14175 .n(4)
14176 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080014177 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014178 }
14179
14180 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cn) {
14181 TEST_REQUIRES_X86_SSE2;
14182 GemmMicrokernelTester()
14183 .mr(1)
14184 .nr(4)
14185 .kr(8)
14186 .sr(1)
14187 .m(1)
14188 .n(4)
14189 .k(8)
14190 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014191 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014192 }
14193
14194 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_strided_a) {
14195 TEST_REQUIRES_X86_SSE2;
14196 GemmMicrokernelTester()
14197 .mr(1)
14198 .nr(4)
14199 .kr(8)
14200 .sr(1)
14201 .m(1)
14202 .n(4)
14203 .k(8)
14204 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014205 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014206 }
14207
14208 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile) {
14209 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014210 for (uint32_t n = 1; n <= 4; n++) {
14211 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014212 GemmMicrokernelTester()
14213 .mr(1)
14214 .nr(4)
14215 .kr(8)
14216 .sr(1)
14217 .m(m)
14218 .n(n)
14219 .k(8)
14220 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014221 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014222 }
14223 }
14224 }
14225
14226 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
14227 TEST_REQUIRES_X86_SSE2;
14228 for (uint32_t m = 1; m <= 1; m++) {
14229 GemmMicrokernelTester()
14230 .mr(1)
14231 .nr(4)
14232 .kr(8)
14233 .sr(1)
14234 .m(m)
14235 .n(4)
14236 .k(8)
14237 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014238 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014239 }
14240 }
14241
14242 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
14243 TEST_REQUIRES_X86_SSE2;
14244 for (uint32_t n = 1; n <= 4; n++) {
14245 GemmMicrokernelTester()
14246 .mr(1)
14247 .nr(4)
14248 .kr(8)
14249 .sr(1)
14250 .m(1)
14251 .n(n)
14252 .k(8)
14253 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014254 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014255 }
14256 }
14257
14258 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8) {
14259 TEST_REQUIRES_X86_SSE2;
14260 for (size_t k = 1; k < 8; k++) {
14261 GemmMicrokernelTester()
14262 .mr(1)
14263 .nr(4)
14264 .kr(8)
14265 .sr(1)
14266 .m(1)
14267 .n(4)
14268 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014269 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014270 }
14271 }
14272
14273 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8_strided_a) {
14274 TEST_REQUIRES_X86_SSE2;
14275 for (size_t k = 1; k < 8; k++) {
14276 GemmMicrokernelTester()
14277 .mr(1)
14278 .nr(4)
14279 .kr(8)
14280 .sr(1)
14281 .m(1)
14282 .n(4)
14283 .k(k)
14284 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014285 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014286 }
14287 }
14288
14289 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8_subtile) {
14290 TEST_REQUIRES_X86_SSE2;
14291 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014292 for (uint32_t n = 1; n <= 4; n++) {
14293 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014294 GemmMicrokernelTester()
14295 .mr(1)
14296 .nr(4)
14297 .kr(8)
14298 .sr(1)
14299 .m(m)
14300 .n(n)
14301 .k(k)
14302 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014303 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014304 }
14305 }
14306 }
14307 }
14308
14309 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8) {
14310 TEST_REQUIRES_X86_SSE2;
14311 for (size_t k = 9; k < 16; k++) {
14312 GemmMicrokernelTester()
14313 .mr(1)
14314 .nr(4)
14315 .kr(8)
14316 .sr(1)
14317 .m(1)
14318 .n(4)
14319 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014320 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014321 }
14322 }
14323
14324 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8_strided_a) {
14325 TEST_REQUIRES_X86_SSE2;
14326 for (size_t k = 9; k < 16; k++) {
14327 GemmMicrokernelTester()
14328 .mr(1)
14329 .nr(4)
14330 .kr(8)
14331 .sr(1)
14332 .m(1)
14333 .n(4)
14334 .k(k)
14335 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014336 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014337 }
14338 }
14339
14340 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8_subtile) {
14341 TEST_REQUIRES_X86_SSE2;
14342 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014343 for (uint32_t n = 1; n <= 4; n++) {
14344 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014345 GemmMicrokernelTester()
14346 .mr(1)
14347 .nr(4)
14348 .kr(8)
14349 .sr(1)
14350 .m(m)
14351 .n(n)
14352 .k(k)
14353 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014354 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014355 }
14356 }
14357 }
14358 }
14359
14360 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8) {
14361 TEST_REQUIRES_X86_SSE2;
14362 for (size_t k = 16; k <= 80; k += 8) {
14363 GemmMicrokernelTester()
14364 .mr(1)
14365 .nr(4)
14366 .kr(8)
14367 .sr(1)
14368 .m(1)
14369 .n(4)
14370 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014371 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014372 }
14373 }
14374
14375 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8_strided_a) {
14376 TEST_REQUIRES_X86_SSE2;
14377 for (size_t k = 16; k <= 80; k += 8) {
14378 GemmMicrokernelTester()
14379 .mr(1)
14380 .nr(4)
14381 .kr(8)
14382 .sr(1)
14383 .m(1)
14384 .n(4)
14385 .k(k)
14386 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014387 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014388 }
14389 }
14390
14391 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8_subtile) {
14392 TEST_REQUIRES_X86_SSE2;
14393 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014394 for (uint32_t n = 1; n <= 4; n++) {
14395 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014396 GemmMicrokernelTester()
14397 .mr(1)
14398 .nr(4)
14399 .kr(8)
14400 .sr(1)
14401 .m(m)
14402 .n(n)
14403 .k(k)
14404 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014405 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014406 }
14407 }
14408 }
14409 }
14410
14411 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4) {
14412 TEST_REQUIRES_X86_SSE2;
14413 for (uint32_t n = 5; n < 8; n++) {
14414 for (size_t k = 1; k <= 40; k += 9) {
14415 GemmMicrokernelTester()
14416 .mr(1)
14417 .nr(4)
14418 .kr(8)
14419 .sr(1)
14420 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014421 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014422 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014423 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014424 }
14425 }
14426 }
14427
14428 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
14429 TEST_REQUIRES_X86_SSE2;
14430 for (uint32_t n = 5; n < 8; n++) {
14431 for (size_t k = 1; k <= 40; k += 9) {
14432 GemmMicrokernelTester()
14433 .mr(1)
14434 .nr(4)
14435 .kr(8)
14436 .sr(1)
14437 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014438 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014439 .k(k)
14440 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014441 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014442 }
14443 }
14444 }
14445
14446 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_strided_a) {
14447 TEST_REQUIRES_X86_SSE2;
14448 for (uint32_t n = 5; n < 8; n++) {
14449 for (size_t k = 1; k <= 40; k += 9) {
14450 GemmMicrokernelTester()
14451 .mr(1)
14452 .nr(4)
14453 .kr(8)
14454 .sr(1)
14455 .m(1)
14456 .n(n)
14457 .k(k)
14458 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014459 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014460 }
14461 }
14462 }
14463
14464 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_subtile) {
14465 TEST_REQUIRES_X86_SSE2;
14466 for (uint32_t n = 5; n < 8; n++) {
14467 for (size_t k = 1; k <= 40; k += 9) {
14468 for (uint32_t m = 1; m <= 1; m++) {
14469 GemmMicrokernelTester()
14470 .mr(1)
14471 .nr(4)
14472 .kr(8)
14473 .sr(1)
14474 .m(m)
14475 .n(n)
14476 .k(k)
14477 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014478 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014479 }
14480 }
14481 }
14482 }
14483
14484 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4) {
14485 TEST_REQUIRES_X86_SSE2;
14486 for (uint32_t n = 8; n <= 12; n += 4) {
14487 for (size_t k = 1; k <= 40; k += 9) {
14488 GemmMicrokernelTester()
14489 .mr(1)
14490 .nr(4)
14491 .kr(8)
14492 .sr(1)
14493 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014494 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014495 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014496 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014497 }
14498 }
14499 }
14500
14501 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
14502 TEST_REQUIRES_X86_SSE2;
14503 for (uint32_t n = 8; n <= 12; n += 4) {
14504 for (size_t k = 1; k <= 40; k += 9) {
14505 GemmMicrokernelTester()
14506 .mr(1)
14507 .nr(4)
14508 .kr(8)
14509 .sr(1)
14510 .m(1)
14511 .n(n)
14512 .k(k)
14513 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014514 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014515 }
14516 }
14517 }
14518
14519 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_strided_a) {
14520 TEST_REQUIRES_X86_SSE2;
14521 for (uint32_t n = 8; n <= 12; n += 4) {
14522 for (size_t k = 1; k <= 40; k += 9) {
14523 GemmMicrokernelTester()
14524 .mr(1)
14525 .nr(4)
14526 .kr(8)
14527 .sr(1)
14528 .m(1)
14529 .n(n)
14530 .k(k)
14531 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014532 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014533 }
14534 }
14535 }
14536
14537 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_subtile) {
14538 TEST_REQUIRES_X86_SSE2;
14539 for (uint32_t n = 8; n <= 12; n += 4) {
14540 for (size_t k = 1; k <= 40; k += 9) {
14541 for (uint32_t m = 1; m <= 1; m++) {
14542 GemmMicrokernelTester()
14543 .mr(1)
14544 .nr(4)
14545 .kr(8)
14546 .sr(1)
14547 .m(m)
14548 .n(n)
14549 .k(k)
14550 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014551 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014552 }
14553 }
14554 }
14555 }
14556
14557 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm_subtile) {
14558 TEST_REQUIRES_X86_SSE2;
14559 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014560 for (uint32_t n = 1; n <= 4; n++) {
14561 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014562 GemmMicrokernelTester()
14563 .mr(1)
14564 .nr(4)
14565 .kr(8)
14566 .sr(1)
14567 .m(m)
14568 .n(n)
14569 .k(k)
14570 .cm_stride(7)
14571 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014572 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014573 }
14574 }
14575 }
14576 }
14577
14578 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmin) {
14579 TEST_REQUIRES_X86_SSE2;
14580 GemmMicrokernelTester()
14581 .mr(1)
14582 .nr(4)
14583 .kr(8)
14584 .sr(1)
14585 .m(1)
14586 .n(4)
14587 .k(8)
14588 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014589 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014590 }
14591
14592 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmax) {
14593 TEST_REQUIRES_X86_SSE2;
14594 GemmMicrokernelTester()
14595 .mr(1)
14596 .nr(4)
14597 .kr(8)
14598 .sr(1)
14599 .m(1)
14600 .n(4)
14601 .k(8)
14602 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014603 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014604 }
14605
14606 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm) {
14607 TEST_REQUIRES_X86_SSE2;
14608 GemmMicrokernelTester()
14609 .mr(1)
14610 .nr(4)
14611 .kr(8)
14612 .sr(1)
14613 .m(1)
14614 .n(4)
14615 .k(8)
14616 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014617 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014618 }
14619
14620 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_a_zero_point) {
14621 TEST_REQUIRES_X86_SSE2;
14622 for (size_t k = 1; k <= 40; k += 9) {
14623 GemmMicrokernelTester()
14624 .mr(1)
14625 .nr(4)
14626 .kr(8)
14627 .sr(1)
14628 .m(1)
14629 .n(4)
14630 .k(k)
14631 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014632 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014633 }
14634 }
14635
14636 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_b_zero_point) {
14637 TEST_REQUIRES_X86_SSE2;
14638 for (size_t k = 1; k <= 40; k += 9) {
14639 GemmMicrokernelTester()
14640 .mr(1)
14641 .nr(4)
14642 .kr(8)
14643 .sr(1)
14644 .m(1)
14645 .n(4)
14646 .k(k)
14647 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014648 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014649 }
14650 }
14651
14652 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_zero_point) {
14653 TEST_REQUIRES_X86_SSE2;
14654 for (size_t k = 1; k <= 40; k += 9) {
14655 GemmMicrokernelTester()
14656 .mr(1)
14657 .nr(4)
14658 .kr(8)
14659 .sr(1)
14660 .m(1)
14661 .n(4)
14662 .k(k)
14663 .a_zero_point(0)
14664 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014665 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014666 }
14667 }
14668#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14669
14670
14671#if XNN_ARCH_X86 || XNN_ARCH_X86_64
14672 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8) {
14673 TEST_REQUIRES_X86_SSE2;
14674 GemmMicrokernelTester()
14675 .mr(2)
14676 .nr(4)
14677 .kr(8)
14678 .sr(1)
14679 .m(2)
14680 .n(4)
14681 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080014682 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014683 }
14684
14685 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cn) {
14686 TEST_REQUIRES_X86_SSE2;
14687 GemmMicrokernelTester()
14688 .mr(2)
14689 .nr(4)
14690 .kr(8)
14691 .sr(1)
14692 .m(2)
14693 .n(4)
14694 .k(8)
14695 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014696 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014697 }
14698
14699 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_strided_a) {
14700 TEST_REQUIRES_X86_SSE2;
14701 GemmMicrokernelTester()
14702 .mr(2)
14703 .nr(4)
14704 .kr(8)
14705 .sr(1)
14706 .m(2)
14707 .n(4)
14708 .k(8)
14709 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014710 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014711 }
14712
14713 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile) {
14714 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014715 for (uint32_t n = 1; n <= 4; n++) {
14716 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014717 GemmMicrokernelTester()
14718 .mr(2)
14719 .nr(4)
14720 .kr(8)
14721 .sr(1)
14722 .m(m)
14723 .n(n)
14724 .k(8)
14725 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014726 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014727 }
14728 }
14729 }
14730
14731 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
14732 TEST_REQUIRES_X86_SSE2;
14733 for (uint32_t m = 1; m <= 2; m++) {
14734 GemmMicrokernelTester()
14735 .mr(2)
14736 .nr(4)
14737 .kr(8)
14738 .sr(1)
14739 .m(m)
14740 .n(4)
14741 .k(8)
14742 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014743 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014744 }
14745 }
14746
14747 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
14748 TEST_REQUIRES_X86_SSE2;
14749 for (uint32_t n = 1; n <= 4; n++) {
14750 GemmMicrokernelTester()
14751 .mr(2)
14752 .nr(4)
14753 .kr(8)
14754 .sr(1)
14755 .m(2)
14756 .n(n)
14757 .k(8)
14758 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014759 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014760 }
14761 }
14762
14763 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8) {
14764 TEST_REQUIRES_X86_SSE2;
14765 for (size_t k = 1; k < 8; k++) {
14766 GemmMicrokernelTester()
14767 .mr(2)
14768 .nr(4)
14769 .kr(8)
14770 .sr(1)
14771 .m(2)
14772 .n(4)
14773 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014774 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014775 }
14776 }
14777
14778 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_strided_a) {
14779 TEST_REQUIRES_X86_SSE2;
14780 for (size_t k = 1; k < 8; k++) {
14781 GemmMicrokernelTester()
14782 .mr(2)
14783 .nr(4)
14784 .kr(8)
14785 .sr(1)
14786 .m(2)
14787 .n(4)
14788 .k(k)
14789 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014790 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014791 }
14792 }
14793
14794 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_subtile) {
14795 TEST_REQUIRES_X86_SSE2;
14796 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014797 for (uint32_t n = 1; n <= 4; n++) {
14798 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014799 GemmMicrokernelTester()
14800 .mr(2)
14801 .nr(4)
14802 .kr(8)
14803 .sr(1)
14804 .m(m)
14805 .n(n)
14806 .k(k)
14807 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014808 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014809 }
14810 }
14811 }
14812 }
14813
14814 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8) {
14815 TEST_REQUIRES_X86_SSE2;
14816 for (size_t k = 9; k < 16; k++) {
14817 GemmMicrokernelTester()
14818 .mr(2)
14819 .nr(4)
14820 .kr(8)
14821 .sr(1)
14822 .m(2)
14823 .n(4)
14824 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014825 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014826 }
14827 }
14828
14829 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_strided_a) {
14830 TEST_REQUIRES_X86_SSE2;
14831 for (size_t k = 9; k < 16; k++) {
14832 GemmMicrokernelTester()
14833 .mr(2)
14834 .nr(4)
14835 .kr(8)
14836 .sr(1)
14837 .m(2)
14838 .n(4)
14839 .k(k)
14840 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014841 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014842 }
14843 }
14844
14845 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_subtile) {
14846 TEST_REQUIRES_X86_SSE2;
14847 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014848 for (uint32_t n = 1; n <= 4; n++) {
14849 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014850 GemmMicrokernelTester()
14851 .mr(2)
14852 .nr(4)
14853 .kr(8)
14854 .sr(1)
14855 .m(m)
14856 .n(n)
14857 .k(k)
14858 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014859 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014860 }
14861 }
14862 }
14863 }
14864
14865 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8) {
14866 TEST_REQUIRES_X86_SSE2;
14867 for (size_t k = 16; k <= 80; k += 8) {
14868 GemmMicrokernelTester()
14869 .mr(2)
14870 .nr(4)
14871 .kr(8)
14872 .sr(1)
14873 .m(2)
14874 .n(4)
14875 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014876 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014877 }
14878 }
14879
14880 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_strided_a) {
14881 TEST_REQUIRES_X86_SSE2;
14882 for (size_t k = 16; k <= 80; k += 8) {
14883 GemmMicrokernelTester()
14884 .mr(2)
14885 .nr(4)
14886 .kr(8)
14887 .sr(1)
14888 .m(2)
14889 .n(4)
14890 .k(k)
14891 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014892 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014893 }
14894 }
14895
14896 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_subtile) {
14897 TEST_REQUIRES_X86_SSE2;
14898 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014899 for (uint32_t n = 1; n <= 4; n++) {
14900 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014901 GemmMicrokernelTester()
14902 .mr(2)
14903 .nr(4)
14904 .kr(8)
14905 .sr(1)
14906 .m(m)
14907 .n(n)
14908 .k(k)
14909 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014910 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014911 }
14912 }
14913 }
14914 }
14915
14916 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4) {
14917 TEST_REQUIRES_X86_SSE2;
14918 for (uint32_t n = 5; n < 8; n++) {
14919 for (size_t k = 1; k <= 40; k += 9) {
14920 GemmMicrokernelTester()
14921 .mr(2)
14922 .nr(4)
14923 .kr(8)
14924 .sr(1)
14925 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014926 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014927 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014928 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014929 }
14930 }
14931 }
14932
14933 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
14934 TEST_REQUIRES_X86_SSE2;
14935 for (uint32_t n = 5; n < 8; n++) {
14936 for (size_t k = 1; k <= 40; k += 9) {
14937 GemmMicrokernelTester()
14938 .mr(2)
14939 .nr(4)
14940 .kr(8)
14941 .sr(1)
14942 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014943 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014944 .k(k)
14945 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014946 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014947 }
14948 }
14949 }
14950
14951 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_a) {
14952 TEST_REQUIRES_X86_SSE2;
14953 for (uint32_t n = 5; n < 8; n++) {
14954 for (size_t k = 1; k <= 40; k += 9) {
14955 GemmMicrokernelTester()
14956 .mr(2)
14957 .nr(4)
14958 .kr(8)
14959 .sr(1)
14960 .m(2)
14961 .n(n)
14962 .k(k)
14963 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014964 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014965 }
14966 }
14967 }
14968
14969 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_subtile) {
14970 TEST_REQUIRES_X86_SSE2;
14971 for (uint32_t n = 5; n < 8; n++) {
14972 for (size_t k = 1; k <= 40; k += 9) {
14973 for (uint32_t m = 1; m <= 2; m++) {
14974 GemmMicrokernelTester()
14975 .mr(2)
14976 .nr(4)
14977 .kr(8)
14978 .sr(1)
14979 .m(m)
14980 .n(n)
14981 .k(k)
14982 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014983 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014984 }
14985 }
14986 }
14987 }
14988
14989 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4) {
14990 TEST_REQUIRES_X86_SSE2;
14991 for (uint32_t n = 8; n <= 12; n += 4) {
14992 for (size_t k = 1; k <= 40; k += 9) {
14993 GemmMicrokernelTester()
14994 .mr(2)
14995 .nr(4)
14996 .kr(8)
14997 .sr(1)
14998 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014999 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015000 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015001 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015002 }
15003 }
15004 }
15005
15006 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
15007 TEST_REQUIRES_X86_SSE2;
15008 for (uint32_t n = 8; n <= 12; n += 4) {
15009 for (size_t k = 1; k <= 40; k += 9) {
15010 GemmMicrokernelTester()
15011 .mr(2)
15012 .nr(4)
15013 .kr(8)
15014 .sr(1)
15015 .m(2)
15016 .n(n)
15017 .k(k)
15018 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015019 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015020 }
15021 }
15022 }
15023
15024 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_a) {
15025 TEST_REQUIRES_X86_SSE2;
15026 for (uint32_t n = 8; n <= 12; n += 4) {
15027 for (size_t k = 1; k <= 40; k += 9) {
15028 GemmMicrokernelTester()
15029 .mr(2)
15030 .nr(4)
15031 .kr(8)
15032 .sr(1)
15033 .m(2)
15034 .n(n)
15035 .k(k)
15036 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015037 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015038 }
15039 }
15040 }
15041
15042 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_subtile) {
15043 TEST_REQUIRES_X86_SSE2;
15044 for (uint32_t n = 8; n <= 12; n += 4) {
15045 for (size_t k = 1; k <= 40; k += 9) {
15046 for (uint32_t m = 1; m <= 2; m++) {
15047 GemmMicrokernelTester()
15048 .mr(2)
15049 .nr(4)
15050 .kr(8)
15051 .sr(1)
15052 .m(m)
15053 .n(n)
15054 .k(k)
15055 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015056 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015057 }
15058 }
15059 }
15060 }
15061
15062 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm_subtile) {
15063 TEST_REQUIRES_X86_SSE2;
15064 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015065 for (uint32_t n = 1; n <= 4; n++) {
15066 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015067 GemmMicrokernelTester()
15068 .mr(2)
15069 .nr(4)
15070 .kr(8)
15071 .sr(1)
15072 .m(m)
15073 .n(n)
15074 .k(k)
15075 .cm_stride(7)
15076 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015077 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015078 }
15079 }
15080 }
15081 }
15082
15083 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmin) {
15084 TEST_REQUIRES_X86_SSE2;
15085 GemmMicrokernelTester()
15086 .mr(2)
15087 .nr(4)
15088 .kr(8)
15089 .sr(1)
15090 .m(2)
15091 .n(4)
15092 .k(8)
15093 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015094 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015095 }
15096
15097 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmax) {
15098 TEST_REQUIRES_X86_SSE2;
15099 GemmMicrokernelTester()
15100 .mr(2)
15101 .nr(4)
15102 .kr(8)
15103 .sr(1)
15104 .m(2)
15105 .n(4)
15106 .k(8)
15107 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015108 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015109 }
15110
15111 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm) {
15112 TEST_REQUIRES_X86_SSE2;
15113 GemmMicrokernelTester()
15114 .mr(2)
15115 .nr(4)
15116 .kr(8)
15117 .sr(1)
15118 .m(2)
15119 .n(4)
15120 .k(8)
15121 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015122 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015123 }
15124
15125 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_a_zero_point) {
15126 TEST_REQUIRES_X86_SSE2;
15127 for (size_t k = 1; k <= 40; k += 9) {
15128 GemmMicrokernelTester()
15129 .mr(2)
15130 .nr(4)
15131 .kr(8)
15132 .sr(1)
15133 .m(2)
15134 .n(4)
15135 .k(k)
15136 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015137 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015138 }
15139 }
15140
15141 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_b_zero_point) {
15142 TEST_REQUIRES_X86_SSE2;
15143 for (size_t k = 1; k <= 40; k += 9) {
15144 GemmMicrokernelTester()
15145 .mr(2)
15146 .nr(4)
15147 .kr(8)
15148 .sr(1)
15149 .m(2)
15150 .n(4)
15151 .k(k)
15152 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015153 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015154 }
15155 }
15156
15157 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_zero_point) {
15158 TEST_REQUIRES_X86_SSE2;
15159 for (size_t k = 1; k <= 40; k += 9) {
15160 GemmMicrokernelTester()
15161 .mr(2)
15162 .nr(4)
15163 .kr(8)
15164 .sr(1)
15165 .m(2)
15166 .n(4)
15167 .k(k)
15168 .a_zero_point(0)
15169 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015170 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015171 }
15172 }
15173#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15174
15175
15176#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15177 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8) {
15178 TEST_REQUIRES_X86_SSE41;
15179 GemmMicrokernelTester()
15180 .mr(1)
15181 .nr(4)
15182 .kr(8)
15183 .sr(1)
15184 .m(1)
15185 .n(4)
15186 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015187 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015188 }
15189
15190 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cn) {
15191 TEST_REQUIRES_X86_SSE41;
15192 GemmMicrokernelTester()
15193 .mr(1)
15194 .nr(4)
15195 .kr(8)
15196 .sr(1)
15197 .m(1)
15198 .n(4)
15199 .k(8)
15200 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015201 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015202 }
15203
15204 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_strided_a) {
15205 TEST_REQUIRES_X86_SSE41;
15206 GemmMicrokernelTester()
15207 .mr(1)
15208 .nr(4)
15209 .kr(8)
15210 .sr(1)
15211 .m(1)
15212 .n(4)
15213 .k(8)
15214 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015215 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015216 }
15217
15218 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile) {
15219 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015220 for (uint32_t n = 1; n <= 4; n++) {
15221 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015222 GemmMicrokernelTester()
15223 .mr(1)
15224 .nr(4)
15225 .kr(8)
15226 .sr(1)
15227 .m(m)
15228 .n(n)
15229 .k(8)
15230 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015231 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015232 }
15233 }
15234 }
15235
15236 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
15237 TEST_REQUIRES_X86_SSE41;
15238 for (uint32_t m = 1; m <= 1; m++) {
15239 GemmMicrokernelTester()
15240 .mr(1)
15241 .nr(4)
15242 .kr(8)
15243 .sr(1)
15244 .m(m)
15245 .n(4)
15246 .k(8)
15247 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015248 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015249 }
15250 }
15251
15252 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
15253 TEST_REQUIRES_X86_SSE41;
15254 for (uint32_t n = 1; n <= 4; n++) {
15255 GemmMicrokernelTester()
15256 .mr(1)
15257 .nr(4)
15258 .kr(8)
15259 .sr(1)
15260 .m(1)
15261 .n(n)
15262 .k(8)
15263 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015264 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015265 }
15266 }
15267
15268 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8) {
15269 TEST_REQUIRES_X86_SSE41;
15270 for (size_t k = 1; k < 8; k++) {
15271 GemmMicrokernelTester()
15272 .mr(1)
15273 .nr(4)
15274 .kr(8)
15275 .sr(1)
15276 .m(1)
15277 .n(4)
15278 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015279 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015280 }
15281 }
15282
15283 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_strided_a) {
15284 TEST_REQUIRES_X86_SSE41;
15285 for (size_t k = 1; k < 8; k++) {
15286 GemmMicrokernelTester()
15287 .mr(1)
15288 .nr(4)
15289 .kr(8)
15290 .sr(1)
15291 .m(1)
15292 .n(4)
15293 .k(k)
15294 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015295 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015296 }
15297 }
15298
15299 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_subtile) {
15300 TEST_REQUIRES_X86_SSE41;
15301 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015302 for (uint32_t n = 1; n <= 4; n++) {
15303 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015304 GemmMicrokernelTester()
15305 .mr(1)
15306 .nr(4)
15307 .kr(8)
15308 .sr(1)
15309 .m(m)
15310 .n(n)
15311 .k(k)
15312 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015313 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015314 }
15315 }
15316 }
15317 }
15318
15319 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8) {
15320 TEST_REQUIRES_X86_SSE41;
15321 for (size_t k = 9; k < 16; k++) {
15322 GemmMicrokernelTester()
15323 .mr(1)
15324 .nr(4)
15325 .kr(8)
15326 .sr(1)
15327 .m(1)
15328 .n(4)
15329 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015330 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015331 }
15332 }
15333
15334 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_strided_a) {
15335 TEST_REQUIRES_X86_SSE41;
15336 for (size_t k = 9; k < 16; k++) {
15337 GemmMicrokernelTester()
15338 .mr(1)
15339 .nr(4)
15340 .kr(8)
15341 .sr(1)
15342 .m(1)
15343 .n(4)
15344 .k(k)
15345 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015346 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015347 }
15348 }
15349
15350 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_subtile) {
15351 TEST_REQUIRES_X86_SSE41;
15352 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015353 for (uint32_t n = 1; n <= 4; n++) {
15354 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015355 GemmMicrokernelTester()
15356 .mr(1)
15357 .nr(4)
15358 .kr(8)
15359 .sr(1)
15360 .m(m)
15361 .n(n)
15362 .k(k)
15363 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015364 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015365 }
15366 }
15367 }
15368 }
15369
15370 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8) {
15371 TEST_REQUIRES_X86_SSE41;
15372 for (size_t k = 16; k <= 80; k += 8) {
15373 GemmMicrokernelTester()
15374 .mr(1)
15375 .nr(4)
15376 .kr(8)
15377 .sr(1)
15378 .m(1)
15379 .n(4)
15380 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015381 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015382 }
15383 }
15384
15385 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_strided_a) {
15386 TEST_REQUIRES_X86_SSE41;
15387 for (size_t k = 16; k <= 80; k += 8) {
15388 GemmMicrokernelTester()
15389 .mr(1)
15390 .nr(4)
15391 .kr(8)
15392 .sr(1)
15393 .m(1)
15394 .n(4)
15395 .k(k)
15396 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015397 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015398 }
15399 }
15400
15401 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_subtile) {
15402 TEST_REQUIRES_X86_SSE41;
15403 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015404 for (uint32_t n = 1; n <= 4; n++) {
15405 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015406 GemmMicrokernelTester()
15407 .mr(1)
15408 .nr(4)
15409 .kr(8)
15410 .sr(1)
15411 .m(m)
15412 .n(n)
15413 .k(k)
15414 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015415 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015416 }
15417 }
15418 }
15419 }
15420
15421 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4) {
15422 TEST_REQUIRES_X86_SSE41;
15423 for (uint32_t n = 5; n < 8; n++) {
15424 for (size_t k = 1; k <= 40; k += 9) {
15425 GemmMicrokernelTester()
15426 .mr(1)
15427 .nr(4)
15428 .kr(8)
15429 .sr(1)
15430 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015431 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015432 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015433 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015434 }
15435 }
15436 }
15437
15438 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
15439 TEST_REQUIRES_X86_SSE41;
15440 for (uint32_t n = 5; n < 8; n++) {
15441 for (size_t k = 1; k <= 40; k += 9) {
15442 GemmMicrokernelTester()
15443 .mr(1)
15444 .nr(4)
15445 .kr(8)
15446 .sr(1)
15447 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015448 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015449 .k(k)
15450 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015451 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015452 }
15453 }
15454 }
15455
15456 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_a) {
15457 TEST_REQUIRES_X86_SSE41;
15458 for (uint32_t n = 5; n < 8; n++) {
15459 for (size_t k = 1; k <= 40; k += 9) {
15460 GemmMicrokernelTester()
15461 .mr(1)
15462 .nr(4)
15463 .kr(8)
15464 .sr(1)
15465 .m(1)
15466 .n(n)
15467 .k(k)
15468 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015469 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015470 }
15471 }
15472 }
15473
15474 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_subtile) {
15475 TEST_REQUIRES_X86_SSE41;
15476 for (uint32_t n = 5; n < 8; n++) {
15477 for (size_t k = 1; k <= 40; k += 9) {
15478 for (uint32_t m = 1; m <= 1; m++) {
15479 GemmMicrokernelTester()
15480 .mr(1)
15481 .nr(4)
15482 .kr(8)
15483 .sr(1)
15484 .m(m)
15485 .n(n)
15486 .k(k)
15487 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015488 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015489 }
15490 }
15491 }
15492 }
15493
15494 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4) {
15495 TEST_REQUIRES_X86_SSE41;
15496 for (uint32_t n = 8; n <= 12; n += 4) {
15497 for (size_t k = 1; k <= 40; k += 9) {
15498 GemmMicrokernelTester()
15499 .mr(1)
15500 .nr(4)
15501 .kr(8)
15502 .sr(1)
15503 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015504 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015505 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015506 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015507 }
15508 }
15509 }
15510
15511 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
15512 TEST_REQUIRES_X86_SSE41;
15513 for (uint32_t n = 8; n <= 12; n += 4) {
15514 for (size_t k = 1; k <= 40; k += 9) {
15515 GemmMicrokernelTester()
15516 .mr(1)
15517 .nr(4)
15518 .kr(8)
15519 .sr(1)
15520 .m(1)
15521 .n(n)
15522 .k(k)
15523 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015524 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015525 }
15526 }
15527 }
15528
15529 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_a) {
15530 TEST_REQUIRES_X86_SSE41;
15531 for (uint32_t n = 8; n <= 12; n += 4) {
15532 for (size_t k = 1; k <= 40; k += 9) {
15533 GemmMicrokernelTester()
15534 .mr(1)
15535 .nr(4)
15536 .kr(8)
15537 .sr(1)
15538 .m(1)
15539 .n(n)
15540 .k(k)
15541 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015542 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015543 }
15544 }
15545 }
15546
15547 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_subtile) {
15548 TEST_REQUIRES_X86_SSE41;
15549 for (uint32_t n = 8; n <= 12; n += 4) {
15550 for (size_t k = 1; k <= 40; k += 9) {
15551 for (uint32_t m = 1; m <= 1; m++) {
15552 GemmMicrokernelTester()
15553 .mr(1)
15554 .nr(4)
15555 .kr(8)
15556 .sr(1)
15557 .m(m)
15558 .n(n)
15559 .k(k)
15560 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015561 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015562 }
15563 }
15564 }
15565 }
15566
15567 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm_subtile) {
15568 TEST_REQUIRES_X86_SSE41;
15569 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015570 for (uint32_t n = 1; n <= 4; n++) {
15571 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015572 GemmMicrokernelTester()
15573 .mr(1)
15574 .nr(4)
15575 .kr(8)
15576 .sr(1)
15577 .m(m)
15578 .n(n)
15579 .k(k)
15580 .cm_stride(7)
15581 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015582 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015583 }
15584 }
15585 }
15586 }
15587
15588 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmin) {
15589 TEST_REQUIRES_X86_SSE41;
15590 GemmMicrokernelTester()
15591 .mr(1)
15592 .nr(4)
15593 .kr(8)
15594 .sr(1)
15595 .m(1)
15596 .n(4)
15597 .k(8)
15598 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015599 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015600 }
15601
15602 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmax) {
15603 TEST_REQUIRES_X86_SSE41;
15604 GemmMicrokernelTester()
15605 .mr(1)
15606 .nr(4)
15607 .kr(8)
15608 .sr(1)
15609 .m(1)
15610 .n(4)
15611 .k(8)
15612 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015613 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015614 }
15615
15616 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm) {
15617 TEST_REQUIRES_X86_SSE41;
15618 GemmMicrokernelTester()
15619 .mr(1)
15620 .nr(4)
15621 .kr(8)
15622 .sr(1)
15623 .m(1)
15624 .n(4)
15625 .k(8)
15626 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015627 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015628 }
15629
15630 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_a_zero_point) {
15631 TEST_REQUIRES_X86_SSE41;
15632 for (size_t k = 1; k <= 40; k += 9) {
15633 GemmMicrokernelTester()
15634 .mr(1)
15635 .nr(4)
15636 .kr(8)
15637 .sr(1)
15638 .m(1)
15639 .n(4)
15640 .k(k)
15641 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015642 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015643 }
15644 }
15645
15646 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_b_zero_point) {
15647 TEST_REQUIRES_X86_SSE41;
15648 for (size_t k = 1; k <= 40; k += 9) {
15649 GemmMicrokernelTester()
15650 .mr(1)
15651 .nr(4)
15652 .kr(8)
15653 .sr(1)
15654 .m(1)
15655 .n(4)
15656 .k(k)
15657 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015658 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015659 }
15660 }
15661
15662 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_zero_point) {
15663 TEST_REQUIRES_X86_SSE41;
15664 for (size_t k = 1; k <= 40; k += 9) {
15665 GemmMicrokernelTester()
15666 .mr(1)
15667 .nr(4)
15668 .kr(8)
15669 .sr(1)
15670 .m(1)
15671 .n(4)
15672 .k(k)
15673 .a_zero_point(0)
15674 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015675 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015676 }
15677 }
15678#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15679
15680
15681#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15682 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8) {
15683 TEST_REQUIRES_X86_SSE41;
15684 GemmMicrokernelTester()
15685 .mr(2)
15686 .nr(4)
15687 .kr(8)
15688 .sr(1)
15689 .m(2)
15690 .n(4)
15691 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015692 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015693 }
15694
15695 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cn) {
15696 TEST_REQUIRES_X86_SSE41;
15697 GemmMicrokernelTester()
15698 .mr(2)
15699 .nr(4)
15700 .kr(8)
15701 .sr(1)
15702 .m(2)
15703 .n(4)
15704 .k(8)
15705 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015706 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015707 }
15708
15709 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_strided_a) {
15710 TEST_REQUIRES_X86_SSE41;
15711 GemmMicrokernelTester()
15712 .mr(2)
15713 .nr(4)
15714 .kr(8)
15715 .sr(1)
15716 .m(2)
15717 .n(4)
15718 .k(8)
15719 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015720 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015721 }
15722
15723 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile) {
15724 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015725 for (uint32_t n = 1; n <= 4; n++) {
15726 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015727 GemmMicrokernelTester()
15728 .mr(2)
15729 .nr(4)
15730 .kr(8)
15731 .sr(1)
15732 .m(m)
15733 .n(n)
15734 .k(8)
15735 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015736 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015737 }
15738 }
15739 }
15740
15741 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
15742 TEST_REQUIRES_X86_SSE41;
15743 for (uint32_t m = 1; m <= 2; m++) {
15744 GemmMicrokernelTester()
15745 .mr(2)
15746 .nr(4)
15747 .kr(8)
15748 .sr(1)
15749 .m(m)
15750 .n(4)
15751 .k(8)
15752 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015753 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015754 }
15755 }
15756
15757 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
15758 TEST_REQUIRES_X86_SSE41;
15759 for (uint32_t n = 1; n <= 4; n++) {
15760 GemmMicrokernelTester()
15761 .mr(2)
15762 .nr(4)
15763 .kr(8)
15764 .sr(1)
15765 .m(2)
15766 .n(n)
15767 .k(8)
15768 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015769 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015770 }
15771 }
15772
15773 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8) {
15774 TEST_REQUIRES_X86_SSE41;
15775 for (size_t k = 1; k < 8; k++) {
15776 GemmMicrokernelTester()
15777 .mr(2)
15778 .nr(4)
15779 .kr(8)
15780 .sr(1)
15781 .m(2)
15782 .n(4)
15783 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015784 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015785 }
15786 }
15787
15788 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_strided_a) {
15789 TEST_REQUIRES_X86_SSE41;
15790 for (size_t k = 1; k < 8; k++) {
15791 GemmMicrokernelTester()
15792 .mr(2)
15793 .nr(4)
15794 .kr(8)
15795 .sr(1)
15796 .m(2)
15797 .n(4)
15798 .k(k)
15799 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015800 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015801 }
15802 }
15803
15804 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_subtile) {
15805 TEST_REQUIRES_X86_SSE41;
15806 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015807 for (uint32_t n = 1; n <= 4; n++) {
15808 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015809 GemmMicrokernelTester()
15810 .mr(2)
15811 .nr(4)
15812 .kr(8)
15813 .sr(1)
15814 .m(m)
15815 .n(n)
15816 .k(k)
15817 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015818 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015819 }
15820 }
15821 }
15822 }
15823
15824 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8) {
15825 TEST_REQUIRES_X86_SSE41;
15826 for (size_t k = 9; k < 16; k++) {
15827 GemmMicrokernelTester()
15828 .mr(2)
15829 .nr(4)
15830 .kr(8)
15831 .sr(1)
15832 .m(2)
15833 .n(4)
15834 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015835 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015836 }
15837 }
15838
15839 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_strided_a) {
15840 TEST_REQUIRES_X86_SSE41;
15841 for (size_t k = 9; k < 16; k++) {
15842 GemmMicrokernelTester()
15843 .mr(2)
15844 .nr(4)
15845 .kr(8)
15846 .sr(1)
15847 .m(2)
15848 .n(4)
15849 .k(k)
15850 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015851 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015852 }
15853 }
15854
15855 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_subtile) {
15856 TEST_REQUIRES_X86_SSE41;
15857 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015858 for (uint32_t n = 1; n <= 4; n++) {
15859 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015860 GemmMicrokernelTester()
15861 .mr(2)
15862 .nr(4)
15863 .kr(8)
15864 .sr(1)
15865 .m(m)
15866 .n(n)
15867 .k(k)
15868 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015869 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015870 }
15871 }
15872 }
15873 }
15874
15875 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8) {
15876 TEST_REQUIRES_X86_SSE41;
15877 for (size_t k = 16; k <= 80; k += 8) {
15878 GemmMicrokernelTester()
15879 .mr(2)
15880 .nr(4)
15881 .kr(8)
15882 .sr(1)
15883 .m(2)
15884 .n(4)
15885 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015886 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015887 }
15888 }
15889
15890 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_strided_a) {
15891 TEST_REQUIRES_X86_SSE41;
15892 for (size_t k = 16; k <= 80; k += 8) {
15893 GemmMicrokernelTester()
15894 .mr(2)
15895 .nr(4)
15896 .kr(8)
15897 .sr(1)
15898 .m(2)
15899 .n(4)
15900 .k(k)
15901 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015902 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015903 }
15904 }
15905
15906 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_subtile) {
15907 TEST_REQUIRES_X86_SSE41;
15908 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015909 for (uint32_t n = 1; n <= 4; n++) {
15910 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015911 GemmMicrokernelTester()
15912 .mr(2)
15913 .nr(4)
15914 .kr(8)
15915 .sr(1)
15916 .m(m)
15917 .n(n)
15918 .k(k)
15919 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015920 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015921 }
15922 }
15923 }
15924 }
15925
15926 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4) {
15927 TEST_REQUIRES_X86_SSE41;
15928 for (uint32_t n = 5; n < 8; n++) {
15929 for (size_t k = 1; k <= 40; k += 9) {
15930 GemmMicrokernelTester()
15931 .mr(2)
15932 .nr(4)
15933 .kr(8)
15934 .sr(1)
15935 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015936 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015937 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015938 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015939 }
15940 }
15941 }
15942
15943 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
15944 TEST_REQUIRES_X86_SSE41;
15945 for (uint32_t n = 5; n < 8; n++) {
15946 for (size_t k = 1; k <= 40; k += 9) {
15947 GemmMicrokernelTester()
15948 .mr(2)
15949 .nr(4)
15950 .kr(8)
15951 .sr(1)
15952 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015953 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015954 .k(k)
15955 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015956 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015957 }
15958 }
15959 }
15960
15961 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_a) {
15962 TEST_REQUIRES_X86_SSE41;
15963 for (uint32_t n = 5; n < 8; n++) {
15964 for (size_t k = 1; k <= 40; k += 9) {
15965 GemmMicrokernelTester()
15966 .mr(2)
15967 .nr(4)
15968 .kr(8)
15969 .sr(1)
15970 .m(2)
15971 .n(n)
15972 .k(k)
15973 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015974 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015975 }
15976 }
15977 }
15978
15979 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_subtile) {
15980 TEST_REQUIRES_X86_SSE41;
15981 for (uint32_t n = 5; n < 8; n++) {
15982 for (size_t k = 1; k <= 40; k += 9) {
15983 for (uint32_t m = 1; m <= 2; m++) {
15984 GemmMicrokernelTester()
15985 .mr(2)
15986 .nr(4)
15987 .kr(8)
15988 .sr(1)
15989 .m(m)
15990 .n(n)
15991 .k(k)
15992 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015993 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015994 }
15995 }
15996 }
15997 }
15998
15999 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4) {
16000 TEST_REQUIRES_X86_SSE41;
16001 for (uint32_t n = 8; n <= 12; n += 4) {
16002 for (size_t k = 1; k <= 40; k += 9) {
16003 GemmMicrokernelTester()
16004 .mr(2)
16005 .nr(4)
16006 .kr(8)
16007 .sr(1)
16008 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016009 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016010 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016011 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016012 }
16013 }
16014 }
16015
16016 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
16017 TEST_REQUIRES_X86_SSE41;
16018 for (uint32_t n = 8; n <= 12; n += 4) {
16019 for (size_t k = 1; k <= 40; k += 9) {
16020 GemmMicrokernelTester()
16021 .mr(2)
16022 .nr(4)
16023 .kr(8)
16024 .sr(1)
16025 .m(2)
16026 .n(n)
16027 .k(k)
16028 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016029 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016030 }
16031 }
16032 }
16033
16034 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_a) {
16035 TEST_REQUIRES_X86_SSE41;
16036 for (uint32_t n = 8; n <= 12; n += 4) {
16037 for (size_t k = 1; k <= 40; k += 9) {
16038 GemmMicrokernelTester()
16039 .mr(2)
16040 .nr(4)
16041 .kr(8)
16042 .sr(1)
16043 .m(2)
16044 .n(n)
16045 .k(k)
16046 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016047 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016048 }
16049 }
16050 }
16051
16052 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_subtile) {
16053 TEST_REQUIRES_X86_SSE41;
16054 for (uint32_t n = 8; n <= 12; n += 4) {
16055 for (size_t k = 1; k <= 40; k += 9) {
16056 for (uint32_t m = 1; m <= 2; m++) {
16057 GemmMicrokernelTester()
16058 .mr(2)
16059 .nr(4)
16060 .kr(8)
16061 .sr(1)
16062 .m(m)
16063 .n(n)
16064 .k(k)
16065 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016066 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016067 }
16068 }
16069 }
16070 }
16071
16072 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm_subtile) {
16073 TEST_REQUIRES_X86_SSE41;
16074 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016075 for (uint32_t n = 1; n <= 4; n++) {
16076 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016077 GemmMicrokernelTester()
16078 .mr(2)
16079 .nr(4)
16080 .kr(8)
16081 .sr(1)
16082 .m(m)
16083 .n(n)
16084 .k(k)
16085 .cm_stride(7)
16086 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016087 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016088 }
16089 }
16090 }
16091 }
16092
16093 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmin) {
16094 TEST_REQUIRES_X86_SSE41;
16095 GemmMicrokernelTester()
16096 .mr(2)
16097 .nr(4)
16098 .kr(8)
16099 .sr(1)
16100 .m(2)
16101 .n(4)
16102 .k(8)
16103 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016104 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016105 }
16106
16107 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmax) {
16108 TEST_REQUIRES_X86_SSE41;
16109 GemmMicrokernelTester()
16110 .mr(2)
16111 .nr(4)
16112 .kr(8)
16113 .sr(1)
16114 .m(2)
16115 .n(4)
16116 .k(8)
16117 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016118 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016119 }
16120
16121 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm) {
16122 TEST_REQUIRES_X86_SSE41;
16123 GemmMicrokernelTester()
16124 .mr(2)
16125 .nr(4)
16126 .kr(8)
16127 .sr(1)
16128 .m(2)
16129 .n(4)
16130 .k(8)
16131 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016132 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016133 }
16134
16135 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_a_zero_point) {
16136 TEST_REQUIRES_X86_SSE41;
16137 for (size_t k = 1; k <= 40; k += 9) {
16138 GemmMicrokernelTester()
16139 .mr(2)
16140 .nr(4)
16141 .kr(8)
16142 .sr(1)
16143 .m(2)
16144 .n(4)
16145 .k(k)
16146 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016147 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016148 }
16149 }
16150
16151 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_b_zero_point) {
16152 TEST_REQUIRES_X86_SSE41;
16153 for (size_t k = 1; k <= 40; k += 9) {
16154 GemmMicrokernelTester()
16155 .mr(2)
16156 .nr(4)
16157 .kr(8)
16158 .sr(1)
16159 .m(2)
16160 .n(4)
16161 .k(k)
16162 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016163 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016164 }
16165 }
16166
16167 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_zero_point) {
16168 TEST_REQUIRES_X86_SSE41;
16169 for (size_t k = 1; k <= 40; k += 9) {
16170 GemmMicrokernelTester()
16171 .mr(2)
16172 .nr(4)
16173 .kr(8)
16174 .sr(1)
16175 .m(2)
16176 .n(4)
16177 .k(k)
16178 .a_zero_point(0)
16179 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016180 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016181 }
16182 }
16183#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16184
16185
16186#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16187 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8) {
16188 TEST_REQUIRES_X86_AVX;
16189 GemmMicrokernelTester()
16190 .mr(1)
16191 .nr(4)
16192 .kr(8)
16193 .sr(1)
16194 .m(1)
16195 .n(4)
16196 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080016197 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016198 }
16199
16200 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cn) {
16201 TEST_REQUIRES_X86_AVX;
16202 GemmMicrokernelTester()
16203 .mr(1)
16204 .nr(4)
16205 .kr(8)
16206 .sr(1)
16207 .m(1)
16208 .n(4)
16209 .k(8)
16210 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016211 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016212 }
16213
16214 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_strided_a) {
16215 TEST_REQUIRES_X86_AVX;
16216 GemmMicrokernelTester()
16217 .mr(1)
16218 .nr(4)
16219 .kr(8)
16220 .sr(1)
16221 .m(1)
16222 .n(4)
16223 .k(8)
16224 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016225 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016226 }
16227
16228 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile) {
16229 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016230 for (uint32_t n = 1; n <= 4; n++) {
16231 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016232 GemmMicrokernelTester()
16233 .mr(1)
16234 .nr(4)
16235 .kr(8)
16236 .sr(1)
16237 .m(m)
16238 .n(n)
16239 .k(8)
16240 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016241 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016242 }
16243 }
16244 }
16245
16246 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
16247 TEST_REQUIRES_X86_AVX;
16248 for (uint32_t m = 1; m <= 1; m++) {
16249 GemmMicrokernelTester()
16250 .mr(1)
16251 .nr(4)
16252 .kr(8)
16253 .sr(1)
16254 .m(m)
16255 .n(4)
16256 .k(8)
16257 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016258 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016259 }
16260 }
16261
16262 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
16263 TEST_REQUIRES_X86_AVX;
16264 for (uint32_t n = 1; n <= 4; n++) {
16265 GemmMicrokernelTester()
16266 .mr(1)
16267 .nr(4)
16268 .kr(8)
16269 .sr(1)
16270 .m(1)
16271 .n(n)
16272 .k(8)
16273 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016274 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016275 }
16276 }
16277
16278 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8) {
16279 TEST_REQUIRES_X86_AVX;
16280 for (size_t k = 1; k < 8; k++) {
16281 GemmMicrokernelTester()
16282 .mr(1)
16283 .nr(4)
16284 .kr(8)
16285 .sr(1)
16286 .m(1)
16287 .n(4)
16288 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016289 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016290 }
16291 }
16292
16293 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_strided_a) {
16294 TEST_REQUIRES_X86_AVX;
16295 for (size_t k = 1; k < 8; k++) {
16296 GemmMicrokernelTester()
16297 .mr(1)
16298 .nr(4)
16299 .kr(8)
16300 .sr(1)
16301 .m(1)
16302 .n(4)
16303 .k(k)
16304 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016305 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016306 }
16307 }
16308
16309 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_subtile) {
16310 TEST_REQUIRES_X86_AVX;
16311 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016312 for (uint32_t n = 1; n <= 4; n++) {
16313 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016314 GemmMicrokernelTester()
16315 .mr(1)
16316 .nr(4)
16317 .kr(8)
16318 .sr(1)
16319 .m(m)
16320 .n(n)
16321 .k(k)
16322 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016323 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016324 }
16325 }
16326 }
16327 }
16328
16329 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8) {
16330 TEST_REQUIRES_X86_AVX;
16331 for (size_t k = 9; k < 16; k++) {
16332 GemmMicrokernelTester()
16333 .mr(1)
16334 .nr(4)
16335 .kr(8)
16336 .sr(1)
16337 .m(1)
16338 .n(4)
16339 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016340 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016341 }
16342 }
16343
16344 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_strided_a) {
16345 TEST_REQUIRES_X86_AVX;
16346 for (size_t k = 9; k < 16; k++) {
16347 GemmMicrokernelTester()
16348 .mr(1)
16349 .nr(4)
16350 .kr(8)
16351 .sr(1)
16352 .m(1)
16353 .n(4)
16354 .k(k)
16355 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080016356 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016357 }
16358 }
16359
16360 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_subtile) {
16361 TEST_REQUIRES_X86_AVX;
16362 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016363 for (uint32_t n = 1; n <= 4; n++) {
16364 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016365 GemmMicrokernelTester()
16366 .mr(1)
16367 .nr(4)
16368 .kr(8)
16369 .sr(1)
16370 .m(m)
16371 .n(n)
16372 .k(k)
16373 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016374 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016375 }
16376 }
16377 }
16378 }
16379
16380 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8) {
16381 TEST_REQUIRES_X86_AVX;
16382 for (size_t k = 16; k <= 80; k += 8) {
16383 GemmMicrokernelTester()
16384 .mr(1)
16385 .nr(4)
16386 .kr(8)
16387 .sr(1)
16388 .m(1)
16389 .n(4)
16390 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016391 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016392 }
16393 }
16394
16395 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_strided_a) {
16396 TEST_REQUIRES_X86_AVX;
16397 for (size_t k = 16; k <= 80; k += 8) {
16398 GemmMicrokernelTester()
16399 .mr(1)
16400 .nr(4)
16401 .kr(8)
16402 .sr(1)
16403 .m(1)
16404 .n(4)
16405 .k(k)
16406 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016407 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016408 }
16409 }
16410
16411 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_subtile) {
16412 TEST_REQUIRES_X86_AVX;
16413 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016414 for (uint32_t n = 1; n <= 4; n++) {
16415 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016416 GemmMicrokernelTester()
16417 .mr(1)
16418 .nr(4)
16419 .kr(8)
16420 .sr(1)
16421 .m(m)
16422 .n(n)
16423 .k(k)
16424 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016425 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016426 }
16427 }
16428 }
16429 }
16430
16431 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4) {
16432 TEST_REQUIRES_X86_AVX;
16433 for (uint32_t n = 5; n < 8; n++) {
16434 for (size_t k = 1; k <= 40; k += 9) {
16435 GemmMicrokernelTester()
16436 .mr(1)
16437 .nr(4)
16438 .kr(8)
16439 .sr(1)
16440 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016441 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016442 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016443 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016444 }
16445 }
16446 }
16447
16448 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
16449 TEST_REQUIRES_X86_AVX;
16450 for (uint32_t n = 5; n < 8; n++) {
16451 for (size_t k = 1; k <= 40; k += 9) {
16452 GemmMicrokernelTester()
16453 .mr(1)
16454 .nr(4)
16455 .kr(8)
16456 .sr(1)
16457 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016458 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016459 .k(k)
16460 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016461 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016462 }
16463 }
16464 }
16465
16466 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_a) {
16467 TEST_REQUIRES_X86_AVX;
16468 for (uint32_t n = 5; n < 8; n++) {
16469 for (size_t k = 1; k <= 40; k += 9) {
16470 GemmMicrokernelTester()
16471 .mr(1)
16472 .nr(4)
16473 .kr(8)
16474 .sr(1)
16475 .m(1)
16476 .n(n)
16477 .k(k)
16478 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016479 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016480 }
16481 }
16482 }
16483
16484 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_subtile) {
16485 TEST_REQUIRES_X86_AVX;
16486 for (uint32_t n = 5; n < 8; n++) {
16487 for (size_t k = 1; k <= 40; k += 9) {
16488 for (uint32_t m = 1; m <= 1; m++) {
16489 GemmMicrokernelTester()
16490 .mr(1)
16491 .nr(4)
16492 .kr(8)
16493 .sr(1)
16494 .m(m)
16495 .n(n)
16496 .k(k)
16497 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016498 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016499 }
16500 }
16501 }
16502 }
16503
16504 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4) {
16505 TEST_REQUIRES_X86_AVX;
16506 for (uint32_t n = 8; n <= 12; n += 4) {
16507 for (size_t k = 1; k <= 40; k += 9) {
16508 GemmMicrokernelTester()
16509 .mr(1)
16510 .nr(4)
16511 .kr(8)
16512 .sr(1)
16513 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016514 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016515 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016516 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016517 }
16518 }
16519 }
16520
16521 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_cn) {
16522 TEST_REQUIRES_X86_AVX;
16523 for (uint32_t n = 8; n <= 12; n += 4) {
16524 for (size_t k = 1; k <= 40; k += 9) {
16525 GemmMicrokernelTester()
16526 .mr(1)
16527 .nr(4)
16528 .kr(8)
16529 .sr(1)
16530 .m(1)
16531 .n(n)
16532 .k(k)
16533 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016534 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016535 }
16536 }
16537 }
16538
16539 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_a) {
16540 TEST_REQUIRES_X86_AVX;
16541 for (uint32_t n = 8; n <= 12; n += 4) {
16542 for (size_t k = 1; k <= 40; k += 9) {
16543 GemmMicrokernelTester()
16544 .mr(1)
16545 .nr(4)
16546 .kr(8)
16547 .sr(1)
16548 .m(1)
16549 .n(n)
16550 .k(k)
16551 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016552 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016553 }
16554 }
16555 }
16556
16557 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_subtile) {
16558 TEST_REQUIRES_X86_AVX;
16559 for (uint32_t n = 8; n <= 12; n += 4) {
16560 for (size_t k = 1; k <= 40; k += 9) {
16561 for (uint32_t m = 1; m <= 1; m++) {
16562 GemmMicrokernelTester()
16563 .mr(1)
16564 .nr(4)
16565 .kr(8)
16566 .sr(1)
16567 .m(m)
16568 .n(n)
16569 .k(k)
16570 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016571 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016572 }
16573 }
16574 }
16575 }
16576
16577 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm_subtile) {
16578 TEST_REQUIRES_X86_AVX;
16579 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016580 for (uint32_t n = 1; n <= 4; n++) {
16581 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016582 GemmMicrokernelTester()
16583 .mr(1)
16584 .nr(4)
16585 .kr(8)
16586 .sr(1)
16587 .m(m)
16588 .n(n)
16589 .k(k)
16590 .cm_stride(7)
16591 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016592 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016593 }
16594 }
16595 }
16596 }
16597
16598 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmin) {
16599 TEST_REQUIRES_X86_AVX;
16600 GemmMicrokernelTester()
16601 .mr(1)
16602 .nr(4)
16603 .kr(8)
16604 .sr(1)
16605 .m(1)
16606 .n(4)
16607 .k(8)
16608 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016609 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016610 }
16611
16612 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmax) {
16613 TEST_REQUIRES_X86_AVX;
16614 GemmMicrokernelTester()
16615 .mr(1)
16616 .nr(4)
16617 .kr(8)
16618 .sr(1)
16619 .m(1)
16620 .n(4)
16621 .k(8)
16622 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016623 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016624 }
16625
16626 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm) {
16627 TEST_REQUIRES_X86_AVX;
16628 GemmMicrokernelTester()
16629 .mr(1)
16630 .nr(4)
16631 .kr(8)
16632 .sr(1)
16633 .m(1)
16634 .n(4)
16635 .k(8)
16636 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016637 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016638 }
16639
16640 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_a_zero_point) {
16641 TEST_REQUIRES_X86_AVX;
16642 for (size_t k = 1; k <= 40; k += 9) {
16643 GemmMicrokernelTester()
16644 .mr(1)
16645 .nr(4)
16646 .kr(8)
16647 .sr(1)
16648 .m(1)
16649 .n(4)
16650 .k(k)
16651 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016652 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016653 }
16654 }
16655
16656 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_b_zero_point) {
16657 TEST_REQUIRES_X86_AVX;
16658 for (size_t k = 1; k <= 40; k += 9) {
16659 GemmMicrokernelTester()
16660 .mr(1)
16661 .nr(4)
16662 .kr(8)
16663 .sr(1)
16664 .m(1)
16665 .n(4)
16666 .k(k)
16667 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016668 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016669 }
16670 }
16671
16672 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_zero_point) {
16673 TEST_REQUIRES_X86_AVX;
16674 for (size_t k = 1; k <= 40; k += 9) {
16675 GemmMicrokernelTester()
16676 .mr(1)
16677 .nr(4)
16678 .kr(8)
16679 .sr(1)
16680 .m(1)
16681 .n(4)
16682 .k(k)
16683 .a_zero_point(0)
16684 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016685 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016686 }
16687 }
16688#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16689
16690
16691#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16692 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8) {
16693 TEST_REQUIRES_X86_AVX;
16694 GemmMicrokernelTester()
16695 .mr(2)
16696 .nr(4)
16697 .kr(8)
16698 .sr(1)
16699 .m(2)
16700 .n(4)
16701 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080016702 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016703 }
16704
16705 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cn) {
16706 TEST_REQUIRES_X86_AVX;
16707 GemmMicrokernelTester()
16708 .mr(2)
16709 .nr(4)
16710 .kr(8)
16711 .sr(1)
16712 .m(2)
16713 .n(4)
16714 .k(8)
16715 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016716 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016717 }
16718
16719 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_strided_a) {
16720 TEST_REQUIRES_X86_AVX;
16721 GemmMicrokernelTester()
16722 .mr(2)
16723 .nr(4)
16724 .kr(8)
16725 .sr(1)
16726 .m(2)
16727 .n(4)
16728 .k(8)
16729 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016730 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016731 }
16732
16733 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile) {
16734 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016735 for (uint32_t n = 1; n <= 4; n++) {
16736 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016737 GemmMicrokernelTester()
16738 .mr(2)
16739 .nr(4)
16740 .kr(8)
16741 .sr(1)
16742 .m(m)
16743 .n(n)
16744 .k(8)
16745 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016746 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016747 }
16748 }
16749 }
16750
16751 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
16752 TEST_REQUIRES_X86_AVX;
16753 for (uint32_t m = 1; m <= 2; m++) {
16754 GemmMicrokernelTester()
16755 .mr(2)
16756 .nr(4)
16757 .kr(8)
16758 .sr(1)
16759 .m(m)
16760 .n(4)
16761 .k(8)
16762 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016763 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016764 }
16765 }
16766
16767 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
16768 TEST_REQUIRES_X86_AVX;
16769 for (uint32_t n = 1; n <= 4; n++) {
16770 GemmMicrokernelTester()
16771 .mr(2)
16772 .nr(4)
16773 .kr(8)
16774 .sr(1)
16775 .m(2)
16776 .n(n)
16777 .k(8)
16778 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016779 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016780 }
16781 }
16782
16783 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8) {
16784 TEST_REQUIRES_X86_AVX;
16785 for (size_t k = 1; k < 8; k++) {
16786 GemmMicrokernelTester()
16787 .mr(2)
16788 .nr(4)
16789 .kr(8)
16790 .sr(1)
16791 .m(2)
16792 .n(4)
16793 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016794 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016795 }
16796 }
16797
16798 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_strided_a) {
16799 TEST_REQUIRES_X86_AVX;
16800 for (size_t k = 1; k < 8; k++) {
16801 GemmMicrokernelTester()
16802 .mr(2)
16803 .nr(4)
16804 .kr(8)
16805 .sr(1)
16806 .m(2)
16807 .n(4)
16808 .k(k)
16809 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016810 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016811 }
16812 }
16813
16814 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_subtile) {
16815 TEST_REQUIRES_X86_AVX;
16816 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016817 for (uint32_t n = 1; n <= 4; n++) {
16818 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016819 GemmMicrokernelTester()
16820 .mr(2)
16821 .nr(4)
16822 .kr(8)
16823 .sr(1)
16824 .m(m)
16825 .n(n)
16826 .k(k)
16827 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016828 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016829 }
16830 }
16831 }
16832 }
16833
16834 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8) {
16835 TEST_REQUIRES_X86_AVX;
16836 for (size_t k = 9; k < 16; k++) {
16837 GemmMicrokernelTester()
16838 .mr(2)
16839 .nr(4)
16840 .kr(8)
16841 .sr(1)
16842 .m(2)
16843 .n(4)
16844 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016845 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016846 }
16847 }
16848
16849 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_strided_a) {
16850 TEST_REQUIRES_X86_AVX;
16851 for (size_t k = 9; k < 16; k++) {
16852 GemmMicrokernelTester()
16853 .mr(2)
16854 .nr(4)
16855 .kr(8)
16856 .sr(1)
16857 .m(2)
16858 .n(4)
16859 .k(k)
16860 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080016861 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016862 }
16863 }
16864
16865 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_subtile) {
16866 TEST_REQUIRES_X86_AVX;
16867 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016868 for (uint32_t n = 1; n <= 4; n++) {
16869 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016870 GemmMicrokernelTester()
16871 .mr(2)
16872 .nr(4)
16873 .kr(8)
16874 .sr(1)
16875 .m(m)
16876 .n(n)
16877 .k(k)
16878 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016879 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016880 }
16881 }
16882 }
16883 }
16884
16885 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8) {
16886 TEST_REQUIRES_X86_AVX;
16887 for (size_t k = 16; k <= 80; k += 8) {
16888 GemmMicrokernelTester()
16889 .mr(2)
16890 .nr(4)
16891 .kr(8)
16892 .sr(1)
16893 .m(2)
16894 .n(4)
16895 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016896 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016897 }
16898 }
16899
16900 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_strided_a) {
16901 TEST_REQUIRES_X86_AVX;
16902 for (size_t k = 16; k <= 80; k += 8) {
16903 GemmMicrokernelTester()
16904 .mr(2)
16905 .nr(4)
16906 .kr(8)
16907 .sr(1)
16908 .m(2)
16909 .n(4)
16910 .k(k)
16911 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016912 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016913 }
16914 }
16915
16916 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_subtile) {
16917 TEST_REQUIRES_X86_AVX;
16918 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016919 for (uint32_t n = 1; n <= 4; n++) {
16920 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016921 GemmMicrokernelTester()
16922 .mr(2)
16923 .nr(4)
16924 .kr(8)
16925 .sr(1)
16926 .m(m)
16927 .n(n)
16928 .k(k)
16929 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016930 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016931 }
16932 }
16933 }
16934 }
16935
16936 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4) {
16937 TEST_REQUIRES_X86_AVX;
16938 for (uint32_t n = 5; n < 8; n++) {
16939 for (size_t k = 1; k <= 40; k += 9) {
16940 GemmMicrokernelTester()
16941 .mr(2)
16942 .nr(4)
16943 .kr(8)
16944 .sr(1)
16945 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016946 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016947 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016948 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016949 }
16950 }
16951 }
16952
16953 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
16954 TEST_REQUIRES_X86_AVX;
16955 for (uint32_t n = 5; n < 8; n++) {
16956 for (size_t k = 1; k <= 40; k += 9) {
16957 GemmMicrokernelTester()
16958 .mr(2)
16959 .nr(4)
16960 .kr(8)
16961 .sr(1)
16962 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016963 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016964 .k(k)
16965 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016966 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016967 }
16968 }
16969 }
16970
16971 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_a) {
16972 TEST_REQUIRES_X86_AVX;
16973 for (uint32_t n = 5; n < 8; n++) {
16974 for (size_t k = 1; k <= 40; k += 9) {
16975 GemmMicrokernelTester()
16976 .mr(2)
16977 .nr(4)
16978 .kr(8)
16979 .sr(1)
16980 .m(2)
16981 .n(n)
16982 .k(k)
16983 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016984 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016985 }
16986 }
16987 }
16988
16989 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_subtile) {
16990 TEST_REQUIRES_X86_AVX;
16991 for (uint32_t n = 5; n < 8; n++) {
16992 for (size_t k = 1; k <= 40; k += 9) {
16993 for (uint32_t m = 1; m <= 2; m++) {
16994 GemmMicrokernelTester()
16995 .mr(2)
16996 .nr(4)
16997 .kr(8)
16998 .sr(1)
16999 .m(m)
17000 .n(n)
17001 .k(k)
17002 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017003 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017004 }
17005 }
17006 }
17007 }
17008
17009 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4) {
17010 TEST_REQUIRES_X86_AVX;
17011 for (uint32_t n = 8; n <= 12; n += 4) {
17012 for (size_t k = 1; k <= 40; k += 9) {
17013 GemmMicrokernelTester()
17014 .mr(2)
17015 .nr(4)
17016 .kr(8)
17017 .sr(1)
17018 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017019 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017020 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017021 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017022 }
17023 }
17024 }
17025
17026 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_cn) {
17027 TEST_REQUIRES_X86_AVX;
17028 for (uint32_t n = 8; n <= 12; n += 4) {
17029 for (size_t k = 1; k <= 40; k += 9) {
17030 GemmMicrokernelTester()
17031 .mr(2)
17032 .nr(4)
17033 .kr(8)
17034 .sr(1)
17035 .m(2)
17036 .n(n)
17037 .k(k)
17038 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017039 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017040 }
17041 }
17042 }
17043
17044 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_a) {
17045 TEST_REQUIRES_X86_AVX;
17046 for (uint32_t n = 8; n <= 12; n += 4) {
17047 for (size_t k = 1; k <= 40; k += 9) {
17048 GemmMicrokernelTester()
17049 .mr(2)
17050 .nr(4)
17051 .kr(8)
17052 .sr(1)
17053 .m(2)
17054 .n(n)
17055 .k(k)
17056 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017057 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017058 }
17059 }
17060 }
17061
17062 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_subtile) {
17063 TEST_REQUIRES_X86_AVX;
17064 for (uint32_t n = 8; n <= 12; n += 4) {
17065 for (size_t k = 1; k <= 40; k += 9) {
17066 for (uint32_t m = 1; m <= 2; m++) {
17067 GemmMicrokernelTester()
17068 .mr(2)
17069 .nr(4)
17070 .kr(8)
17071 .sr(1)
17072 .m(m)
17073 .n(n)
17074 .k(k)
17075 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017076 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017077 }
17078 }
17079 }
17080 }
17081
17082 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm_subtile) {
17083 TEST_REQUIRES_X86_AVX;
17084 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017085 for (uint32_t n = 1; n <= 4; n++) {
17086 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017087 GemmMicrokernelTester()
17088 .mr(2)
17089 .nr(4)
17090 .kr(8)
17091 .sr(1)
17092 .m(m)
17093 .n(n)
17094 .k(k)
17095 .cm_stride(7)
17096 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017097 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017098 }
17099 }
17100 }
17101 }
17102
17103 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmin) {
17104 TEST_REQUIRES_X86_AVX;
17105 GemmMicrokernelTester()
17106 .mr(2)
17107 .nr(4)
17108 .kr(8)
17109 .sr(1)
17110 .m(2)
17111 .n(4)
17112 .k(8)
17113 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017114 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017115 }
17116
17117 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmax) {
17118 TEST_REQUIRES_X86_AVX;
17119 GemmMicrokernelTester()
17120 .mr(2)
17121 .nr(4)
17122 .kr(8)
17123 .sr(1)
17124 .m(2)
17125 .n(4)
17126 .k(8)
17127 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017128 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017129 }
17130
17131 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm) {
17132 TEST_REQUIRES_X86_AVX;
17133 GemmMicrokernelTester()
17134 .mr(2)
17135 .nr(4)
17136 .kr(8)
17137 .sr(1)
17138 .m(2)
17139 .n(4)
17140 .k(8)
17141 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017142 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017143 }
17144
17145 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_a_zero_point) {
17146 TEST_REQUIRES_X86_AVX;
17147 for (size_t k = 1; k <= 40; k += 9) {
17148 GemmMicrokernelTester()
17149 .mr(2)
17150 .nr(4)
17151 .kr(8)
17152 .sr(1)
17153 .m(2)
17154 .n(4)
17155 .k(k)
17156 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080017157 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017158 }
17159 }
17160
17161 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_b_zero_point) {
17162 TEST_REQUIRES_X86_AVX;
17163 for (size_t k = 1; k <= 40; k += 9) {
17164 GemmMicrokernelTester()
17165 .mr(2)
17166 .nr(4)
17167 .kr(8)
17168 .sr(1)
17169 .m(2)
17170 .n(4)
17171 .k(k)
17172 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080017173 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017174 }
17175 }
17176
17177 TEST(QU8_GEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_zero_point) {
17178 TEST_REQUIRES_X86_AVX;
17179 for (size_t k = 1; k <= 40; k += 9) {
17180 GemmMicrokernelTester()
17181 .mr(2)
17182 .nr(4)
17183 .kr(8)
17184 .sr(1)
17185 .m(2)
17186 .n(4)
17187 .k(k)
17188 .a_zero_point(0)
17189 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080017190 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017191 }
17192 }
17193#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17194
17195
17196#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17197 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8) {
17198 TEST_REQUIRES_X86_XOP;
17199 GemmMicrokernelTester()
17200 .mr(3)
17201 .nr(4)
17202 .kr(8)
17203 .sr(1)
17204 .m(3)
17205 .n(4)
17206 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017207 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017208 }
17209
17210 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cn) {
17211 TEST_REQUIRES_X86_XOP;
17212 GemmMicrokernelTester()
17213 .mr(3)
17214 .nr(4)
17215 .kr(8)
17216 .sr(1)
17217 .m(3)
17218 .n(4)
17219 .k(8)
17220 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017221 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017222 }
17223
17224 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_strided_a) {
17225 TEST_REQUIRES_X86_XOP;
17226 GemmMicrokernelTester()
17227 .mr(3)
17228 .nr(4)
17229 .kr(8)
17230 .sr(1)
17231 .m(3)
17232 .n(4)
17233 .k(8)
17234 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017235 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017236 }
17237
17238 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile) {
17239 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017240 for (uint32_t n = 1; n <= 4; n++) {
17241 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017242 GemmMicrokernelTester()
17243 .mr(3)
17244 .nr(4)
17245 .kr(8)
17246 .sr(1)
17247 .m(m)
17248 .n(n)
17249 .k(8)
17250 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017251 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017252 }
17253 }
17254 }
17255
17256 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
17257 TEST_REQUIRES_X86_XOP;
17258 for (uint32_t m = 1; m <= 3; m++) {
17259 GemmMicrokernelTester()
17260 .mr(3)
17261 .nr(4)
17262 .kr(8)
17263 .sr(1)
17264 .m(m)
17265 .n(4)
17266 .k(8)
17267 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017268 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017269 }
17270 }
17271
17272 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
17273 TEST_REQUIRES_X86_XOP;
17274 for (uint32_t n = 1; n <= 4; n++) {
17275 GemmMicrokernelTester()
17276 .mr(3)
17277 .nr(4)
17278 .kr(8)
17279 .sr(1)
17280 .m(3)
17281 .n(n)
17282 .k(8)
17283 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017284 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017285 }
17286 }
17287
17288 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8) {
17289 TEST_REQUIRES_X86_XOP;
17290 for (size_t k = 1; k < 8; k++) {
17291 GemmMicrokernelTester()
17292 .mr(3)
17293 .nr(4)
17294 .kr(8)
17295 .sr(1)
17296 .m(3)
17297 .n(4)
17298 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017299 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017300 }
17301 }
17302
17303 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_strided_a) {
17304 TEST_REQUIRES_X86_XOP;
17305 for (size_t k = 1; k < 8; k++) {
17306 GemmMicrokernelTester()
17307 .mr(3)
17308 .nr(4)
17309 .kr(8)
17310 .sr(1)
17311 .m(3)
17312 .n(4)
17313 .k(k)
17314 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017315 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017316 }
17317 }
17318
17319 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_subtile) {
17320 TEST_REQUIRES_X86_XOP;
17321 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017322 for (uint32_t n = 1; n <= 4; n++) {
17323 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017324 GemmMicrokernelTester()
17325 .mr(3)
17326 .nr(4)
17327 .kr(8)
17328 .sr(1)
17329 .m(m)
17330 .n(n)
17331 .k(k)
17332 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017333 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017334 }
17335 }
17336 }
17337 }
17338
17339 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8) {
17340 TEST_REQUIRES_X86_XOP;
17341 for (size_t k = 9; k < 16; k++) {
17342 GemmMicrokernelTester()
17343 .mr(3)
17344 .nr(4)
17345 .kr(8)
17346 .sr(1)
17347 .m(3)
17348 .n(4)
17349 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017350 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017351 }
17352 }
17353
17354 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_strided_a) {
17355 TEST_REQUIRES_X86_XOP;
17356 for (size_t k = 9; k < 16; k++) {
17357 GemmMicrokernelTester()
17358 .mr(3)
17359 .nr(4)
17360 .kr(8)
17361 .sr(1)
17362 .m(3)
17363 .n(4)
17364 .k(k)
17365 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017366 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017367 }
17368 }
17369
17370 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_subtile) {
17371 TEST_REQUIRES_X86_XOP;
17372 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017373 for (uint32_t n = 1; n <= 4; n++) {
17374 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017375 GemmMicrokernelTester()
17376 .mr(3)
17377 .nr(4)
17378 .kr(8)
17379 .sr(1)
17380 .m(m)
17381 .n(n)
17382 .k(k)
17383 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017384 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017385 }
17386 }
17387 }
17388 }
17389
17390 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8) {
17391 TEST_REQUIRES_X86_XOP;
17392 for (size_t k = 16; k <= 80; k += 8) {
17393 GemmMicrokernelTester()
17394 .mr(3)
17395 .nr(4)
17396 .kr(8)
17397 .sr(1)
17398 .m(3)
17399 .n(4)
17400 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017401 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017402 }
17403 }
17404
17405 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_strided_a) {
17406 TEST_REQUIRES_X86_XOP;
17407 for (size_t k = 16; k <= 80; k += 8) {
17408 GemmMicrokernelTester()
17409 .mr(3)
17410 .nr(4)
17411 .kr(8)
17412 .sr(1)
17413 .m(3)
17414 .n(4)
17415 .k(k)
17416 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017417 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017418 }
17419 }
17420
17421 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_subtile) {
17422 TEST_REQUIRES_X86_XOP;
17423 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017424 for (uint32_t n = 1; n <= 4; n++) {
17425 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017426 GemmMicrokernelTester()
17427 .mr(3)
17428 .nr(4)
17429 .kr(8)
17430 .sr(1)
17431 .m(m)
17432 .n(n)
17433 .k(k)
17434 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017435 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017436 }
17437 }
17438 }
17439 }
17440
17441 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4) {
17442 TEST_REQUIRES_X86_XOP;
17443 for (uint32_t n = 5; n < 8; n++) {
17444 for (size_t k = 1; k <= 40; k += 9) {
17445 GemmMicrokernelTester()
17446 .mr(3)
17447 .nr(4)
17448 .kr(8)
17449 .sr(1)
17450 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017451 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017452 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017453 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017454 }
17455 }
17456 }
17457
17458 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
17459 TEST_REQUIRES_X86_XOP;
17460 for (uint32_t n = 5; n < 8; n++) {
17461 for (size_t k = 1; k <= 40; k += 9) {
17462 GemmMicrokernelTester()
17463 .mr(3)
17464 .nr(4)
17465 .kr(8)
17466 .sr(1)
17467 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017468 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017469 .k(k)
17470 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017471 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017472 }
17473 }
17474 }
17475
17476 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_a) {
17477 TEST_REQUIRES_X86_XOP;
17478 for (uint32_t n = 5; n < 8; n++) {
17479 for (size_t k = 1; k <= 40; k += 9) {
17480 GemmMicrokernelTester()
17481 .mr(3)
17482 .nr(4)
17483 .kr(8)
17484 .sr(1)
17485 .m(3)
17486 .n(n)
17487 .k(k)
17488 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017489 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017490 }
17491 }
17492 }
17493
17494 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_subtile) {
17495 TEST_REQUIRES_X86_XOP;
17496 for (uint32_t n = 5; n < 8; n++) {
17497 for (size_t k = 1; k <= 40; k += 9) {
17498 for (uint32_t m = 1; m <= 3; m++) {
17499 GemmMicrokernelTester()
17500 .mr(3)
17501 .nr(4)
17502 .kr(8)
17503 .sr(1)
17504 .m(m)
17505 .n(n)
17506 .k(k)
17507 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017508 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017509 }
17510 }
17511 }
17512 }
17513
17514 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4) {
17515 TEST_REQUIRES_X86_XOP;
17516 for (uint32_t n = 8; n <= 12; n += 4) {
17517 for (size_t k = 1; k <= 40; k += 9) {
17518 GemmMicrokernelTester()
17519 .mr(3)
17520 .nr(4)
17521 .kr(8)
17522 .sr(1)
17523 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017524 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017525 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017526 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017527 }
17528 }
17529 }
17530
17531 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_cn) {
17532 TEST_REQUIRES_X86_XOP;
17533 for (uint32_t n = 8; n <= 12; n += 4) {
17534 for (size_t k = 1; k <= 40; k += 9) {
17535 GemmMicrokernelTester()
17536 .mr(3)
17537 .nr(4)
17538 .kr(8)
17539 .sr(1)
17540 .m(3)
17541 .n(n)
17542 .k(k)
17543 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017544 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017545 }
17546 }
17547 }
17548
17549 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_a) {
17550 TEST_REQUIRES_X86_XOP;
17551 for (uint32_t n = 8; n <= 12; n += 4) {
17552 for (size_t k = 1; k <= 40; k += 9) {
17553 GemmMicrokernelTester()
17554 .mr(3)
17555 .nr(4)
17556 .kr(8)
17557 .sr(1)
17558 .m(3)
17559 .n(n)
17560 .k(k)
17561 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017562 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017563 }
17564 }
17565 }
17566
17567 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_subtile) {
17568 TEST_REQUIRES_X86_XOP;
17569 for (uint32_t n = 8; n <= 12; n += 4) {
17570 for (size_t k = 1; k <= 40; k += 9) {
17571 for (uint32_t m = 1; m <= 3; m++) {
17572 GemmMicrokernelTester()
17573 .mr(3)
17574 .nr(4)
17575 .kr(8)
17576 .sr(1)
17577 .m(m)
17578 .n(n)
17579 .k(k)
17580 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017581 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017582 }
17583 }
17584 }
17585 }
17586
17587 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm_subtile) {
17588 TEST_REQUIRES_X86_XOP;
17589 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017590 for (uint32_t n = 1; n <= 4; n++) {
17591 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017592 GemmMicrokernelTester()
17593 .mr(3)
17594 .nr(4)
17595 .kr(8)
17596 .sr(1)
17597 .m(m)
17598 .n(n)
17599 .k(k)
17600 .cm_stride(7)
17601 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017602 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017603 }
17604 }
17605 }
17606 }
17607
17608 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmin) {
17609 TEST_REQUIRES_X86_XOP;
17610 GemmMicrokernelTester()
17611 .mr(3)
17612 .nr(4)
17613 .kr(8)
17614 .sr(1)
17615 .m(3)
17616 .n(4)
17617 .k(8)
17618 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017619 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017620 }
17621
17622 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmax) {
17623 TEST_REQUIRES_X86_XOP;
17624 GemmMicrokernelTester()
17625 .mr(3)
17626 .nr(4)
17627 .kr(8)
17628 .sr(1)
17629 .m(3)
17630 .n(4)
17631 .k(8)
17632 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017633 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017634 }
17635
17636 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm) {
17637 TEST_REQUIRES_X86_XOP;
17638 GemmMicrokernelTester()
17639 .mr(3)
17640 .nr(4)
17641 .kr(8)
17642 .sr(1)
17643 .m(3)
17644 .n(4)
17645 .k(8)
17646 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017647 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017648 }
17649
17650 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_a_zero_point) {
17651 TEST_REQUIRES_X86_XOP;
17652 for (size_t k = 1; k <= 40; k += 9) {
17653 GemmMicrokernelTester()
17654 .mr(3)
17655 .nr(4)
17656 .kr(8)
17657 .sr(1)
17658 .m(3)
17659 .n(4)
17660 .k(k)
17661 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080017662 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017663 }
17664 }
17665
17666 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_b_zero_point) {
17667 TEST_REQUIRES_X86_XOP;
17668 for (size_t k = 1; k <= 40; k += 9) {
17669 GemmMicrokernelTester()
17670 .mr(3)
17671 .nr(4)
17672 .kr(8)
17673 .sr(1)
17674 .m(3)
17675 .n(4)
17676 .k(k)
17677 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080017678 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017679 }
17680 }
17681
17682 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_zero_point) {
17683 TEST_REQUIRES_X86_XOP;
17684 for (size_t k = 1; k <= 40; k += 9) {
17685 GemmMicrokernelTester()
17686 .mr(3)
17687 .nr(4)
17688 .kr(8)
17689 .sr(1)
17690 .m(3)
17691 .n(4)
17692 .k(k)
17693 .a_zero_point(0)
17694 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080017695 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017696 }
17697 }
17698#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17699
17700
17701#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17702 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
17703 TEST_REQUIRES_X86_AVX2;
17704 GemmMicrokernelTester()
17705 .mr(2)
17706 .nr(8)
17707 .kr(8)
17708 .sr(1)
17709 .m(2)
17710 .n(8)
17711 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017712 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017713 }
17714
17715 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
17716 TEST_REQUIRES_X86_AVX2;
17717 GemmMicrokernelTester()
17718 .mr(2)
17719 .nr(8)
17720 .kr(8)
17721 .sr(1)
17722 .m(2)
17723 .n(8)
17724 .k(8)
17725 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017726 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017727 }
17728
17729 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_strided_a) {
17730 TEST_REQUIRES_X86_AVX2;
17731 GemmMicrokernelTester()
17732 .mr(2)
17733 .nr(8)
17734 .kr(8)
17735 .sr(1)
17736 .m(2)
17737 .n(8)
17738 .k(8)
17739 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017740 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017741 }
17742
17743 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
17744 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017745 for (uint32_t n = 1; n <= 8; n++) {
17746 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017747 GemmMicrokernelTester()
17748 .mr(2)
17749 .nr(8)
17750 .kr(8)
17751 .sr(1)
17752 .m(m)
17753 .n(n)
17754 .k(8)
17755 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017756 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017757 }
17758 }
17759 }
17760
17761 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
17762 TEST_REQUIRES_X86_AVX2;
17763 for (uint32_t m = 1; m <= 2; m++) {
17764 GemmMicrokernelTester()
17765 .mr(2)
17766 .nr(8)
17767 .kr(8)
17768 .sr(1)
17769 .m(m)
17770 .n(8)
17771 .k(8)
17772 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017773 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017774 }
17775 }
17776
17777 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
17778 TEST_REQUIRES_X86_AVX2;
17779 for (uint32_t n = 1; n <= 8; n++) {
17780 GemmMicrokernelTester()
17781 .mr(2)
17782 .nr(8)
17783 .kr(8)
17784 .sr(1)
17785 .m(2)
17786 .n(n)
17787 .k(8)
17788 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017789 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017790 }
17791 }
17792
17793 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
17794 TEST_REQUIRES_X86_AVX2;
17795 for (size_t k = 1; k < 8; k++) {
17796 GemmMicrokernelTester()
17797 .mr(2)
17798 .nr(8)
17799 .kr(8)
17800 .sr(1)
17801 .m(2)
17802 .n(8)
17803 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017804 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017805 }
17806 }
17807
17808 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_strided_a) {
17809 TEST_REQUIRES_X86_AVX2;
17810 for (size_t k = 1; k < 8; k++) {
17811 GemmMicrokernelTester()
17812 .mr(2)
17813 .nr(8)
17814 .kr(8)
17815 .sr(1)
17816 .m(2)
17817 .n(8)
17818 .k(k)
17819 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017820 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017821 }
17822 }
17823
17824 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
17825 TEST_REQUIRES_X86_AVX2;
17826 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017827 for (uint32_t n = 1; n <= 8; n++) {
17828 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017829 GemmMicrokernelTester()
17830 .mr(2)
17831 .nr(8)
17832 .kr(8)
17833 .sr(1)
17834 .m(m)
17835 .n(n)
17836 .k(k)
17837 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017838 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017839 }
17840 }
17841 }
17842 }
17843
17844 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
17845 TEST_REQUIRES_X86_AVX2;
17846 for (size_t k = 9; k < 16; k++) {
17847 GemmMicrokernelTester()
17848 .mr(2)
17849 .nr(8)
17850 .kr(8)
17851 .sr(1)
17852 .m(2)
17853 .n(8)
17854 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017855 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017856 }
17857 }
17858
17859 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_strided_a) {
17860 TEST_REQUIRES_X86_AVX2;
17861 for (size_t k = 9; k < 16; k++) {
17862 GemmMicrokernelTester()
17863 .mr(2)
17864 .nr(8)
17865 .kr(8)
17866 .sr(1)
17867 .m(2)
17868 .n(8)
17869 .k(k)
17870 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017871 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017872 }
17873 }
17874
17875 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
17876 TEST_REQUIRES_X86_AVX2;
17877 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017878 for (uint32_t n = 1; n <= 8; n++) {
17879 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017880 GemmMicrokernelTester()
17881 .mr(2)
17882 .nr(8)
17883 .kr(8)
17884 .sr(1)
17885 .m(m)
17886 .n(n)
17887 .k(k)
17888 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017889 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017890 }
17891 }
17892 }
17893 }
17894
17895 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
17896 TEST_REQUIRES_X86_AVX2;
17897 for (size_t k = 16; k <= 80; k += 8) {
17898 GemmMicrokernelTester()
17899 .mr(2)
17900 .nr(8)
17901 .kr(8)
17902 .sr(1)
17903 .m(2)
17904 .n(8)
17905 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017906 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017907 }
17908 }
17909
17910 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_strided_a) {
17911 TEST_REQUIRES_X86_AVX2;
17912 for (size_t k = 16; k <= 80; k += 8) {
17913 GemmMicrokernelTester()
17914 .mr(2)
17915 .nr(8)
17916 .kr(8)
17917 .sr(1)
17918 .m(2)
17919 .n(8)
17920 .k(k)
17921 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017922 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017923 }
17924 }
17925
17926 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
17927 TEST_REQUIRES_X86_AVX2;
17928 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017929 for (uint32_t n = 1; n <= 8; n++) {
17930 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017931 GemmMicrokernelTester()
17932 .mr(2)
17933 .nr(8)
17934 .kr(8)
17935 .sr(1)
17936 .m(m)
17937 .n(n)
17938 .k(k)
17939 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017940 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017941 }
17942 }
17943 }
17944 }
17945
17946 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
17947 TEST_REQUIRES_X86_AVX2;
17948 for (uint32_t n = 9; n < 16; n++) {
17949 for (size_t k = 1; k <= 40; k += 9) {
17950 GemmMicrokernelTester()
17951 .mr(2)
17952 .nr(8)
17953 .kr(8)
17954 .sr(1)
17955 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017956 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017957 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017958 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017959 }
17960 }
17961 }
17962
17963 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
17964 TEST_REQUIRES_X86_AVX2;
17965 for (uint32_t n = 9; n < 16; n++) {
17966 for (size_t k = 1; k <= 40; k += 9) {
17967 GemmMicrokernelTester()
17968 .mr(2)
17969 .nr(8)
17970 .kr(8)
17971 .sr(1)
17972 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017973 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017974 .k(k)
17975 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017976 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017977 }
17978 }
17979 }
17980
17981 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_a) {
17982 TEST_REQUIRES_X86_AVX2;
17983 for (uint32_t n = 9; n < 16; n++) {
17984 for (size_t k = 1; k <= 40; k += 9) {
17985 GemmMicrokernelTester()
17986 .mr(2)
17987 .nr(8)
17988 .kr(8)
17989 .sr(1)
17990 .m(2)
17991 .n(n)
17992 .k(k)
17993 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017994 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017995 }
17996 }
17997 }
17998
17999 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
18000 TEST_REQUIRES_X86_AVX2;
18001 for (uint32_t n = 9; n < 16; n++) {
18002 for (size_t k = 1; k <= 40; k += 9) {
18003 for (uint32_t m = 1; m <= 2; m++) {
18004 GemmMicrokernelTester()
18005 .mr(2)
18006 .nr(8)
18007 .kr(8)
18008 .sr(1)
18009 .m(m)
18010 .n(n)
18011 .k(k)
18012 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018013 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018014 }
18015 }
18016 }
18017 }
18018
18019 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
18020 TEST_REQUIRES_X86_AVX2;
18021 for (uint32_t n = 16; n <= 24; n += 8) {
18022 for (size_t k = 1; k <= 40; k += 9) {
18023 GemmMicrokernelTester()
18024 .mr(2)
18025 .nr(8)
18026 .kr(8)
18027 .sr(1)
18028 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018029 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018030 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018031 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018032 }
18033 }
18034 }
18035
18036 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
18037 TEST_REQUIRES_X86_AVX2;
18038 for (uint32_t n = 16; n <= 24; n += 8) {
18039 for (size_t k = 1; k <= 40; k += 9) {
18040 GemmMicrokernelTester()
18041 .mr(2)
18042 .nr(8)
18043 .kr(8)
18044 .sr(1)
18045 .m(2)
18046 .n(n)
18047 .k(k)
18048 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018049 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018050 }
18051 }
18052 }
18053
18054 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_a) {
18055 TEST_REQUIRES_X86_AVX2;
18056 for (uint32_t n = 16; n <= 24; n += 8) {
18057 for (size_t k = 1; k <= 40; k += 9) {
18058 GemmMicrokernelTester()
18059 .mr(2)
18060 .nr(8)
18061 .kr(8)
18062 .sr(1)
18063 .m(2)
18064 .n(n)
18065 .k(k)
18066 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018067 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018068 }
18069 }
18070 }
18071
18072 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
18073 TEST_REQUIRES_X86_AVX2;
18074 for (uint32_t n = 16; n <= 24; n += 8) {
18075 for (size_t k = 1; k <= 40; k += 9) {
18076 for (uint32_t m = 1; m <= 2; m++) {
18077 GemmMicrokernelTester()
18078 .mr(2)
18079 .nr(8)
18080 .kr(8)
18081 .sr(1)
18082 .m(m)
18083 .n(n)
18084 .k(k)
18085 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018086 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018087 }
18088 }
18089 }
18090 }
18091
18092 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
18093 TEST_REQUIRES_X86_AVX2;
18094 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018095 for (uint32_t n = 1; n <= 8; n++) {
18096 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018097 GemmMicrokernelTester()
18098 .mr(2)
18099 .nr(8)
18100 .kr(8)
18101 .sr(1)
18102 .m(m)
18103 .n(n)
18104 .k(k)
18105 .cm_stride(11)
18106 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018107 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018108 }
18109 }
18110 }
18111 }
18112
18113 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, qmin) {
18114 TEST_REQUIRES_X86_AVX2;
18115 GemmMicrokernelTester()
18116 .mr(2)
18117 .nr(8)
18118 .kr(8)
18119 .sr(1)
18120 .m(2)
18121 .n(8)
18122 .k(8)
18123 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018124 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018125 }
18126
18127 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, qmax) {
18128 TEST_REQUIRES_X86_AVX2;
18129 GemmMicrokernelTester()
18130 .mr(2)
18131 .nr(8)
18132 .kr(8)
18133 .sr(1)
18134 .m(2)
18135 .n(8)
18136 .k(8)
18137 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018138 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018139 }
18140
18141 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
18142 TEST_REQUIRES_X86_AVX2;
18143 GemmMicrokernelTester()
18144 .mr(2)
18145 .nr(8)
18146 .kr(8)
18147 .sr(1)
18148 .m(2)
18149 .n(8)
18150 .k(8)
18151 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018152 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018153 }
18154
18155 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, no_a_zero_point) {
18156 TEST_REQUIRES_X86_AVX2;
18157 for (size_t k = 1; k <= 40; k += 9) {
18158 GemmMicrokernelTester()
18159 .mr(2)
18160 .nr(8)
18161 .kr(8)
18162 .sr(1)
18163 .m(2)
18164 .n(8)
18165 .k(k)
18166 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080018167 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018168 }
18169 }
18170
18171 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, no_b_zero_point) {
18172 TEST_REQUIRES_X86_AVX2;
18173 for (size_t k = 1; k <= 40; k += 9) {
18174 GemmMicrokernelTester()
18175 .mr(2)
18176 .nr(8)
18177 .kr(8)
18178 .sr(1)
18179 .m(2)
18180 .n(8)
18181 .k(k)
18182 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080018183 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018184 }
18185 }
18186
18187 TEST(QU8_GEMM_MINMAX_FP32_2X8C8__AVX2, no_zero_point) {
18188 TEST_REQUIRES_X86_AVX2;
18189 for (size_t k = 1; k <= 40; k += 9) {
18190 GemmMicrokernelTester()
18191 .mr(2)
18192 .nr(8)
18193 .kr(8)
18194 .sr(1)
18195 .m(2)
18196 .n(8)
18197 .k(k)
18198 .a_zero_point(0)
18199 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080018200 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018201 }
18202 }
18203#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18204
18205
18206#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18207 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
18208 TEST_REQUIRES_X86_AVX2;
18209 GemmMicrokernelTester()
18210 .mr(3)
18211 .nr(8)
18212 .kr(8)
18213 .sr(1)
18214 .m(3)
18215 .n(8)
18216 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018217 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018218 }
18219
18220 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
18221 TEST_REQUIRES_X86_AVX2;
18222 GemmMicrokernelTester()
18223 .mr(3)
18224 .nr(8)
18225 .kr(8)
18226 .sr(1)
18227 .m(3)
18228 .n(8)
18229 .k(8)
18230 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018231 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018232 }
18233
18234 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_strided_a) {
18235 TEST_REQUIRES_X86_AVX2;
18236 GemmMicrokernelTester()
18237 .mr(3)
18238 .nr(8)
18239 .kr(8)
18240 .sr(1)
18241 .m(3)
18242 .n(8)
18243 .k(8)
18244 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018245 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018246 }
18247
18248 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
18249 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018250 for (uint32_t n = 1; n <= 8; n++) {
18251 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018252 GemmMicrokernelTester()
18253 .mr(3)
18254 .nr(8)
18255 .kr(8)
18256 .sr(1)
18257 .m(m)
18258 .n(n)
18259 .k(8)
18260 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018261 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018262 }
18263 }
18264 }
18265
18266 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
18267 TEST_REQUIRES_X86_AVX2;
18268 for (uint32_t m = 1; m <= 3; m++) {
18269 GemmMicrokernelTester()
18270 .mr(3)
18271 .nr(8)
18272 .kr(8)
18273 .sr(1)
18274 .m(m)
18275 .n(8)
18276 .k(8)
18277 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018278 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018279 }
18280 }
18281
18282 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
18283 TEST_REQUIRES_X86_AVX2;
18284 for (uint32_t n = 1; n <= 8; n++) {
18285 GemmMicrokernelTester()
18286 .mr(3)
18287 .nr(8)
18288 .kr(8)
18289 .sr(1)
18290 .m(3)
18291 .n(n)
18292 .k(8)
18293 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018294 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018295 }
18296 }
18297
18298 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
18299 TEST_REQUIRES_X86_AVX2;
18300 for (size_t k = 1; k < 8; k++) {
18301 GemmMicrokernelTester()
18302 .mr(3)
18303 .nr(8)
18304 .kr(8)
18305 .sr(1)
18306 .m(3)
18307 .n(8)
18308 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018309 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018310 }
18311 }
18312
18313 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_strided_a) {
18314 TEST_REQUIRES_X86_AVX2;
18315 for (size_t k = 1; k < 8; k++) {
18316 GemmMicrokernelTester()
18317 .mr(3)
18318 .nr(8)
18319 .kr(8)
18320 .sr(1)
18321 .m(3)
18322 .n(8)
18323 .k(k)
18324 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018325 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018326 }
18327 }
18328
18329 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
18330 TEST_REQUIRES_X86_AVX2;
18331 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018332 for (uint32_t n = 1; n <= 8; n++) {
18333 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018334 GemmMicrokernelTester()
18335 .mr(3)
18336 .nr(8)
18337 .kr(8)
18338 .sr(1)
18339 .m(m)
18340 .n(n)
18341 .k(k)
18342 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018343 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018344 }
18345 }
18346 }
18347 }
18348
18349 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
18350 TEST_REQUIRES_X86_AVX2;
18351 for (size_t k = 9; k < 16; k++) {
18352 GemmMicrokernelTester()
18353 .mr(3)
18354 .nr(8)
18355 .kr(8)
18356 .sr(1)
18357 .m(3)
18358 .n(8)
18359 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018360 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018361 }
18362 }
18363
18364 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_strided_a) {
18365 TEST_REQUIRES_X86_AVX2;
18366 for (size_t k = 9; k < 16; k++) {
18367 GemmMicrokernelTester()
18368 .mr(3)
18369 .nr(8)
18370 .kr(8)
18371 .sr(1)
18372 .m(3)
18373 .n(8)
18374 .k(k)
18375 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018376 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018377 }
18378 }
18379
18380 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
18381 TEST_REQUIRES_X86_AVX2;
18382 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018383 for (uint32_t n = 1; n <= 8; n++) {
18384 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018385 GemmMicrokernelTester()
18386 .mr(3)
18387 .nr(8)
18388 .kr(8)
18389 .sr(1)
18390 .m(m)
18391 .n(n)
18392 .k(k)
18393 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018394 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018395 }
18396 }
18397 }
18398 }
18399
18400 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
18401 TEST_REQUIRES_X86_AVX2;
18402 for (size_t k = 16; k <= 80; k += 8) {
18403 GemmMicrokernelTester()
18404 .mr(3)
18405 .nr(8)
18406 .kr(8)
18407 .sr(1)
18408 .m(3)
18409 .n(8)
18410 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018411 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018412 }
18413 }
18414
18415 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_strided_a) {
18416 TEST_REQUIRES_X86_AVX2;
18417 for (size_t k = 16; k <= 80; k += 8) {
18418 GemmMicrokernelTester()
18419 .mr(3)
18420 .nr(8)
18421 .kr(8)
18422 .sr(1)
18423 .m(3)
18424 .n(8)
18425 .k(k)
18426 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018427 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018428 }
18429 }
18430
18431 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
18432 TEST_REQUIRES_X86_AVX2;
18433 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018434 for (uint32_t n = 1; n <= 8; n++) {
18435 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018436 GemmMicrokernelTester()
18437 .mr(3)
18438 .nr(8)
18439 .kr(8)
18440 .sr(1)
18441 .m(m)
18442 .n(n)
18443 .k(k)
18444 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018445 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018446 }
18447 }
18448 }
18449 }
18450
18451 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
18452 TEST_REQUIRES_X86_AVX2;
18453 for (uint32_t n = 9; n < 16; n++) {
18454 for (size_t k = 1; k <= 40; k += 9) {
18455 GemmMicrokernelTester()
18456 .mr(3)
18457 .nr(8)
18458 .kr(8)
18459 .sr(1)
18460 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018461 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018462 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018463 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018464 }
18465 }
18466 }
18467
18468 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
18469 TEST_REQUIRES_X86_AVX2;
18470 for (uint32_t n = 9; n < 16; n++) {
18471 for (size_t k = 1; k <= 40; k += 9) {
18472 GemmMicrokernelTester()
18473 .mr(3)
18474 .nr(8)
18475 .kr(8)
18476 .sr(1)
18477 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018478 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018479 .k(k)
18480 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018481 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018482 }
18483 }
18484 }
18485
18486 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_a) {
18487 TEST_REQUIRES_X86_AVX2;
18488 for (uint32_t n = 9; n < 16; n++) {
18489 for (size_t k = 1; k <= 40; k += 9) {
18490 GemmMicrokernelTester()
18491 .mr(3)
18492 .nr(8)
18493 .kr(8)
18494 .sr(1)
18495 .m(3)
18496 .n(n)
18497 .k(k)
18498 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018499 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018500 }
18501 }
18502 }
18503
18504 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
18505 TEST_REQUIRES_X86_AVX2;
18506 for (uint32_t n = 9; n < 16; n++) {
18507 for (size_t k = 1; k <= 40; k += 9) {
18508 for (uint32_t m = 1; m <= 3; m++) {
18509 GemmMicrokernelTester()
18510 .mr(3)
18511 .nr(8)
18512 .kr(8)
18513 .sr(1)
18514 .m(m)
18515 .n(n)
18516 .k(k)
18517 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018518 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018519 }
18520 }
18521 }
18522 }
18523
18524 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
18525 TEST_REQUIRES_X86_AVX2;
18526 for (uint32_t n = 16; n <= 24; n += 8) {
18527 for (size_t k = 1; k <= 40; k += 9) {
18528 GemmMicrokernelTester()
18529 .mr(3)
18530 .nr(8)
18531 .kr(8)
18532 .sr(1)
18533 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018534 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018535 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018536 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018537 }
18538 }
18539 }
18540
18541 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
18542 TEST_REQUIRES_X86_AVX2;
18543 for (uint32_t n = 16; n <= 24; n += 8) {
18544 for (size_t k = 1; k <= 40; k += 9) {
18545 GemmMicrokernelTester()
18546 .mr(3)
18547 .nr(8)
18548 .kr(8)
18549 .sr(1)
18550 .m(3)
18551 .n(n)
18552 .k(k)
18553 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018554 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018555 }
18556 }
18557 }
18558
18559 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_a) {
18560 TEST_REQUIRES_X86_AVX2;
18561 for (uint32_t n = 16; n <= 24; n += 8) {
18562 for (size_t k = 1; k <= 40; k += 9) {
18563 GemmMicrokernelTester()
18564 .mr(3)
18565 .nr(8)
18566 .kr(8)
18567 .sr(1)
18568 .m(3)
18569 .n(n)
18570 .k(k)
18571 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018572 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018573 }
18574 }
18575 }
18576
18577 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
18578 TEST_REQUIRES_X86_AVX2;
18579 for (uint32_t n = 16; n <= 24; n += 8) {
18580 for (size_t k = 1; k <= 40; k += 9) {
18581 for (uint32_t m = 1; m <= 3; m++) {
18582 GemmMicrokernelTester()
18583 .mr(3)
18584 .nr(8)
18585 .kr(8)
18586 .sr(1)
18587 .m(m)
18588 .n(n)
18589 .k(k)
18590 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018591 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018592 }
18593 }
18594 }
18595 }
18596
18597 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
18598 TEST_REQUIRES_X86_AVX2;
18599 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018600 for (uint32_t n = 1; n <= 8; n++) {
18601 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018602 GemmMicrokernelTester()
18603 .mr(3)
18604 .nr(8)
18605 .kr(8)
18606 .sr(1)
18607 .m(m)
18608 .n(n)
18609 .k(k)
18610 .cm_stride(11)
18611 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018612 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018613 }
18614 }
18615 }
18616 }
18617
18618 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, qmin) {
18619 TEST_REQUIRES_X86_AVX2;
18620 GemmMicrokernelTester()
18621 .mr(3)
18622 .nr(8)
18623 .kr(8)
18624 .sr(1)
18625 .m(3)
18626 .n(8)
18627 .k(8)
18628 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018629 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018630 }
18631
18632 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, qmax) {
18633 TEST_REQUIRES_X86_AVX2;
18634 GemmMicrokernelTester()
18635 .mr(3)
18636 .nr(8)
18637 .kr(8)
18638 .sr(1)
18639 .m(3)
18640 .n(8)
18641 .k(8)
18642 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018643 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018644 }
18645
18646 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
18647 TEST_REQUIRES_X86_AVX2;
18648 GemmMicrokernelTester()
18649 .mr(3)
18650 .nr(8)
18651 .kr(8)
18652 .sr(1)
18653 .m(3)
18654 .n(8)
18655 .k(8)
18656 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018657 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018658 }
18659
18660 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, no_a_zero_point) {
18661 TEST_REQUIRES_X86_AVX2;
18662 for (size_t k = 1; k <= 40; k += 9) {
18663 GemmMicrokernelTester()
18664 .mr(3)
18665 .nr(8)
18666 .kr(8)
18667 .sr(1)
18668 .m(3)
18669 .n(8)
18670 .k(k)
18671 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080018672 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018673 }
18674 }
18675
18676 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, no_b_zero_point) {
18677 TEST_REQUIRES_X86_AVX2;
18678 for (size_t k = 1; k <= 40; k += 9) {
18679 GemmMicrokernelTester()
18680 .mr(3)
18681 .nr(8)
18682 .kr(8)
18683 .sr(1)
18684 .m(3)
18685 .n(8)
18686 .k(k)
18687 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080018688 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018689 }
18690 }
18691
18692 TEST(QU8_GEMM_MINMAX_FP32_3X8C8__AVX2, no_zero_point) {
18693 TEST_REQUIRES_X86_AVX2;
18694 for (size_t k = 1; k <= 40; k += 9) {
18695 GemmMicrokernelTester()
18696 .mr(3)
18697 .nr(8)
18698 .kr(8)
18699 .sr(1)
18700 .m(3)
18701 .n(8)
18702 .k(k)
18703 .a_zero_point(0)
18704 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080018705 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018706 }
18707 }
18708#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18709
18710
18711#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18712 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8) {
18713 TEST_REQUIRES_X86_AVX512SKX;
18714 GemmMicrokernelTester()
18715 .mr(3)
18716 .nr(16)
18717 .kr(8)
18718 .sr(1)
18719 .m(3)
18720 .n(16)
18721 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018722 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018723 }
18724
18725 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cn) {
18726 TEST_REQUIRES_X86_AVX512SKX;
18727 GemmMicrokernelTester()
18728 .mr(3)
18729 .nr(16)
18730 .kr(8)
18731 .sr(1)
18732 .m(3)
18733 .n(16)
18734 .k(8)
18735 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018736 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018737 }
18738
18739 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_strided_a) {
18740 TEST_REQUIRES_X86_AVX512SKX;
18741 GemmMicrokernelTester()
18742 .mr(3)
18743 .nr(16)
18744 .kr(8)
18745 .sr(1)
18746 .m(3)
18747 .n(16)
18748 .k(8)
18749 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018750 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018751 }
18752
18753 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile) {
18754 TEST_REQUIRES_X86_AVX512SKX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018755 for (uint32_t n = 1; n <= 16; n++) {
18756 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018757 GemmMicrokernelTester()
18758 .mr(3)
18759 .nr(16)
18760 .kr(8)
18761 .sr(1)
18762 .m(m)
18763 .n(n)
18764 .k(8)
18765 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018766 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018767 }
18768 }
18769 }
18770
18771 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
18772 TEST_REQUIRES_X86_AVX512SKX;
18773 for (uint32_t m = 1; m <= 3; m++) {
18774 GemmMicrokernelTester()
18775 .mr(3)
18776 .nr(16)
18777 .kr(8)
18778 .sr(1)
18779 .m(m)
18780 .n(16)
18781 .k(8)
18782 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018783 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018784 }
18785 }
18786
18787 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
18788 TEST_REQUIRES_X86_AVX512SKX;
18789 for (uint32_t n = 1; n <= 16; n++) {
18790 GemmMicrokernelTester()
18791 .mr(3)
18792 .nr(16)
18793 .kr(8)
18794 .sr(1)
18795 .m(3)
18796 .n(n)
18797 .k(8)
18798 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018799 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018800 }
18801 }
18802
18803 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8) {
18804 TEST_REQUIRES_X86_AVX512SKX;
18805 for (size_t k = 1; k < 8; k++) {
18806 GemmMicrokernelTester()
18807 .mr(3)
18808 .nr(16)
18809 .kr(8)
18810 .sr(1)
18811 .m(3)
18812 .n(16)
18813 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018814 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018815 }
18816 }
18817
18818 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_strided_a) {
18819 TEST_REQUIRES_X86_AVX512SKX;
18820 for (size_t k = 1; k < 8; k++) {
18821 GemmMicrokernelTester()
18822 .mr(3)
18823 .nr(16)
18824 .kr(8)
18825 .sr(1)
18826 .m(3)
18827 .n(16)
18828 .k(k)
18829 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018830 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018831 }
18832 }
18833
18834 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_subtile) {
18835 TEST_REQUIRES_X86_AVX512SKX;
18836 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018837 for (uint32_t n = 1; n <= 16; n++) {
18838 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018839 GemmMicrokernelTester()
18840 .mr(3)
18841 .nr(16)
18842 .kr(8)
18843 .sr(1)
18844 .m(m)
18845 .n(n)
18846 .k(k)
18847 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018848 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018849 }
18850 }
18851 }
18852 }
18853
18854 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8) {
18855 TEST_REQUIRES_X86_AVX512SKX;
18856 for (size_t k = 9; k < 16; k++) {
18857 GemmMicrokernelTester()
18858 .mr(3)
18859 .nr(16)
18860 .kr(8)
18861 .sr(1)
18862 .m(3)
18863 .n(16)
18864 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018865 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018866 }
18867 }
18868
18869 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_strided_a) {
18870 TEST_REQUIRES_X86_AVX512SKX;
18871 for (size_t k = 9; k < 16; k++) {
18872 GemmMicrokernelTester()
18873 .mr(3)
18874 .nr(16)
18875 .kr(8)
18876 .sr(1)
18877 .m(3)
18878 .n(16)
18879 .k(k)
18880 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018881 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018882 }
18883 }
18884
18885 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_subtile) {
18886 TEST_REQUIRES_X86_AVX512SKX;
18887 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018888 for (uint32_t n = 1; n <= 16; n++) {
18889 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018890 GemmMicrokernelTester()
18891 .mr(3)
18892 .nr(16)
18893 .kr(8)
18894 .sr(1)
18895 .m(m)
18896 .n(n)
18897 .k(k)
18898 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018899 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018900 }
18901 }
18902 }
18903 }
18904
18905 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8) {
18906 TEST_REQUIRES_X86_AVX512SKX;
18907 for (size_t k = 16; k <= 80; k += 8) {
18908 GemmMicrokernelTester()
18909 .mr(3)
18910 .nr(16)
18911 .kr(8)
18912 .sr(1)
18913 .m(3)
18914 .n(16)
18915 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018916 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018917 }
18918 }
18919
18920 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_strided_a) {
18921 TEST_REQUIRES_X86_AVX512SKX;
18922 for (size_t k = 16; k <= 80; k += 8) {
18923 GemmMicrokernelTester()
18924 .mr(3)
18925 .nr(16)
18926 .kr(8)
18927 .sr(1)
18928 .m(3)
18929 .n(16)
18930 .k(k)
18931 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018932 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018933 }
18934 }
18935
18936 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_subtile) {
18937 TEST_REQUIRES_X86_AVX512SKX;
18938 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018939 for (uint32_t n = 1; n <= 16; n++) {
18940 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018941 GemmMicrokernelTester()
18942 .mr(3)
18943 .nr(16)
18944 .kr(8)
18945 .sr(1)
18946 .m(m)
18947 .n(n)
18948 .k(k)
18949 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018950 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018951 }
18952 }
18953 }
18954 }
18955
18956 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16) {
18957 TEST_REQUIRES_X86_AVX512SKX;
18958 for (uint32_t n = 17; n < 32; n++) {
18959 for (size_t k = 1; k <= 40; k += 9) {
18960 GemmMicrokernelTester()
18961 .mr(3)
18962 .nr(16)
18963 .kr(8)
18964 .sr(1)
18965 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018966 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018967 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018968 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018969 }
18970 }
18971 }
18972
18973 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
18974 TEST_REQUIRES_X86_AVX512SKX;
18975 for (uint32_t n = 17; n < 32; n++) {
18976 for (size_t k = 1; k <= 40; k += 9) {
18977 GemmMicrokernelTester()
18978 .mr(3)
18979 .nr(16)
18980 .kr(8)
18981 .sr(1)
18982 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018983 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018984 .k(k)
18985 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018986 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018987 }
18988 }
18989 }
18990
18991 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_a) {
18992 TEST_REQUIRES_X86_AVX512SKX;
18993 for (uint32_t n = 17; n < 32; n++) {
18994 for (size_t k = 1; k <= 40; k += 9) {
18995 GemmMicrokernelTester()
18996 .mr(3)
18997 .nr(16)
18998 .kr(8)
18999 .sr(1)
19000 .m(3)
19001 .n(n)
19002 .k(k)
19003 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019004 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019005 }
19006 }
19007 }
19008
19009 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_subtile) {
19010 TEST_REQUIRES_X86_AVX512SKX;
19011 for (uint32_t n = 17; n < 32; n++) {
19012 for (size_t k = 1; k <= 40; k += 9) {
19013 for (uint32_t m = 1; m <= 3; m++) {
19014 GemmMicrokernelTester()
19015 .mr(3)
19016 .nr(16)
19017 .kr(8)
19018 .sr(1)
19019 .m(m)
19020 .n(n)
19021 .k(k)
19022 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019023 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019024 }
19025 }
19026 }
19027 }
19028
19029 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16) {
19030 TEST_REQUIRES_X86_AVX512SKX;
19031 for (uint32_t n = 32; n <= 48; n += 16) {
19032 for (size_t k = 1; k <= 40; k += 9) {
19033 GemmMicrokernelTester()
19034 .mr(3)
19035 .nr(16)
19036 .kr(8)
19037 .sr(1)
19038 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019039 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019040 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019041 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019042 }
19043 }
19044 }
19045
19046 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_cn) {
19047 TEST_REQUIRES_X86_AVX512SKX;
19048 for (uint32_t n = 32; n <= 48; n += 16) {
19049 for (size_t k = 1; k <= 40; k += 9) {
19050 GemmMicrokernelTester()
19051 .mr(3)
19052 .nr(16)
19053 .kr(8)
19054 .sr(1)
19055 .m(3)
19056 .n(n)
19057 .k(k)
19058 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019059 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019060 }
19061 }
19062 }
19063
19064 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_a) {
19065 TEST_REQUIRES_X86_AVX512SKX;
19066 for (uint32_t n = 32; n <= 48; n += 16) {
19067 for (size_t k = 1; k <= 40; k += 9) {
19068 GemmMicrokernelTester()
19069 .mr(3)
19070 .nr(16)
19071 .kr(8)
19072 .sr(1)
19073 .m(3)
19074 .n(n)
19075 .k(k)
19076 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019077 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019078 }
19079 }
19080 }
19081
19082 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_subtile) {
19083 TEST_REQUIRES_X86_AVX512SKX;
19084 for (uint32_t n = 32; n <= 48; n += 16) {
19085 for (size_t k = 1; k <= 40; k += 9) {
19086 for (uint32_t m = 1; m <= 3; m++) {
19087 GemmMicrokernelTester()
19088 .mr(3)
19089 .nr(16)
19090 .kr(8)
19091 .sr(1)
19092 .m(m)
19093 .n(n)
19094 .k(k)
19095 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019096 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019097 }
19098 }
19099 }
19100 }
19101
19102 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm_subtile) {
19103 TEST_REQUIRES_X86_AVX512SKX;
19104 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019105 for (uint32_t n = 1; n <= 16; n++) {
19106 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019107 GemmMicrokernelTester()
19108 .mr(3)
19109 .nr(16)
19110 .kr(8)
19111 .sr(1)
19112 .m(m)
19113 .n(n)
19114 .k(k)
19115 .cm_stride(19)
19116 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019117 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019118 }
19119 }
19120 }
19121 }
19122
19123 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmin) {
19124 TEST_REQUIRES_X86_AVX512SKX;
19125 GemmMicrokernelTester()
19126 .mr(3)
19127 .nr(16)
19128 .kr(8)
19129 .sr(1)
19130 .m(3)
19131 .n(16)
19132 .k(8)
19133 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019134 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019135 }
19136
19137 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmax) {
19138 TEST_REQUIRES_X86_AVX512SKX;
19139 GemmMicrokernelTester()
19140 .mr(3)
19141 .nr(16)
19142 .kr(8)
19143 .sr(1)
19144 .m(3)
19145 .n(16)
19146 .k(8)
19147 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019148 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019149 }
19150
19151 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm) {
19152 TEST_REQUIRES_X86_AVX512SKX;
19153 GemmMicrokernelTester()
19154 .mr(3)
19155 .nr(16)
19156 .kr(8)
19157 .sr(1)
19158 .m(3)
19159 .n(16)
19160 .k(8)
19161 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019162 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019163 }
19164
19165 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_a_zero_point) {
19166 TEST_REQUIRES_X86_AVX512SKX;
19167 for (size_t k = 1; k <= 40; k += 9) {
19168 GemmMicrokernelTester()
19169 .mr(3)
19170 .nr(16)
19171 .kr(8)
19172 .sr(1)
19173 .m(3)
19174 .n(16)
19175 .k(k)
19176 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080019177 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019178 }
19179 }
19180
19181 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_b_zero_point) {
19182 TEST_REQUIRES_X86_AVX512SKX;
19183 for (size_t k = 1; k <= 40; k += 9) {
19184 GemmMicrokernelTester()
19185 .mr(3)
19186 .nr(16)
19187 .kr(8)
19188 .sr(1)
19189 .m(3)
19190 .n(16)
19191 .k(k)
19192 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080019193 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019194 }
19195 }
19196
19197 TEST(QU8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_zero_point) {
19198 TEST_REQUIRES_X86_AVX512SKX;
19199 for (size_t k = 1; k <= 40; k += 9) {
19200 GemmMicrokernelTester()
19201 .mr(3)
19202 .nr(16)
19203 .kr(8)
19204 .sr(1)
19205 .m(3)
19206 .n(16)
19207 .k(k)
19208 .a_zero_point(0)
19209 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080019210 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019211 }
19212 }
19213#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19214
19215
19216#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
19217 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
19218 GemmMicrokernelTester()
19219 .mr(2)
19220 .nr(4)
19221 .kr(2)
19222 .sr(1)
19223 .m(2)
19224 .n(4)
19225 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080019226 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019227 }
19228
19229 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
19230 GemmMicrokernelTester()
19231 .mr(2)
19232 .nr(4)
19233 .kr(2)
19234 .sr(1)
19235 .m(2)
19236 .n(4)
19237 .k(8)
19238 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019239 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019240 }
19241
19242 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
19243 GemmMicrokernelTester()
19244 .mr(2)
19245 .nr(4)
19246 .kr(2)
19247 .sr(1)
19248 .m(2)
19249 .n(4)
19250 .k(8)
19251 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019252 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019253 }
19254
19255 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019256 for (uint32_t n = 1; n <= 4; n++) {
19257 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019258 GemmMicrokernelTester()
19259 .mr(2)
19260 .nr(4)
19261 .kr(2)
19262 .sr(1)
19263 .m(m)
19264 .n(n)
19265 .k(8)
19266 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019267 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019268 }
19269 }
19270 }
19271
19272 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
19273 for (uint32_t m = 1; m <= 2; m++) {
19274 GemmMicrokernelTester()
19275 .mr(2)
19276 .nr(4)
19277 .kr(2)
19278 .sr(1)
19279 .m(m)
19280 .n(4)
19281 .k(8)
19282 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019283 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019284 }
19285 }
19286
19287 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
19288 for (uint32_t n = 1; n <= 4; n++) {
19289 GemmMicrokernelTester()
19290 .mr(2)
19291 .nr(4)
19292 .kr(2)
19293 .sr(1)
19294 .m(2)
19295 .n(n)
19296 .k(8)
19297 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019298 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019299 }
19300 }
19301
19302 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
19303 for (size_t k = 1; k < 8; k++) {
19304 GemmMicrokernelTester()
19305 .mr(2)
19306 .nr(4)
19307 .kr(2)
19308 .sr(1)
19309 .m(2)
19310 .n(4)
19311 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019312 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019313 }
19314 }
19315
19316 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
19317 for (size_t k = 1; k < 8; k++) {
19318 GemmMicrokernelTester()
19319 .mr(2)
19320 .nr(4)
19321 .kr(2)
19322 .sr(1)
19323 .m(2)
19324 .n(4)
19325 .k(k)
19326 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019327 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019328 }
19329 }
19330
19331 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
19332 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019333 for (uint32_t n = 1; n <= 4; n++) {
19334 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019335 GemmMicrokernelTester()
19336 .mr(2)
19337 .nr(4)
19338 .kr(2)
19339 .sr(1)
19340 .m(m)
19341 .n(n)
19342 .k(k)
19343 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019344 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019345 }
19346 }
19347 }
19348 }
19349
19350 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
19351 for (size_t k = 9; k < 16; k++) {
19352 GemmMicrokernelTester()
19353 .mr(2)
19354 .nr(4)
19355 .kr(2)
19356 .sr(1)
19357 .m(2)
19358 .n(4)
19359 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019360 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019361 }
19362 }
19363
19364 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
19365 for (size_t k = 9; k < 16; k++) {
19366 GemmMicrokernelTester()
19367 .mr(2)
19368 .nr(4)
19369 .kr(2)
19370 .sr(1)
19371 .m(2)
19372 .n(4)
19373 .k(k)
19374 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019375 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019376 }
19377 }
19378
19379 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
19380 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019381 for (uint32_t n = 1; n <= 4; n++) {
19382 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019383 GemmMicrokernelTester()
19384 .mr(2)
19385 .nr(4)
19386 .kr(2)
19387 .sr(1)
19388 .m(m)
19389 .n(n)
19390 .k(k)
19391 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019392 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019393 }
19394 }
19395 }
19396 }
19397
19398 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
19399 for (size_t k = 16; k <= 80; k += 8) {
19400 GemmMicrokernelTester()
19401 .mr(2)
19402 .nr(4)
19403 .kr(2)
19404 .sr(1)
19405 .m(2)
19406 .n(4)
19407 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019408 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019409 }
19410 }
19411
19412 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
19413 for (size_t k = 16; k <= 80; k += 8) {
19414 GemmMicrokernelTester()
19415 .mr(2)
19416 .nr(4)
19417 .kr(2)
19418 .sr(1)
19419 .m(2)
19420 .n(4)
19421 .k(k)
19422 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080019423 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019424 }
19425 }
19426
19427 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
19428 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019429 for (uint32_t n = 1; n <= 4; n++) {
19430 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019431 GemmMicrokernelTester()
19432 .mr(2)
19433 .nr(4)
19434 .kr(2)
19435 .sr(1)
19436 .m(m)
19437 .n(n)
19438 .k(k)
19439 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019440 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019441 }
19442 }
19443 }
19444 }
19445
19446 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
19447 for (uint32_t n = 5; n < 8; n++) {
19448 for (size_t k = 1; k <= 40; k += 9) {
19449 GemmMicrokernelTester()
19450 .mr(2)
19451 .nr(4)
19452 .kr(2)
19453 .sr(1)
19454 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019455 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019456 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019457 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019458 }
19459 }
19460 }
19461
19462 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
19463 for (uint32_t n = 5; n < 8; n++) {
19464 for (size_t k = 1; k <= 40; k += 9) {
19465 GemmMicrokernelTester()
19466 .mr(2)
19467 .nr(4)
19468 .kr(2)
19469 .sr(1)
19470 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019471 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019472 .k(k)
19473 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019474 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019475 }
19476 }
19477 }
19478
19479 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
19480 for (uint32_t n = 5; n < 8; n++) {
19481 for (size_t k = 1; k <= 40; k += 9) {
19482 GemmMicrokernelTester()
19483 .mr(2)
19484 .nr(4)
19485 .kr(2)
19486 .sr(1)
19487 .m(2)
19488 .n(n)
19489 .k(k)
19490 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019491 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019492 }
19493 }
19494 }
19495
19496 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
19497 for (uint32_t n = 5; n < 8; n++) {
19498 for (size_t k = 1; k <= 40; k += 9) {
19499 for (uint32_t m = 1; m <= 2; m++) {
19500 GemmMicrokernelTester()
19501 .mr(2)
19502 .nr(4)
19503 .kr(2)
19504 .sr(1)
19505 .m(m)
19506 .n(n)
19507 .k(k)
19508 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019509 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019510 }
19511 }
19512 }
19513 }
19514
19515 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
19516 for (uint32_t n = 8; n <= 12; n += 4) {
19517 for (size_t k = 1; k <= 40; k += 9) {
19518 GemmMicrokernelTester()
19519 .mr(2)
19520 .nr(4)
19521 .kr(2)
19522 .sr(1)
19523 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019524 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019525 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019526 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019527 }
19528 }
19529 }
19530
19531 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
19532 for (uint32_t n = 8; n <= 12; n += 4) {
19533 for (size_t k = 1; k <= 40; k += 9) {
19534 GemmMicrokernelTester()
19535 .mr(2)
19536 .nr(4)
19537 .kr(2)
19538 .sr(1)
19539 .m(2)
19540 .n(n)
19541 .k(k)
19542 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019543 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019544 }
19545 }
19546 }
19547
19548 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
19549 for (uint32_t n = 8; n <= 12; n += 4) {
19550 for (size_t k = 1; k <= 40; k += 9) {
19551 GemmMicrokernelTester()
19552 .mr(2)
19553 .nr(4)
19554 .kr(2)
19555 .sr(1)
19556 .m(2)
19557 .n(n)
19558 .k(k)
19559 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019560 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019561 }
19562 }
19563 }
19564
19565 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
19566 for (uint32_t n = 8; n <= 12; n += 4) {
19567 for (size_t k = 1; k <= 40; k += 9) {
19568 for (uint32_t m = 1; m <= 2; m++) {
19569 GemmMicrokernelTester()
19570 .mr(2)
19571 .nr(4)
19572 .kr(2)
19573 .sr(1)
19574 .m(m)
19575 .n(n)
19576 .k(k)
19577 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019578 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019579 }
19580 }
19581 }
19582 }
19583
19584 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
19585 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019586 for (uint32_t n = 1; n <= 4; n++) {
19587 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019588 GemmMicrokernelTester()
19589 .mr(2)
19590 .nr(4)
19591 .kr(2)
19592 .sr(1)
19593 .m(m)
19594 .n(n)
19595 .k(k)
19596 .cm_stride(7)
19597 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019598 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019599 }
19600 }
19601 }
19602 }
19603
19604 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
19605 GemmMicrokernelTester()
19606 .mr(2)
19607 .nr(4)
19608 .kr(2)
19609 .sr(1)
19610 .m(2)
19611 .n(4)
19612 .k(8)
19613 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019614 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019615 }
19616
19617 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
19618 GemmMicrokernelTester()
19619 .mr(2)
19620 .nr(4)
19621 .kr(2)
19622 .sr(1)
19623 .m(2)
19624 .n(4)
19625 .k(8)
19626 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019627 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019628 }
19629
19630 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
19631 GemmMicrokernelTester()
19632 .mr(2)
19633 .nr(4)
19634 .kr(2)
19635 .sr(1)
19636 .m(2)
19637 .n(4)
19638 .k(8)
19639 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019640 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019641 }
19642
19643 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
19644 for (size_t k = 1; k <= 40; k += 9) {
19645 GemmMicrokernelTester()
19646 .mr(2)
19647 .nr(4)
19648 .kr(2)
19649 .sr(1)
19650 .m(2)
19651 .n(4)
19652 .k(k)
19653 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080019654 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019655 }
19656 }
19657
19658 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
19659 for (size_t k = 1; k <= 40; k += 9) {
19660 GemmMicrokernelTester()
19661 .mr(2)
19662 .nr(4)
19663 .kr(2)
19664 .sr(1)
19665 .m(2)
19666 .n(4)
19667 .k(k)
19668 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080019669 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019670 }
19671 }
19672
19673 TEST(QU8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
19674 for (size_t k = 1; k <= 40; k += 9) {
19675 GemmMicrokernelTester()
19676 .mr(2)
19677 .nr(4)
19678 .kr(2)
19679 .sr(1)
19680 .m(2)
19681 .n(4)
19682 .k(k)
19683 .a_zero_point(0)
19684 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080019685 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019686 }
19687 }
19688#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
19689
19690
19691#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
19692 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
19693 GemmMicrokernelTester()
19694 .mr(4)
19695 .nr(4)
19696 .kr(2)
19697 .sr(1)
19698 .m(4)
19699 .n(4)
19700 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080019701 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019702 }
19703
19704 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
19705 GemmMicrokernelTester()
19706 .mr(4)
19707 .nr(4)
19708 .kr(2)
19709 .sr(1)
19710 .m(4)
19711 .n(4)
19712 .k(8)
19713 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019714 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019715 }
19716
19717 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
19718 GemmMicrokernelTester()
19719 .mr(4)
19720 .nr(4)
19721 .kr(2)
19722 .sr(1)
19723 .m(4)
19724 .n(4)
19725 .k(8)
19726 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019727 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019728 }
19729
19730 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019731 for (uint32_t n = 1; n <= 4; n++) {
19732 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019733 GemmMicrokernelTester()
19734 .mr(4)
19735 .nr(4)
19736 .kr(2)
19737 .sr(1)
19738 .m(m)
19739 .n(n)
19740 .k(8)
19741 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019742 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019743 }
19744 }
19745 }
19746
19747 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
19748 for (uint32_t m = 1; m <= 4; m++) {
19749 GemmMicrokernelTester()
19750 .mr(4)
19751 .nr(4)
19752 .kr(2)
19753 .sr(1)
19754 .m(m)
19755 .n(4)
19756 .k(8)
19757 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019758 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019759 }
19760 }
19761
19762 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
19763 for (uint32_t n = 1; n <= 4; n++) {
19764 GemmMicrokernelTester()
19765 .mr(4)
19766 .nr(4)
19767 .kr(2)
19768 .sr(1)
19769 .m(4)
19770 .n(n)
19771 .k(8)
19772 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019773 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019774 }
19775 }
19776
19777 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
19778 for (size_t k = 1; k < 8; k++) {
19779 GemmMicrokernelTester()
19780 .mr(4)
19781 .nr(4)
19782 .kr(2)
19783 .sr(1)
19784 .m(4)
19785 .n(4)
19786 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019787 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019788 }
19789 }
19790
19791 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
19792 for (size_t k = 1; k < 8; k++) {
19793 GemmMicrokernelTester()
19794 .mr(4)
19795 .nr(4)
19796 .kr(2)
19797 .sr(1)
19798 .m(4)
19799 .n(4)
19800 .k(k)
19801 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019802 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019803 }
19804 }
19805
19806 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
19807 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019808 for (uint32_t n = 1; n <= 4; n++) {
19809 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019810 GemmMicrokernelTester()
19811 .mr(4)
19812 .nr(4)
19813 .kr(2)
19814 .sr(1)
19815 .m(m)
19816 .n(n)
19817 .k(k)
19818 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019819 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019820 }
19821 }
19822 }
19823 }
19824
19825 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
19826 for (size_t k = 9; k < 16; k++) {
19827 GemmMicrokernelTester()
19828 .mr(4)
19829 .nr(4)
19830 .kr(2)
19831 .sr(1)
19832 .m(4)
19833 .n(4)
19834 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019835 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019836 }
19837 }
19838
19839 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
19840 for (size_t k = 9; k < 16; k++) {
19841 GemmMicrokernelTester()
19842 .mr(4)
19843 .nr(4)
19844 .kr(2)
19845 .sr(1)
19846 .m(4)
19847 .n(4)
19848 .k(k)
19849 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019850 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019851 }
19852 }
19853
19854 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
19855 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019856 for (uint32_t n = 1; n <= 4; n++) {
19857 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019858 GemmMicrokernelTester()
19859 .mr(4)
19860 .nr(4)
19861 .kr(2)
19862 .sr(1)
19863 .m(m)
19864 .n(n)
19865 .k(k)
19866 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019867 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019868 }
19869 }
19870 }
19871 }
19872
19873 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
19874 for (size_t k = 16; k <= 80; k += 8) {
19875 GemmMicrokernelTester()
19876 .mr(4)
19877 .nr(4)
19878 .kr(2)
19879 .sr(1)
19880 .m(4)
19881 .n(4)
19882 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019883 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019884 }
19885 }
19886
19887 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
19888 for (size_t k = 16; k <= 80; k += 8) {
19889 GemmMicrokernelTester()
19890 .mr(4)
19891 .nr(4)
19892 .kr(2)
19893 .sr(1)
19894 .m(4)
19895 .n(4)
19896 .k(k)
19897 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080019898 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019899 }
19900 }
19901
19902 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
19903 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019904 for (uint32_t n = 1; n <= 4; n++) {
19905 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019906 GemmMicrokernelTester()
19907 .mr(4)
19908 .nr(4)
19909 .kr(2)
19910 .sr(1)
19911 .m(m)
19912 .n(n)
19913 .k(k)
19914 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019915 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019916 }
19917 }
19918 }
19919 }
19920
19921 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
19922 for (uint32_t n = 5; n < 8; n++) {
19923 for (size_t k = 1; k <= 40; k += 9) {
19924 GemmMicrokernelTester()
19925 .mr(4)
19926 .nr(4)
19927 .kr(2)
19928 .sr(1)
19929 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019930 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019931 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019932 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019933 }
19934 }
19935 }
19936
19937 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
19938 for (uint32_t n = 5; n < 8; n++) {
19939 for (size_t k = 1; k <= 40; k += 9) {
19940 GemmMicrokernelTester()
19941 .mr(4)
19942 .nr(4)
19943 .kr(2)
19944 .sr(1)
19945 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019946 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019947 .k(k)
19948 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019949 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019950 }
19951 }
19952 }
19953
19954 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
19955 for (uint32_t n = 5; n < 8; n++) {
19956 for (size_t k = 1; k <= 40; k += 9) {
19957 GemmMicrokernelTester()
19958 .mr(4)
19959 .nr(4)
19960 .kr(2)
19961 .sr(1)
19962 .m(4)
19963 .n(n)
19964 .k(k)
19965 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019966 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019967 }
19968 }
19969 }
19970
19971 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
19972 for (uint32_t n = 5; n < 8; n++) {
19973 for (size_t k = 1; k <= 40; k += 9) {
19974 for (uint32_t m = 1; m <= 4; m++) {
19975 GemmMicrokernelTester()
19976 .mr(4)
19977 .nr(4)
19978 .kr(2)
19979 .sr(1)
19980 .m(m)
19981 .n(n)
19982 .k(k)
19983 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019984 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019985 }
19986 }
19987 }
19988 }
19989
19990 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
19991 for (uint32_t n = 8; n <= 12; n += 4) {
19992 for (size_t k = 1; k <= 40; k += 9) {
19993 GemmMicrokernelTester()
19994 .mr(4)
19995 .nr(4)
19996 .kr(2)
19997 .sr(1)
19998 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019999 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020000 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020001 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020002 }
20003 }
20004 }
20005
20006 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
20007 for (uint32_t n = 8; n <= 12; n += 4) {
20008 for (size_t k = 1; k <= 40; k += 9) {
20009 GemmMicrokernelTester()
20010 .mr(4)
20011 .nr(4)
20012 .kr(2)
20013 .sr(1)
20014 .m(4)
20015 .n(n)
20016 .k(k)
20017 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020018 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020019 }
20020 }
20021 }
20022
20023 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
20024 for (uint32_t n = 8; n <= 12; n += 4) {
20025 for (size_t k = 1; k <= 40; k += 9) {
20026 GemmMicrokernelTester()
20027 .mr(4)
20028 .nr(4)
20029 .kr(2)
20030 .sr(1)
20031 .m(4)
20032 .n(n)
20033 .k(k)
20034 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020035 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020036 }
20037 }
20038 }
20039
20040 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
20041 for (uint32_t n = 8; n <= 12; n += 4) {
20042 for (size_t k = 1; k <= 40; k += 9) {
20043 for (uint32_t m = 1; m <= 4; m++) {
20044 GemmMicrokernelTester()
20045 .mr(4)
20046 .nr(4)
20047 .kr(2)
20048 .sr(1)
20049 .m(m)
20050 .n(n)
20051 .k(k)
20052 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020053 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020054 }
20055 }
20056 }
20057 }
20058
20059 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
20060 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020061 for (uint32_t n = 1; n <= 4; n++) {
20062 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020063 GemmMicrokernelTester()
20064 .mr(4)
20065 .nr(4)
20066 .kr(2)
20067 .sr(1)
20068 .m(m)
20069 .n(n)
20070 .k(k)
20071 .cm_stride(7)
20072 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020073 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020074 }
20075 }
20076 }
20077 }
20078
20079 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
20080 GemmMicrokernelTester()
20081 .mr(4)
20082 .nr(4)
20083 .kr(2)
20084 .sr(1)
20085 .m(4)
20086 .n(4)
20087 .k(8)
20088 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020089 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020090 }
20091
20092 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
20093 GemmMicrokernelTester()
20094 .mr(4)
20095 .nr(4)
20096 .kr(2)
20097 .sr(1)
20098 .m(4)
20099 .n(4)
20100 .k(8)
20101 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020102 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020103 }
20104
20105 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
20106 GemmMicrokernelTester()
20107 .mr(4)
20108 .nr(4)
20109 .kr(2)
20110 .sr(1)
20111 .m(4)
20112 .n(4)
20113 .k(8)
20114 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020115 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020116 }
20117
20118 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
20119 for (size_t k = 1; k <= 40; k += 9) {
20120 GemmMicrokernelTester()
20121 .mr(4)
20122 .nr(4)
20123 .kr(2)
20124 .sr(1)
20125 .m(4)
20126 .n(4)
20127 .k(k)
20128 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080020129 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020130 }
20131 }
20132
20133 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
20134 for (size_t k = 1; k <= 40; k += 9) {
20135 GemmMicrokernelTester()
20136 .mr(4)
20137 .nr(4)
20138 .kr(2)
20139 .sr(1)
20140 .m(4)
20141 .n(4)
20142 .k(k)
20143 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080020144 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020145 }
20146 }
20147
20148 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
20149 for (size_t k = 1; k <= 40; k += 9) {
20150 GemmMicrokernelTester()
20151 .mr(4)
20152 .nr(4)
20153 .kr(2)
20154 .sr(1)
20155 .m(4)
20156 .n(4)
20157 .k(k)
20158 .a_zero_point(0)
20159 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080020160 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020161 }
20162 }
20163#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
20164
20165
20166#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
20167 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
20168 GemmMicrokernelTester()
20169 .mr(4)
20170 .nr(4)
20171 .kr(2)
20172 .sr(1)
20173 .m(4)
20174 .n(4)
20175 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080020176 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020177 }
20178
20179 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
20180 GemmMicrokernelTester()
20181 .mr(4)
20182 .nr(4)
20183 .kr(2)
20184 .sr(1)
20185 .m(4)
20186 .n(4)
20187 .k(8)
20188 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020189 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020190 }
20191
20192 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
20193 GemmMicrokernelTester()
20194 .mr(4)
20195 .nr(4)
20196 .kr(2)
20197 .sr(1)
20198 .m(4)
20199 .n(4)
20200 .k(8)
20201 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020202 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020203 }
20204
20205 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020206 for (uint32_t n = 1; n <= 4; n++) {
20207 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020208 GemmMicrokernelTester()
20209 .mr(4)
20210 .nr(4)
20211 .kr(2)
20212 .sr(1)
20213 .m(m)
20214 .n(n)
20215 .k(8)
20216 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020217 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020218 }
20219 }
20220 }
20221
20222 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
20223 for (uint32_t m = 1; m <= 4; m++) {
20224 GemmMicrokernelTester()
20225 .mr(4)
20226 .nr(4)
20227 .kr(2)
20228 .sr(1)
20229 .m(m)
20230 .n(4)
20231 .k(8)
20232 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020233 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020234 }
20235 }
20236
20237 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
20238 for (uint32_t n = 1; n <= 4; n++) {
20239 GemmMicrokernelTester()
20240 .mr(4)
20241 .nr(4)
20242 .kr(2)
20243 .sr(1)
20244 .m(4)
20245 .n(n)
20246 .k(8)
20247 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020248 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020249 }
20250 }
20251
20252 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
20253 for (size_t k = 1; k < 8; k++) {
20254 GemmMicrokernelTester()
20255 .mr(4)
20256 .nr(4)
20257 .kr(2)
20258 .sr(1)
20259 .m(4)
20260 .n(4)
20261 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020262 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020263 }
20264 }
20265
20266 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
20267 for (size_t k = 1; k < 8; k++) {
20268 GemmMicrokernelTester()
20269 .mr(4)
20270 .nr(4)
20271 .kr(2)
20272 .sr(1)
20273 .m(4)
20274 .n(4)
20275 .k(k)
20276 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020277 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020278 }
20279 }
20280
20281 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
20282 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020283 for (uint32_t n = 1; n <= 4; n++) {
20284 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020285 GemmMicrokernelTester()
20286 .mr(4)
20287 .nr(4)
20288 .kr(2)
20289 .sr(1)
20290 .m(m)
20291 .n(n)
20292 .k(k)
20293 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020294 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020295 }
20296 }
20297 }
20298 }
20299
20300 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
20301 for (size_t k = 9; k < 16; k++) {
20302 GemmMicrokernelTester()
20303 .mr(4)
20304 .nr(4)
20305 .kr(2)
20306 .sr(1)
20307 .m(4)
20308 .n(4)
20309 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020310 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020311 }
20312 }
20313
20314 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
20315 for (size_t k = 9; k < 16; k++) {
20316 GemmMicrokernelTester()
20317 .mr(4)
20318 .nr(4)
20319 .kr(2)
20320 .sr(1)
20321 .m(4)
20322 .n(4)
20323 .k(k)
20324 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020325 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020326 }
20327 }
20328
20329 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
20330 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020331 for (uint32_t n = 1; n <= 4; n++) {
20332 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020333 GemmMicrokernelTester()
20334 .mr(4)
20335 .nr(4)
20336 .kr(2)
20337 .sr(1)
20338 .m(m)
20339 .n(n)
20340 .k(k)
20341 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020342 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020343 }
20344 }
20345 }
20346 }
20347
20348 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
20349 for (size_t k = 16; k <= 80; k += 8) {
20350 GemmMicrokernelTester()
20351 .mr(4)
20352 .nr(4)
20353 .kr(2)
20354 .sr(1)
20355 .m(4)
20356 .n(4)
20357 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020358 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020359 }
20360 }
20361
20362 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
20363 for (size_t k = 16; k <= 80; k += 8) {
20364 GemmMicrokernelTester()
20365 .mr(4)
20366 .nr(4)
20367 .kr(2)
20368 .sr(1)
20369 .m(4)
20370 .n(4)
20371 .k(k)
20372 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020373 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020374 }
20375 }
20376
20377 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
20378 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020379 for (uint32_t n = 1; n <= 4; n++) {
20380 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020381 GemmMicrokernelTester()
20382 .mr(4)
20383 .nr(4)
20384 .kr(2)
20385 .sr(1)
20386 .m(m)
20387 .n(n)
20388 .k(k)
20389 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020390 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020391 }
20392 }
20393 }
20394 }
20395
20396 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
20397 for (uint32_t n = 5; n < 8; n++) {
20398 for (size_t k = 1; k <= 40; k += 9) {
20399 GemmMicrokernelTester()
20400 .mr(4)
20401 .nr(4)
20402 .kr(2)
20403 .sr(1)
20404 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020405 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020406 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020407 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020408 }
20409 }
20410 }
20411
20412 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
20413 for (uint32_t n = 5; n < 8; n++) {
20414 for (size_t k = 1; k <= 40; k += 9) {
20415 GemmMicrokernelTester()
20416 .mr(4)
20417 .nr(4)
20418 .kr(2)
20419 .sr(1)
20420 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020421 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020422 .k(k)
20423 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020424 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020425 }
20426 }
20427 }
20428
20429 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
20430 for (uint32_t n = 5; n < 8; n++) {
20431 for (size_t k = 1; k <= 40; k += 9) {
20432 GemmMicrokernelTester()
20433 .mr(4)
20434 .nr(4)
20435 .kr(2)
20436 .sr(1)
20437 .m(4)
20438 .n(n)
20439 .k(k)
20440 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020441 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020442 }
20443 }
20444 }
20445
20446 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
20447 for (uint32_t n = 5; n < 8; n++) {
20448 for (size_t k = 1; k <= 40; k += 9) {
20449 for (uint32_t m = 1; m <= 4; m++) {
20450 GemmMicrokernelTester()
20451 .mr(4)
20452 .nr(4)
20453 .kr(2)
20454 .sr(1)
20455 .m(m)
20456 .n(n)
20457 .k(k)
20458 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020459 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020460 }
20461 }
20462 }
20463 }
20464
20465 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
20466 for (uint32_t n = 8; n <= 12; n += 4) {
20467 for (size_t k = 1; k <= 40; k += 9) {
20468 GemmMicrokernelTester()
20469 .mr(4)
20470 .nr(4)
20471 .kr(2)
20472 .sr(1)
20473 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020474 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020475 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020476 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020477 }
20478 }
20479 }
20480
20481 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
20482 for (uint32_t n = 8; n <= 12; n += 4) {
20483 for (size_t k = 1; k <= 40; k += 9) {
20484 GemmMicrokernelTester()
20485 .mr(4)
20486 .nr(4)
20487 .kr(2)
20488 .sr(1)
20489 .m(4)
20490 .n(n)
20491 .k(k)
20492 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020493 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020494 }
20495 }
20496 }
20497
20498 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
20499 for (uint32_t n = 8; n <= 12; n += 4) {
20500 for (size_t k = 1; k <= 40; k += 9) {
20501 GemmMicrokernelTester()
20502 .mr(4)
20503 .nr(4)
20504 .kr(2)
20505 .sr(1)
20506 .m(4)
20507 .n(n)
20508 .k(k)
20509 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020510 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020511 }
20512 }
20513 }
20514
20515 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
20516 for (uint32_t n = 8; n <= 12; n += 4) {
20517 for (size_t k = 1; k <= 40; k += 9) {
20518 for (uint32_t m = 1; m <= 4; m++) {
20519 GemmMicrokernelTester()
20520 .mr(4)
20521 .nr(4)
20522 .kr(2)
20523 .sr(1)
20524 .m(m)
20525 .n(n)
20526 .k(k)
20527 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020528 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020529 }
20530 }
20531 }
20532 }
20533
20534 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
20535 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020536 for (uint32_t n = 1; n <= 4; n++) {
20537 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020538 GemmMicrokernelTester()
20539 .mr(4)
20540 .nr(4)
20541 .kr(2)
20542 .sr(1)
20543 .m(m)
20544 .n(n)
20545 .k(k)
20546 .cm_stride(7)
20547 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020548 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020549 }
20550 }
20551 }
20552 }
20553
20554 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
20555 GemmMicrokernelTester()
20556 .mr(4)
20557 .nr(4)
20558 .kr(2)
20559 .sr(1)
20560 .m(4)
20561 .n(4)
20562 .k(8)
20563 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020564 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020565 }
20566
20567 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
20568 GemmMicrokernelTester()
20569 .mr(4)
20570 .nr(4)
20571 .kr(2)
20572 .sr(1)
20573 .m(4)
20574 .n(4)
20575 .k(8)
20576 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020577 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020578 }
20579
20580 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
20581 GemmMicrokernelTester()
20582 .mr(4)
20583 .nr(4)
20584 .kr(2)
20585 .sr(1)
20586 .m(4)
20587 .n(4)
20588 .k(8)
20589 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020590 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020591 }
20592
20593 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
20594 for (size_t k = 1; k <= 40; k += 9) {
20595 GemmMicrokernelTester()
20596 .mr(4)
20597 .nr(4)
20598 .kr(2)
20599 .sr(1)
20600 .m(4)
20601 .n(4)
20602 .k(k)
20603 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080020604 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020605 }
20606 }
20607
20608 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
20609 for (size_t k = 1; k <= 40; k += 9) {
20610 GemmMicrokernelTester()
20611 .mr(4)
20612 .nr(4)
20613 .kr(2)
20614 .sr(1)
20615 .m(4)
20616 .n(4)
20617 .k(k)
20618 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080020619 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020620 }
20621 }
20622
20623 TEST(QU8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_zero_point) {
20624 for (size_t k = 1; k <= 40; k += 9) {
20625 GemmMicrokernelTester()
20626 .mr(4)
20627 .nr(4)
20628 .kr(2)
20629 .sr(1)
20630 .m(4)
20631 .n(4)
20632 .k(k)
20633 .a_zero_point(0)
20634 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080020635 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020636 }
20637 }
20638#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
20639
20640
20641#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan348c3772022-02-01 00:36:50 -080020642 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
20643 GemmMicrokernelTester()
20644 .mr(1)
20645 .nr(4)
20646 .kr(2)
20647 .sr(4)
20648 .m(1)
20649 .n(4)
20650 .k(8)
20651 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20652 }
20653
20654 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
20655 GemmMicrokernelTester()
20656 .mr(1)
20657 .nr(4)
20658 .kr(2)
20659 .sr(4)
20660 .m(1)
20661 .n(4)
20662 .k(8)
20663 .cn_stride(7)
20664 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20665 }
20666
20667 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
20668 GemmMicrokernelTester()
20669 .mr(1)
20670 .nr(4)
20671 .kr(2)
20672 .sr(4)
20673 .m(1)
20674 .n(4)
20675 .k(8)
20676 .a_stride(11)
20677 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20678 }
20679
20680 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
20681 for (uint32_t n = 1; n <= 4; n++) {
20682 for (uint32_t m = 1; m <= 1; m++) {
20683 GemmMicrokernelTester()
20684 .mr(1)
20685 .nr(4)
20686 .kr(2)
20687 .sr(4)
20688 .m(m)
20689 .n(n)
20690 .k(8)
20691 .iterations(1)
20692 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20693 }
20694 }
20695 }
20696
20697 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
20698 for (uint32_t m = 1; m <= 1; m++) {
20699 GemmMicrokernelTester()
20700 .mr(1)
20701 .nr(4)
20702 .kr(2)
20703 .sr(4)
20704 .m(m)
20705 .n(4)
20706 .k(8)
20707 .iterations(1)
20708 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20709 }
20710 }
20711
20712 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
20713 for (uint32_t n = 1; n <= 4; n++) {
20714 GemmMicrokernelTester()
20715 .mr(1)
20716 .nr(4)
20717 .kr(2)
20718 .sr(4)
20719 .m(1)
20720 .n(n)
20721 .k(8)
20722 .iterations(1)
20723 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20724 }
20725 }
20726
20727 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
20728 for (size_t k = 1; k < 8; k++) {
20729 GemmMicrokernelTester()
20730 .mr(1)
20731 .nr(4)
20732 .kr(2)
20733 .sr(4)
20734 .m(1)
20735 .n(4)
20736 .k(k)
20737 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20738 }
20739 }
20740
20741 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
20742 for (size_t k = 1; k < 8; k++) {
20743 GemmMicrokernelTester()
20744 .mr(1)
20745 .nr(4)
20746 .kr(2)
20747 .sr(4)
20748 .m(1)
20749 .n(4)
20750 .k(k)
20751 .a_stride(11)
20752 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20753 }
20754 }
20755
20756 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
20757 for (size_t k = 1; k < 8; k++) {
20758 for (uint32_t n = 1; n <= 4; n++) {
20759 for (uint32_t m = 1; m <= 1; m++) {
20760 GemmMicrokernelTester()
20761 .mr(1)
20762 .nr(4)
20763 .kr(2)
20764 .sr(4)
20765 .m(m)
20766 .n(n)
20767 .k(k)
20768 .iterations(1)
20769 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20770 }
20771 }
20772 }
20773 }
20774
20775 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
20776 for (size_t k = 9; k < 16; k++) {
20777 GemmMicrokernelTester()
20778 .mr(1)
20779 .nr(4)
20780 .kr(2)
20781 .sr(4)
20782 .m(1)
20783 .n(4)
20784 .k(k)
20785 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20786 }
20787 }
20788
20789 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
20790 for (size_t k = 9; k < 16; k++) {
20791 GemmMicrokernelTester()
20792 .mr(1)
20793 .nr(4)
20794 .kr(2)
20795 .sr(4)
20796 .m(1)
20797 .n(4)
20798 .k(k)
20799 .a_stride(19)
20800 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20801 }
20802 }
20803
20804 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
20805 for (size_t k = 9; k < 16; k++) {
20806 for (uint32_t n = 1; n <= 4; n++) {
20807 for (uint32_t m = 1; m <= 1; m++) {
20808 GemmMicrokernelTester()
20809 .mr(1)
20810 .nr(4)
20811 .kr(2)
20812 .sr(4)
20813 .m(m)
20814 .n(n)
20815 .k(k)
20816 .iterations(1)
20817 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20818 }
20819 }
20820 }
20821 }
20822
20823 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
20824 for (size_t k = 16; k <= 80; k += 8) {
20825 GemmMicrokernelTester()
20826 .mr(1)
20827 .nr(4)
20828 .kr(2)
20829 .sr(4)
20830 .m(1)
20831 .n(4)
20832 .k(k)
20833 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20834 }
20835 }
20836
20837 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
20838 for (size_t k = 16; k <= 80; k += 8) {
20839 GemmMicrokernelTester()
20840 .mr(1)
20841 .nr(4)
20842 .kr(2)
20843 .sr(4)
20844 .m(1)
20845 .n(4)
20846 .k(k)
20847 .a_stride(83)
20848 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20849 }
20850 }
20851
20852 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
20853 for (size_t k = 16; k <= 80; k += 8) {
20854 for (uint32_t n = 1; n <= 4; n++) {
20855 for (uint32_t m = 1; m <= 1; m++) {
20856 GemmMicrokernelTester()
20857 .mr(1)
20858 .nr(4)
20859 .kr(2)
20860 .sr(4)
20861 .m(m)
20862 .n(n)
20863 .k(k)
20864 .iterations(1)
20865 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20866 }
20867 }
20868 }
20869 }
20870
20871 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
20872 for (uint32_t n = 5; n < 8; n++) {
20873 for (size_t k = 1; k <= 40; k += 9) {
20874 GemmMicrokernelTester()
20875 .mr(1)
20876 .nr(4)
20877 .kr(2)
20878 .sr(4)
20879 .m(1)
20880 .n(n)
20881 .k(k)
20882 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20883 }
20884 }
20885 }
20886
20887 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
20888 for (uint32_t n = 5; n < 8; n++) {
20889 for (size_t k = 1; k <= 40; k += 9) {
20890 GemmMicrokernelTester()
20891 .mr(1)
20892 .nr(4)
20893 .kr(2)
20894 .sr(4)
20895 .m(1)
20896 .n(n)
20897 .k(k)
20898 .cn_stride(7)
20899 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20900 }
20901 }
20902 }
20903
20904 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
20905 for (uint32_t n = 5; n < 8; n++) {
20906 for (size_t k = 1; k <= 40; k += 9) {
20907 GemmMicrokernelTester()
20908 .mr(1)
20909 .nr(4)
20910 .kr(2)
20911 .sr(4)
20912 .m(1)
20913 .n(n)
20914 .k(k)
20915 .a_stride(43)
20916 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20917 }
20918 }
20919 }
20920
20921 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
20922 for (uint32_t n = 5; n < 8; n++) {
20923 for (size_t k = 1; k <= 40; k += 9) {
20924 for (uint32_t m = 1; m <= 1; m++) {
20925 GemmMicrokernelTester()
20926 .mr(1)
20927 .nr(4)
20928 .kr(2)
20929 .sr(4)
20930 .m(m)
20931 .n(n)
20932 .k(k)
20933 .iterations(1)
20934 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20935 }
20936 }
20937 }
20938 }
20939
20940 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
20941 for (uint32_t n = 8; n <= 12; n += 4) {
20942 for (size_t k = 1; k <= 40; k += 9) {
20943 GemmMicrokernelTester()
20944 .mr(1)
20945 .nr(4)
20946 .kr(2)
20947 .sr(4)
20948 .m(1)
20949 .n(n)
20950 .k(k)
20951 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20952 }
20953 }
20954 }
20955
20956 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
20957 for (uint32_t n = 8; n <= 12; n += 4) {
20958 for (size_t k = 1; k <= 40; k += 9) {
20959 GemmMicrokernelTester()
20960 .mr(1)
20961 .nr(4)
20962 .kr(2)
20963 .sr(4)
20964 .m(1)
20965 .n(n)
20966 .k(k)
20967 .cn_stride(7)
20968 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20969 }
20970 }
20971 }
20972
20973 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
20974 for (uint32_t n = 8; n <= 12; n += 4) {
20975 for (size_t k = 1; k <= 40; k += 9) {
20976 GemmMicrokernelTester()
20977 .mr(1)
20978 .nr(4)
20979 .kr(2)
20980 .sr(4)
20981 .m(1)
20982 .n(n)
20983 .k(k)
20984 .a_stride(43)
20985 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
20986 }
20987 }
20988 }
20989
20990 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
20991 for (uint32_t n = 8; n <= 12; n += 4) {
20992 for (size_t k = 1; k <= 40; k += 9) {
20993 for (uint32_t m = 1; m <= 1; m++) {
20994 GemmMicrokernelTester()
20995 .mr(1)
20996 .nr(4)
20997 .kr(2)
20998 .sr(4)
20999 .m(m)
21000 .n(n)
21001 .k(k)
21002 .iterations(1)
21003 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21004 }
21005 }
21006 }
21007 }
21008
21009 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
21010 for (size_t k = 1; k <= 40; k += 9) {
21011 for (uint32_t n = 1; n <= 4; n++) {
21012 for (uint32_t m = 1; m <= 1; m++) {
21013 GemmMicrokernelTester()
21014 .mr(1)
21015 .nr(4)
21016 .kr(2)
21017 .sr(4)
21018 .m(m)
21019 .n(n)
21020 .k(k)
21021 .cm_stride(7)
21022 .iterations(1)
21023 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21024 }
21025 }
21026 }
21027 }
21028
21029 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
21030 GemmMicrokernelTester()
21031 .mr(1)
21032 .nr(4)
21033 .kr(2)
21034 .sr(4)
21035 .m(1)
21036 .n(4)
21037 .k(8)
21038 .qmin(128)
21039 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21040 }
21041
21042 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
21043 GemmMicrokernelTester()
21044 .mr(1)
21045 .nr(4)
21046 .kr(2)
21047 .sr(4)
21048 .m(1)
21049 .n(4)
21050 .k(8)
21051 .qmax(128)
21052 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21053 }
21054
21055 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
21056 GemmMicrokernelTester()
21057 .mr(1)
21058 .nr(4)
21059 .kr(2)
21060 .sr(4)
21061 .m(1)
21062 .n(4)
21063 .k(8)
21064 .cm_stride(7)
21065 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21066 }
21067
21068 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
21069 for (size_t k = 1; k <= 40; k += 9) {
21070 GemmMicrokernelTester()
21071 .mr(1)
21072 .nr(4)
21073 .kr(2)
21074 .sr(4)
21075 .m(1)
21076 .n(4)
21077 .k(k)
21078 .a_zero_point(0)
21079 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21080 }
21081 }
21082
21083 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
21084 for (size_t k = 1; k <= 40; k += 9) {
21085 GemmMicrokernelTester()
21086 .mr(1)
21087 .nr(4)
21088 .kr(2)
21089 .sr(4)
21090 .m(1)
21091 .n(4)
21092 .k(k)
21093 .b_zero_point(0)
21094 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21095 }
21096 }
21097
21098 TEST(QU8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
21099 for (size_t k = 1; k <= 40; k += 9) {
21100 GemmMicrokernelTester()
21101 .mr(1)
21102 .nr(4)
21103 .kr(2)
21104 .sr(4)
21105 .m(1)
21106 .n(4)
21107 .k(k)
21108 .a_zero_point(0)
21109 .b_zero_point(0)
21110 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21111 }
21112 }
21113#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21114
21115
21116#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21117 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
21118 GemmMicrokernelTester()
21119 .mr(3)
21120 .nr(4)
21121 .kr(2)
21122 .sr(4)
21123 .m(3)
21124 .n(4)
21125 .k(8)
21126 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21127 }
21128
21129 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
21130 GemmMicrokernelTester()
21131 .mr(3)
21132 .nr(4)
21133 .kr(2)
21134 .sr(4)
21135 .m(3)
21136 .n(4)
21137 .k(8)
21138 .cn_stride(7)
21139 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21140 }
21141
21142 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
21143 GemmMicrokernelTester()
21144 .mr(3)
21145 .nr(4)
21146 .kr(2)
21147 .sr(4)
21148 .m(3)
21149 .n(4)
21150 .k(8)
21151 .a_stride(11)
21152 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21153 }
21154
21155 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
21156 for (uint32_t n = 1; n <= 4; n++) {
21157 for (uint32_t m = 1; m <= 3; m++) {
21158 GemmMicrokernelTester()
21159 .mr(3)
21160 .nr(4)
21161 .kr(2)
21162 .sr(4)
21163 .m(m)
21164 .n(n)
21165 .k(8)
21166 .iterations(1)
21167 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21168 }
21169 }
21170 }
21171
21172 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
21173 for (uint32_t m = 1; m <= 3; m++) {
21174 GemmMicrokernelTester()
21175 .mr(3)
21176 .nr(4)
21177 .kr(2)
21178 .sr(4)
21179 .m(m)
21180 .n(4)
21181 .k(8)
21182 .iterations(1)
21183 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21184 }
21185 }
21186
21187 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
21188 for (uint32_t n = 1; n <= 4; n++) {
21189 GemmMicrokernelTester()
21190 .mr(3)
21191 .nr(4)
21192 .kr(2)
21193 .sr(4)
21194 .m(3)
21195 .n(n)
21196 .k(8)
21197 .iterations(1)
21198 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21199 }
21200 }
21201
21202 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
21203 for (size_t k = 1; k < 8; k++) {
21204 GemmMicrokernelTester()
21205 .mr(3)
21206 .nr(4)
21207 .kr(2)
21208 .sr(4)
21209 .m(3)
21210 .n(4)
21211 .k(k)
21212 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21213 }
21214 }
21215
21216 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
21217 for (size_t k = 1; k < 8; k++) {
21218 GemmMicrokernelTester()
21219 .mr(3)
21220 .nr(4)
21221 .kr(2)
21222 .sr(4)
21223 .m(3)
21224 .n(4)
21225 .k(k)
21226 .a_stride(11)
21227 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21228 }
21229 }
21230
21231 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
21232 for (size_t k = 1; k < 8; k++) {
21233 for (uint32_t n = 1; n <= 4; n++) {
21234 for (uint32_t m = 1; m <= 3; m++) {
21235 GemmMicrokernelTester()
21236 .mr(3)
21237 .nr(4)
21238 .kr(2)
21239 .sr(4)
21240 .m(m)
21241 .n(n)
21242 .k(k)
21243 .iterations(1)
21244 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21245 }
21246 }
21247 }
21248 }
21249
21250 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
21251 for (size_t k = 9; k < 16; k++) {
21252 GemmMicrokernelTester()
21253 .mr(3)
21254 .nr(4)
21255 .kr(2)
21256 .sr(4)
21257 .m(3)
21258 .n(4)
21259 .k(k)
21260 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21261 }
21262 }
21263
21264 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
21265 for (size_t k = 9; k < 16; k++) {
21266 GemmMicrokernelTester()
21267 .mr(3)
21268 .nr(4)
21269 .kr(2)
21270 .sr(4)
21271 .m(3)
21272 .n(4)
21273 .k(k)
21274 .a_stride(19)
21275 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21276 }
21277 }
21278
21279 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
21280 for (size_t k = 9; k < 16; k++) {
21281 for (uint32_t n = 1; n <= 4; n++) {
21282 for (uint32_t m = 1; m <= 3; m++) {
21283 GemmMicrokernelTester()
21284 .mr(3)
21285 .nr(4)
21286 .kr(2)
21287 .sr(4)
21288 .m(m)
21289 .n(n)
21290 .k(k)
21291 .iterations(1)
21292 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21293 }
21294 }
21295 }
21296 }
21297
21298 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
21299 for (size_t k = 16; k <= 80; k += 8) {
21300 GemmMicrokernelTester()
21301 .mr(3)
21302 .nr(4)
21303 .kr(2)
21304 .sr(4)
21305 .m(3)
21306 .n(4)
21307 .k(k)
21308 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21309 }
21310 }
21311
21312 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
21313 for (size_t k = 16; k <= 80; k += 8) {
21314 GemmMicrokernelTester()
21315 .mr(3)
21316 .nr(4)
21317 .kr(2)
21318 .sr(4)
21319 .m(3)
21320 .n(4)
21321 .k(k)
21322 .a_stride(83)
21323 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21324 }
21325 }
21326
21327 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
21328 for (size_t k = 16; k <= 80; k += 8) {
21329 for (uint32_t n = 1; n <= 4; n++) {
21330 for (uint32_t m = 1; m <= 3; m++) {
21331 GemmMicrokernelTester()
21332 .mr(3)
21333 .nr(4)
21334 .kr(2)
21335 .sr(4)
21336 .m(m)
21337 .n(n)
21338 .k(k)
21339 .iterations(1)
21340 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21341 }
21342 }
21343 }
21344 }
21345
21346 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
21347 for (uint32_t n = 5; n < 8; n++) {
21348 for (size_t k = 1; k <= 40; k += 9) {
21349 GemmMicrokernelTester()
21350 .mr(3)
21351 .nr(4)
21352 .kr(2)
21353 .sr(4)
21354 .m(3)
21355 .n(n)
21356 .k(k)
21357 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21358 }
21359 }
21360 }
21361
21362 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
21363 for (uint32_t n = 5; n < 8; n++) {
21364 for (size_t k = 1; k <= 40; k += 9) {
21365 GemmMicrokernelTester()
21366 .mr(3)
21367 .nr(4)
21368 .kr(2)
21369 .sr(4)
21370 .m(3)
21371 .n(n)
21372 .k(k)
21373 .cn_stride(7)
21374 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21375 }
21376 }
21377 }
21378
21379 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
21380 for (uint32_t n = 5; n < 8; n++) {
21381 for (size_t k = 1; k <= 40; k += 9) {
21382 GemmMicrokernelTester()
21383 .mr(3)
21384 .nr(4)
21385 .kr(2)
21386 .sr(4)
21387 .m(3)
21388 .n(n)
21389 .k(k)
21390 .a_stride(43)
21391 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21392 }
21393 }
21394 }
21395
21396 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
21397 for (uint32_t n = 5; n < 8; n++) {
21398 for (size_t k = 1; k <= 40; k += 9) {
21399 for (uint32_t m = 1; m <= 3; m++) {
21400 GemmMicrokernelTester()
21401 .mr(3)
21402 .nr(4)
21403 .kr(2)
21404 .sr(4)
21405 .m(m)
21406 .n(n)
21407 .k(k)
21408 .iterations(1)
21409 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21410 }
21411 }
21412 }
21413 }
21414
21415 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
21416 for (uint32_t n = 8; n <= 12; n += 4) {
21417 for (size_t k = 1; k <= 40; k += 9) {
21418 GemmMicrokernelTester()
21419 .mr(3)
21420 .nr(4)
21421 .kr(2)
21422 .sr(4)
21423 .m(3)
21424 .n(n)
21425 .k(k)
21426 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21427 }
21428 }
21429 }
21430
21431 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
21432 for (uint32_t n = 8; n <= 12; n += 4) {
21433 for (size_t k = 1; k <= 40; k += 9) {
21434 GemmMicrokernelTester()
21435 .mr(3)
21436 .nr(4)
21437 .kr(2)
21438 .sr(4)
21439 .m(3)
21440 .n(n)
21441 .k(k)
21442 .cn_stride(7)
21443 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21444 }
21445 }
21446 }
21447
21448 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
21449 for (uint32_t n = 8; n <= 12; n += 4) {
21450 for (size_t k = 1; k <= 40; k += 9) {
21451 GemmMicrokernelTester()
21452 .mr(3)
21453 .nr(4)
21454 .kr(2)
21455 .sr(4)
21456 .m(3)
21457 .n(n)
21458 .k(k)
21459 .a_stride(43)
21460 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21461 }
21462 }
21463 }
21464
21465 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
21466 for (uint32_t n = 8; n <= 12; n += 4) {
21467 for (size_t k = 1; k <= 40; k += 9) {
21468 for (uint32_t m = 1; m <= 3; m++) {
21469 GemmMicrokernelTester()
21470 .mr(3)
21471 .nr(4)
21472 .kr(2)
21473 .sr(4)
21474 .m(m)
21475 .n(n)
21476 .k(k)
21477 .iterations(1)
21478 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21479 }
21480 }
21481 }
21482 }
21483
21484 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
21485 for (size_t k = 1; k <= 40; k += 9) {
21486 for (uint32_t n = 1; n <= 4; n++) {
21487 for (uint32_t m = 1; m <= 3; m++) {
21488 GemmMicrokernelTester()
21489 .mr(3)
21490 .nr(4)
21491 .kr(2)
21492 .sr(4)
21493 .m(m)
21494 .n(n)
21495 .k(k)
21496 .cm_stride(7)
21497 .iterations(1)
21498 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21499 }
21500 }
21501 }
21502 }
21503
21504 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
21505 GemmMicrokernelTester()
21506 .mr(3)
21507 .nr(4)
21508 .kr(2)
21509 .sr(4)
21510 .m(3)
21511 .n(4)
21512 .k(8)
21513 .qmin(128)
21514 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21515 }
21516
21517 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
21518 GemmMicrokernelTester()
21519 .mr(3)
21520 .nr(4)
21521 .kr(2)
21522 .sr(4)
21523 .m(3)
21524 .n(4)
21525 .k(8)
21526 .qmax(128)
21527 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21528 }
21529
21530 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
21531 GemmMicrokernelTester()
21532 .mr(3)
21533 .nr(4)
21534 .kr(2)
21535 .sr(4)
21536 .m(3)
21537 .n(4)
21538 .k(8)
21539 .cm_stride(7)
21540 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21541 }
21542
21543 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
21544 for (size_t k = 1; k <= 40; k += 9) {
21545 GemmMicrokernelTester()
21546 .mr(3)
21547 .nr(4)
21548 .kr(2)
21549 .sr(4)
21550 .m(3)
21551 .n(4)
21552 .k(k)
21553 .a_zero_point(0)
21554 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21555 }
21556 }
21557
21558 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
21559 for (size_t k = 1; k <= 40; k += 9) {
21560 GemmMicrokernelTester()
21561 .mr(3)
21562 .nr(4)
21563 .kr(2)
21564 .sr(4)
21565 .m(3)
21566 .n(4)
21567 .k(k)
21568 .b_zero_point(0)
21569 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21570 }
21571 }
21572
21573 TEST(QU8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
21574 for (size_t k = 1; k <= 40; k += 9) {
21575 GemmMicrokernelTester()
21576 .mr(3)
21577 .nr(4)
21578 .kr(2)
21579 .sr(4)
21580 .m(3)
21581 .n(4)
21582 .k(k)
21583 .a_zero_point(0)
21584 .b_zero_point(0)
21585 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
21586 }
21587 }
21588#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
21589
21590
21591#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021592 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
21593 GemmMicrokernelTester()
21594 .mr(1)
21595 .nr(4)
21596 .kr(8)
21597 .sr(1)
21598 .m(1)
21599 .n(4)
21600 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080021601 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021602 }
21603
21604 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
21605 GemmMicrokernelTester()
21606 .mr(1)
21607 .nr(4)
21608 .kr(8)
21609 .sr(1)
21610 .m(1)
21611 .n(4)
21612 .k(8)
21613 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021614 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021615 }
21616
21617 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
21618 GemmMicrokernelTester()
21619 .mr(1)
21620 .nr(4)
21621 .kr(8)
21622 .sr(1)
21623 .m(1)
21624 .n(4)
21625 .k(8)
21626 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021627 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021628 }
21629
21630 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021631 for (uint32_t n = 1; n <= 4; n++) {
21632 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021633 GemmMicrokernelTester()
21634 .mr(1)
21635 .nr(4)
21636 .kr(8)
21637 .sr(1)
21638 .m(m)
21639 .n(n)
21640 .k(8)
21641 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021642 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021643 }
21644 }
21645 }
21646
21647 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
21648 for (uint32_t m = 1; m <= 1; m++) {
21649 GemmMicrokernelTester()
21650 .mr(1)
21651 .nr(4)
21652 .kr(8)
21653 .sr(1)
21654 .m(m)
21655 .n(4)
21656 .k(8)
21657 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021658 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021659 }
21660 }
21661
21662 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
21663 for (uint32_t n = 1; n <= 4; n++) {
21664 GemmMicrokernelTester()
21665 .mr(1)
21666 .nr(4)
21667 .kr(8)
21668 .sr(1)
21669 .m(1)
21670 .n(n)
21671 .k(8)
21672 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021673 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021674 }
21675 }
21676
21677 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
21678 for (size_t k = 1; k < 8; k++) {
21679 GemmMicrokernelTester()
21680 .mr(1)
21681 .nr(4)
21682 .kr(8)
21683 .sr(1)
21684 .m(1)
21685 .n(4)
21686 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021687 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021688 }
21689 }
21690
21691 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
21692 for (size_t k = 1; k < 8; k++) {
21693 GemmMicrokernelTester()
21694 .mr(1)
21695 .nr(4)
21696 .kr(8)
21697 .sr(1)
21698 .m(1)
21699 .n(4)
21700 .k(k)
21701 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021702 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021703 }
21704 }
21705
21706 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
21707 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021708 for (uint32_t n = 1; n <= 4; n++) {
21709 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021710 GemmMicrokernelTester()
21711 .mr(1)
21712 .nr(4)
21713 .kr(8)
21714 .sr(1)
21715 .m(m)
21716 .n(n)
21717 .k(k)
21718 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021719 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021720 }
21721 }
21722 }
21723 }
21724
21725 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
21726 for (size_t k = 9; k < 16; k++) {
21727 GemmMicrokernelTester()
21728 .mr(1)
21729 .nr(4)
21730 .kr(8)
21731 .sr(1)
21732 .m(1)
21733 .n(4)
21734 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021735 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021736 }
21737 }
21738
21739 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
21740 for (size_t k = 9; k < 16; k++) {
21741 GemmMicrokernelTester()
21742 .mr(1)
21743 .nr(4)
21744 .kr(8)
21745 .sr(1)
21746 .m(1)
21747 .n(4)
21748 .k(k)
21749 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080021750 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021751 }
21752 }
21753
21754 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
21755 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021756 for (uint32_t n = 1; n <= 4; n++) {
21757 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021758 GemmMicrokernelTester()
21759 .mr(1)
21760 .nr(4)
21761 .kr(8)
21762 .sr(1)
21763 .m(m)
21764 .n(n)
21765 .k(k)
21766 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021767 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021768 }
21769 }
21770 }
21771 }
21772
21773 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
21774 for (size_t k = 16; k <= 80; k += 8) {
21775 GemmMicrokernelTester()
21776 .mr(1)
21777 .nr(4)
21778 .kr(8)
21779 .sr(1)
21780 .m(1)
21781 .n(4)
21782 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021783 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021784 }
21785 }
21786
21787 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
21788 for (size_t k = 16; k <= 80; k += 8) {
21789 GemmMicrokernelTester()
21790 .mr(1)
21791 .nr(4)
21792 .kr(8)
21793 .sr(1)
21794 .m(1)
21795 .n(4)
21796 .k(k)
21797 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080021798 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021799 }
21800 }
21801
21802 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
21803 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021804 for (uint32_t n = 1; n <= 4; n++) {
21805 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021806 GemmMicrokernelTester()
21807 .mr(1)
21808 .nr(4)
21809 .kr(8)
21810 .sr(1)
21811 .m(m)
21812 .n(n)
21813 .k(k)
21814 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021815 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021816 }
21817 }
21818 }
21819 }
21820
21821 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
21822 for (uint32_t n = 5; n < 8; n++) {
21823 for (size_t k = 1; k <= 40; k += 9) {
21824 GemmMicrokernelTester()
21825 .mr(1)
21826 .nr(4)
21827 .kr(8)
21828 .sr(1)
21829 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021830 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021831 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021832 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021833 }
21834 }
21835 }
21836
21837 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
21838 for (uint32_t n = 5; n < 8; n++) {
21839 for (size_t k = 1; k <= 40; k += 9) {
21840 GemmMicrokernelTester()
21841 .mr(1)
21842 .nr(4)
21843 .kr(8)
21844 .sr(1)
21845 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021846 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021847 .k(k)
21848 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021849 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021850 }
21851 }
21852 }
21853
21854 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
21855 for (uint32_t n = 5; n < 8; n++) {
21856 for (size_t k = 1; k <= 40; k += 9) {
21857 GemmMicrokernelTester()
21858 .mr(1)
21859 .nr(4)
21860 .kr(8)
21861 .sr(1)
21862 .m(1)
21863 .n(n)
21864 .k(k)
21865 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080021866 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021867 }
21868 }
21869 }
21870
21871 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
21872 for (uint32_t n = 5; n < 8; n++) {
21873 for (size_t k = 1; k <= 40; k += 9) {
21874 for (uint32_t m = 1; m <= 1; m++) {
21875 GemmMicrokernelTester()
21876 .mr(1)
21877 .nr(4)
21878 .kr(8)
21879 .sr(1)
21880 .m(m)
21881 .n(n)
21882 .k(k)
21883 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021884 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021885 }
21886 }
21887 }
21888 }
21889
21890 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
21891 for (uint32_t n = 8; n <= 12; n += 4) {
21892 for (size_t k = 1; k <= 40; k += 9) {
21893 GemmMicrokernelTester()
21894 .mr(1)
21895 .nr(4)
21896 .kr(8)
21897 .sr(1)
21898 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021899 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021900 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021901 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021902 }
21903 }
21904 }
21905
21906 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
21907 for (uint32_t n = 8; n <= 12; n += 4) {
21908 for (size_t k = 1; k <= 40; k += 9) {
21909 GemmMicrokernelTester()
21910 .mr(1)
21911 .nr(4)
21912 .kr(8)
21913 .sr(1)
21914 .m(1)
21915 .n(n)
21916 .k(k)
21917 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021918 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021919 }
21920 }
21921 }
21922
21923 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
21924 for (uint32_t n = 8; n <= 12; n += 4) {
21925 for (size_t k = 1; k <= 40; k += 9) {
21926 GemmMicrokernelTester()
21927 .mr(1)
21928 .nr(4)
21929 .kr(8)
21930 .sr(1)
21931 .m(1)
21932 .n(n)
21933 .k(k)
21934 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080021935 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021936 }
21937 }
21938 }
21939
21940 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
21941 for (uint32_t n = 8; n <= 12; n += 4) {
21942 for (size_t k = 1; k <= 40; k += 9) {
21943 for (uint32_t m = 1; m <= 1; m++) {
21944 GemmMicrokernelTester()
21945 .mr(1)
21946 .nr(4)
21947 .kr(8)
21948 .sr(1)
21949 .m(m)
21950 .n(n)
21951 .k(k)
21952 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021953 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021954 }
21955 }
21956 }
21957 }
21958
21959 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
21960 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021961 for (uint32_t n = 1; n <= 4; n++) {
21962 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021963 GemmMicrokernelTester()
21964 .mr(1)
21965 .nr(4)
21966 .kr(8)
21967 .sr(1)
21968 .m(m)
21969 .n(n)
21970 .k(k)
21971 .cm_stride(7)
21972 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021973 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021974 }
21975 }
21976 }
21977 }
21978
21979 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
21980 GemmMicrokernelTester()
21981 .mr(1)
21982 .nr(4)
21983 .kr(8)
21984 .sr(1)
21985 .m(1)
21986 .n(4)
21987 .k(8)
21988 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021989 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021990 }
21991
21992 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
21993 GemmMicrokernelTester()
21994 .mr(1)
21995 .nr(4)
21996 .kr(8)
21997 .sr(1)
21998 .m(1)
21999 .n(4)
22000 .k(8)
22001 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022002 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022003 }
22004
22005 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
22006 GemmMicrokernelTester()
22007 .mr(1)
22008 .nr(4)
22009 .kr(8)
22010 .sr(1)
22011 .m(1)
22012 .n(4)
22013 .k(8)
22014 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022015 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022016 }
22017
22018 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
22019 for (size_t k = 1; k <= 40; k += 9) {
22020 GemmMicrokernelTester()
22021 .mr(1)
22022 .nr(4)
22023 .kr(8)
22024 .sr(1)
22025 .m(1)
22026 .n(4)
22027 .k(k)
22028 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080022029 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022030 }
22031 }
22032
22033 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
22034 for (size_t k = 1; k <= 40; k += 9) {
22035 GemmMicrokernelTester()
22036 .mr(1)
22037 .nr(4)
22038 .kr(8)
22039 .sr(1)
22040 .m(1)
22041 .n(4)
22042 .k(k)
22043 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080022044 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022045 }
22046 }
22047
22048 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
22049 for (size_t k = 1; k <= 40; k += 9) {
22050 GemmMicrokernelTester()
22051 .mr(1)
22052 .nr(4)
22053 .kr(8)
22054 .sr(1)
22055 .m(1)
22056 .n(4)
22057 .k(k)
22058 .a_zero_point(0)
22059 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080022060 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022061 }
22062 }
22063#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22064
22065
22066#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22067 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
22068 GemmMicrokernelTester()
22069 .mr(3)
22070 .nr(4)
22071 .kr(8)
22072 .sr(1)
22073 .m(3)
22074 .n(4)
22075 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080022076 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022077 }
22078
22079 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
22080 GemmMicrokernelTester()
22081 .mr(3)
22082 .nr(4)
22083 .kr(8)
22084 .sr(1)
22085 .m(3)
22086 .n(4)
22087 .k(8)
22088 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022089 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022090 }
22091
22092 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
22093 GemmMicrokernelTester()
22094 .mr(3)
22095 .nr(4)
22096 .kr(8)
22097 .sr(1)
22098 .m(3)
22099 .n(4)
22100 .k(8)
22101 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080022102 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022103 }
22104
22105 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022106 for (uint32_t n = 1; n <= 4; n++) {
22107 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022108 GemmMicrokernelTester()
22109 .mr(3)
22110 .nr(4)
22111 .kr(8)
22112 .sr(1)
22113 .m(m)
22114 .n(n)
22115 .k(8)
22116 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022117 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022118 }
22119 }
22120 }
22121
22122 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
22123 for (uint32_t m = 1; m <= 3; m++) {
22124 GemmMicrokernelTester()
22125 .mr(3)
22126 .nr(4)
22127 .kr(8)
22128 .sr(1)
22129 .m(m)
22130 .n(4)
22131 .k(8)
22132 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022133 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022134 }
22135 }
22136
22137 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
22138 for (uint32_t n = 1; n <= 4; n++) {
22139 GemmMicrokernelTester()
22140 .mr(3)
22141 .nr(4)
22142 .kr(8)
22143 .sr(1)
22144 .m(3)
22145 .n(n)
22146 .k(8)
22147 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022148 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022149 }
22150 }
22151
22152 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
22153 for (size_t k = 1; k < 8; k++) {
22154 GemmMicrokernelTester()
22155 .mr(3)
22156 .nr(4)
22157 .kr(8)
22158 .sr(1)
22159 .m(3)
22160 .n(4)
22161 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022162 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022163 }
22164 }
22165
22166 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
22167 for (size_t k = 1; k < 8; k++) {
22168 GemmMicrokernelTester()
22169 .mr(3)
22170 .nr(4)
22171 .kr(8)
22172 .sr(1)
22173 .m(3)
22174 .n(4)
22175 .k(k)
22176 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080022177 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022178 }
22179 }
22180
22181 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
22182 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022183 for (uint32_t n = 1; n <= 4; n++) {
22184 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022185 GemmMicrokernelTester()
22186 .mr(3)
22187 .nr(4)
22188 .kr(8)
22189 .sr(1)
22190 .m(m)
22191 .n(n)
22192 .k(k)
22193 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022194 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022195 }
22196 }
22197 }
22198 }
22199
22200 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
22201 for (size_t k = 9; k < 16; k++) {
22202 GemmMicrokernelTester()
22203 .mr(3)
22204 .nr(4)
22205 .kr(8)
22206 .sr(1)
22207 .m(3)
22208 .n(4)
22209 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022210 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022211 }
22212 }
22213
22214 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
22215 for (size_t k = 9; k < 16; k++) {
22216 GemmMicrokernelTester()
22217 .mr(3)
22218 .nr(4)
22219 .kr(8)
22220 .sr(1)
22221 .m(3)
22222 .n(4)
22223 .k(k)
22224 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022225 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022226 }
22227 }
22228
22229 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
22230 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022231 for (uint32_t n = 1; n <= 4; n++) {
22232 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022233 GemmMicrokernelTester()
22234 .mr(3)
22235 .nr(4)
22236 .kr(8)
22237 .sr(1)
22238 .m(m)
22239 .n(n)
22240 .k(k)
22241 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022242 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022243 }
22244 }
22245 }
22246 }
22247
22248 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
22249 for (size_t k = 16; k <= 80; k += 8) {
22250 GemmMicrokernelTester()
22251 .mr(3)
22252 .nr(4)
22253 .kr(8)
22254 .sr(1)
22255 .m(3)
22256 .n(4)
22257 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022258 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022259 }
22260 }
22261
22262 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
22263 for (size_t k = 16; k <= 80; k += 8) {
22264 GemmMicrokernelTester()
22265 .mr(3)
22266 .nr(4)
22267 .kr(8)
22268 .sr(1)
22269 .m(3)
22270 .n(4)
22271 .k(k)
22272 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080022273 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022274 }
22275 }
22276
22277 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
22278 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022279 for (uint32_t n = 1; n <= 4; n++) {
22280 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022281 GemmMicrokernelTester()
22282 .mr(3)
22283 .nr(4)
22284 .kr(8)
22285 .sr(1)
22286 .m(m)
22287 .n(n)
22288 .k(k)
22289 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022290 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022291 }
22292 }
22293 }
22294 }
22295
22296 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
22297 for (uint32_t n = 5; n < 8; n++) {
22298 for (size_t k = 1; k <= 40; k += 9) {
22299 GemmMicrokernelTester()
22300 .mr(3)
22301 .nr(4)
22302 .kr(8)
22303 .sr(1)
22304 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022305 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022306 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022307 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022308 }
22309 }
22310 }
22311
22312 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
22313 for (uint32_t n = 5; n < 8; n++) {
22314 for (size_t k = 1; k <= 40; k += 9) {
22315 GemmMicrokernelTester()
22316 .mr(3)
22317 .nr(4)
22318 .kr(8)
22319 .sr(1)
22320 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022321 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022322 .k(k)
22323 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022324 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022325 }
22326 }
22327 }
22328
22329 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
22330 for (uint32_t n = 5; n < 8; n++) {
22331 for (size_t k = 1; k <= 40; k += 9) {
22332 GemmMicrokernelTester()
22333 .mr(3)
22334 .nr(4)
22335 .kr(8)
22336 .sr(1)
22337 .m(3)
22338 .n(n)
22339 .k(k)
22340 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080022341 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022342 }
22343 }
22344 }
22345
22346 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
22347 for (uint32_t n = 5; n < 8; n++) {
22348 for (size_t k = 1; k <= 40; k += 9) {
22349 for (uint32_t m = 1; m <= 3; m++) {
22350 GemmMicrokernelTester()
22351 .mr(3)
22352 .nr(4)
22353 .kr(8)
22354 .sr(1)
22355 .m(m)
22356 .n(n)
22357 .k(k)
22358 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022359 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022360 }
22361 }
22362 }
22363 }
22364
22365 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
22366 for (uint32_t n = 8; n <= 12; n += 4) {
22367 for (size_t k = 1; k <= 40; k += 9) {
22368 GemmMicrokernelTester()
22369 .mr(3)
22370 .nr(4)
22371 .kr(8)
22372 .sr(1)
22373 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022374 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022375 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022376 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022377 }
22378 }
22379 }
22380
22381 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
22382 for (uint32_t n = 8; n <= 12; n += 4) {
22383 for (size_t k = 1; k <= 40; k += 9) {
22384 GemmMicrokernelTester()
22385 .mr(3)
22386 .nr(4)
22387 .kr(8)
22388 .sr(1)
22389 .m(3)
22390 .n(n)
22391 .k(k)
22392 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022393 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022394 }
22395 }
22396 }
22397
22398 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
22399 for (uint32_t n = 8; n <= 12; n += 4) {
22400 for (size_t k = 1; k <= 40; k += 9) {
22401 GemmMicrokernelTester()
22402 .mr(3)
22403 .nr(4)
22404 .kr(8)
22405 .sr(1)
22406 .m(3)
22407 .n(n)
22408 .k(k)
22409 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080022410 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022411 }
22412 }
22413 }
22414
22415 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
22416 for (uint32_t n = 8; n <= 12; n += 4) {
22417 for (size_t k = 1; k <= 40; k += 9) {
22418 for (uint32_t m = 1; m <= 3; m++) {
22419 GemmMicrokernelTester()
22420 .mr(3)
22421 .nr(4)
22422 .kr(8)
22423 .sr(1)
22424 .m(m)
22425 .n(n)
22426 .k(k)
22427 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022428 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022429 }
22430 }
22431 }
22432 }
22433
22434 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
22435 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022436 for (uint32_t n = 1; n <= 4; n++) {
22437 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022438 GemmMicrokernelTester()
22439 .mr(3)
22440 .nr(4)
22441 .kr(8)
22442 .sr(1)
22443 .m(m)
22444 .n(n)
22445 .k(k)
22446 .cm_stride(7)
22447 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022448 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022449 }
22450 }
22451 }
22452 }
22453
22454 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
22455 GemmMicrokernelTester()
22456 .mr(3)
22457 .nr(4)
22458 .kr(8)
22459 .sr(1)
22460 .m(3)
22461 .n(4)
22462 .k(8)
22463 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022464 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022465 }
22466
22467 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
22468 GemmMicrokernelTester()
22469 .mr(3)
22470 .nr(4)
22471 .kr(8)
22472 .sr(1)
22473 .m(3)
22474 .n(4)
22475 .k(8)
22476 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022477 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022478 }
22479
22480 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
22481 GemmMicrokernelTester()
22482 .mr(3)
22483 .nr(4)
22484 .kr(8)
22485 .sr(1)
22486 .m(3)
22487 .n(4)
22488 .k(8)
22489 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022490 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022491 }
22492
22493 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
22494 for (size_t k = 1; k <= 40; k += 9) {
22495 GemmMicrokernelTester()
22496 .mr(3)
22497 .nr(4)
22498 .kr(8)
22499 .sr(1)
22500 .m(3)
22501 .n(4)
22502 .k(k)
22503 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080022504 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022505 }
22506 }
22507
22508 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
22509 for (size_t k = 1; k <= 40; k += 9) {
22510 GemmMicrokernelTester()
22511 .mr(3)
22512 .nr(4)
22513 .kr(8)
22514 .sr(1)
22515 .m(3)
22516 .n(4)
22517 .k(k)
22518 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080022519 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022520 }
22521 }
22522
22523 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
22524 for (size_t k = 1; k <= 40; k += 9) {
22525 GemmMicrokernelTester()
22526 .mr(3)
22527 .nr(4)
22528 .kr(8)
22529 .sr(1)
22530 .m(3)
22531 .n(4)
22532 .k(k)
22533 .a_zero_point(0)
22534 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080022535 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022536 }
22537 }
22538#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22539
22540
22541#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22542 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
22543 GemmMicrokernelTester()
22544 .mr(4)
22545 .nr(4)
22546 .kr(8)
22547 .sr(1)
22548 .m(4)
22549 .n(4)
22550 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080022551 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022552 }
22553
22554 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
22555 GemmMicrokernelTester()
22556 .mr(4)
22557 .nr(4)
22558 .kr(8)
22559 .sr(1)
22560 .m(4)
22561 .n(4)
22562 .k(8)
22563 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022564 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022565 }
22566
22567 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
22568 GemmMicrokernelTester()
22569 .mr(4)
22570 .nr(4)
22571 .kr(8)
22572 .sr(1)
22573 .m(4)
22574 .n(4)
22575 .k(8)
22576 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080022577 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022578 }
22579
22580 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022581 for (uint32_t n = 1; n <= 4; n++) {
22582 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022583 GemmMicrokernelTester()
22584 .mr(4)
22585 .nr(4)
22586 .kr(8)
22587 .sr(1)
22588 .m(m)
22589 .n(n)
22590 .k(8)
22591 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022592 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022593 }
22594 }
22595 }
22596
22597 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
22598 for (uint32_t m = 1; m <= 4; m++) {
22599 GemmMicrokernelTester()
22600 .mr(4)
22601 .nr(4)
22602 .kr(8)
22603 .sr(1)
22604 .m(m)
22605 .n(4)
22606 .k(8)
22607 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022608 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022609 }
22610 }
22611
22612 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
22613 for (uint32_t n = 1; n <= 4; n++) {
22614 GemmMicrokernelTester()
22615 .mr(4)
22616 .nr(4)
22617 .kr(8)
22618 .sr(1)
22619 .m(4)
22620 .n(n)
22621 .k(8)
22622 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022623 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022624 }
22625 }
22626
22627 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
22628 for (size_t k = 1; k < 8; k++) {
22629 GemmMicrokernelTester()
22630 .mr(4)
22631 .nr(4)
22632 .kr(8)
22633 .sr(1)
22634 .m(4)
22635 .n(4)
22636 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022637 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022638 }
22639 }
22640
22641 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
22642 for (size_t k = 1; k < 8; k++) {
22643 GemmMicrokernelTester()
22644 .mr(4)
22645 .nr(4)
22646 .kr(8)
22647 .sr(1)
22648 .m(4)
22649 .n(4)
22650 .k(k)
22651 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080022652 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022653 }
22654 }
22655
22656 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
22657 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022658 for (uint32_t n = 1; n <= 4; n++) {
22659 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022660 GemmMicrokernelTester()
22661 .mr(4)
22662 .nr(4)
22663 .kr(8)
22664 .sr(1)
22665 .m(m)
22666 .n(n)
22667 .k(k)
22668 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022669 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022670 }
22671 }
22672 }
22673 }
22674
22675 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
22676 for (size_t k = 9; k < 16; k++) {
22677 GemmMicrokernelTester()
22678 .mr(4)
22679 .nr(4)
22680 .kr(8)
22681 .sr(1)
22682 .m(4)
22683 .n(4)
22684 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022685 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022686 }
22687 }
22688
22689 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
22690 for (size_t k = 9; k < 16; k++) {
22691 GemmMicrokernelTester()
22692 .mr(4)
22693 .nr(4)
22694 .kr(8)
22695 .sr(1)
22696 .m(4)
22697 .n(4)
22698 .k(k)
22699 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022700 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022701 }
22702 }
22703
22704 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
22705 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022706 for (uint32_t n = 1; n <= 4; n++) {
22707 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022708 GemmMicrokernelTester()
22709 .mr(4)
22710 .nr(4)
22711 .kr(8)
22712 .sr(1)
22713 .m(m)
22714 .n(n)
22715 .k(k)
22716 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022717 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022718 }
22719 }
22720 }
22721 }
22722
22723 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
22724 for (size_t k = 16; k <= 80; k += 8) {
22725 GemmMicrokernelTester()
22726 .mr(4)
22727 .nr(4)
22728 .kr(8)
22729 .sr(1)
22730 .m(4)
22731 .n(4)
22732 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022733 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022734 }
22735 }
22736
22737 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
22738 for (size_t k = 16; k <= 80; k += 8) {
22739 GemmMicrokernelTester()
22740 .mr(4)
22741 .nr(4)
22742 .kr(8)
22743 .sr(1)
22744 .m(4)
22745 .n(4)
22746 .k(k)
22747 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080022748 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022749 }
22750 }
22751
22752 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
22753 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022754 for (uint32_t n = 1; n <= 4; n++) {
22755 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022756 GemmMicrokernelTester()
22757 .mr(4)
22758 .nr(4)
22759 .kr(8)
22760 .sr(1)
22761 .m(m)
22762 .n(n)
22763 .k(k)
22764 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022765 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022766 }
22767 }
22768 }
22769 }
22770
22771 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
22772 for (uint32_t n = 5; n < 8; n++) {
22773 for (size_t k = 1; k <= 40; k += 9) {
22774 GemmMicrokernelTester()
22775 .mr(4)
22776 .nr(4)
22777 .kr(8)
22778 .sr(1)
22779 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022780 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022781 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022782 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022783 }
22784 }
22785 }
22786
22787 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
22788 for (uint32_t n = 5; n < 8; n++) {
22789 for (size_t k = 1; k <= 40; k += 9) {
22790 GemmMicrokernelTester()
22791 .mr(4)
22792 .nr(4)
22793 .kr(8)
22794 .sr(1)
22795 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022796 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022797 .k(k)
22798 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022799 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022800 }
22801 }
22802 }
22803
22804 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
22805 for (uint32_t n = 5; n < 8; n++) {
22806 for (size_t k = 1; k <= 40; k += 9) {
22807 GemmMicrokernelTester()
22808 .mr(4)
22809 .nr(4)
22810 .kr(8)
22811 .sr(1)
22812 .m(4)
22813 .n(n)
22814 .k(k)
22815 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080022816 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022817 }
22818 }
22819 }
22820
22821 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
22822 for (uint32_t n = 5; n < 8; n++) {
22823 for (size_t k = 1; k <= 40; k += 9) {
22824 for (uint32_t m = 1; m <= 4; m++) {
22825 GemmMicrokernelTester()
22826 .mr(4)
22827 .nr(4)
22828 .kr(8)
22829 .sr(1)
22830 .m(m)
22831 .n(n)
22832 .k(k)
22833 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022834 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022835 }
22836 }
22837 }
22838 }
22839
22840 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
22841 for (uint32_t n = 8; n <= 12; n += 4) {
22842 for (size_t k = 1; k <= 40; k += 9) {
22843 GemmMicrokernelTester()
22844 .mr(4)
22845 .nr(4)
22846 .kr(8)
22847 .sr(1)
22848 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022849 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022850 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022851 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022852 }
22853 }
22854 }
22855
22856 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
22857 for (uint32_t n = 8; n <= 12; n += 4) {
22858 for (size_t k = 1; k <= 40; k += 9) {
22859 GemmMicrokernelTester()
22860 .mr(4)
22861 .nr(4)
22862 .kr(8)
22863 .sr(1)
22864 .m(4)
22865 .n(n)
22866 .k(k)
22867 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022868 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022869 }
22870 }
22871 }
22872
22873 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
22874 for (uint32_t n = 8; n <= 12; n += 4) {
22875 for (size_t k = 1; k <= 40; k += 9) {
22876 GemmMicrokernelTester()
22877 .mr(4)
22878 .nr(4)
22879 .kr(8)
22880 .sr(1)
22881 .m(4)
22882 .n(n)
22883 .k(k)
22884 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080022885 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022886 }
22887 }
22888 }
22889
22890 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
22891 for (uint32_t n = 8; n <= 12; n += 4) {
22892 for (size_t k = 1; k <= 40; k += 9) {
22893 for (uint32_t m = 1; m <= 4; m++) {
22894 GemmMicrokernelTester()
22895 .mr(4)
22896 .nr(4)
22897 .kr(8)
22898 .sr(1)
22899 .m(m)
22900 .n(n)
22901 .k(k)
22902 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022903 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022904 }
22905 }
22906 }
22907 }
22908
22909 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
22910 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022911 for (uint32_t n = 1; n <= 4; n++) {
22912 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022913 GemmMicrokernelTester()
22914 .mr(4)
22915 .nr(4)
22916 .kr(8)
22917 .sr(1)
22918 .m(m)
22919 .n(n)
22920 .k(k)
22921 .cm_stride(7)
22922 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022923 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022924 }
22925 }
22926 }
22927 }
22928
22929 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
22930 GemmMicrokernelTester()
22931 .mr(4)
22932 .nr(4)
22933 .kr(8)
22934 .sr(1)
22935 .m(4)
22936 .n(4)
22937 .k(8)
22938 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022939 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022940 }
22941
22942 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
22943 GemmMicrokernelTester()
22944 .mr(4)
22945 .nr(4)
22946 .kr(8)
22947 .sr(1)
22948 .m(4)
22949 .n(4)
22950 .k(8)
22951 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022952 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022953 }
22954
22955 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
22956 GemmMicrokernelTester()
22957 .mr(4)
22958 .nr(4)
22959 .kr(8)
22960 .sr(1)
22961 .m(4)
22962 .n(4)
22963 .k(8)
22964 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022965 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022966 }
22967
22968 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
22969 for (size_t k = 1; k <= 40; k += 9) {
22970 GemmMicrokernelTester()
22971 .mr(4)
22972 .nr(4)
22973 .kr(8)
22974 .sr(1)
22975 .m(4)
22976 .n(4)
22977 .k(k)
22978 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080022979 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022980 }
22981 }
22982
22983 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
22984 for (size_t k = 1; k <= 40; k += 9) {
22985 GemmMicrokernelTester()
22986 .mr(4)
22987 .nr(4)
22988 .kr(8)
22989 .sr(1)
22990 .m(4)
22991 .n(4)
22992 .k(k)
22993 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080022994 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022995 }
22996 }
22997
22998 TEST(QU8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_zero_point) {
22999 for (size_t k = 1; k <= 40; k += 9) {
23000 GemmMicrokernelTester()
23001 .mr(4)
23002 .nr(4)
23003 .kr(8)
23004 .sr(1)
23005 .m(4)
23006 .n(4)
23007 .k(k)
23008 .a_zero_point(0)
23009 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080023010 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023011 }
23012 }
23013#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23014
23015
23016#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23017 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_eq_8) {
23018 GemmMicrokernelTester()
23019 .mr(3)
23020 .nr(4)
23021 .kr(8)
23022 .sr(1)
23023 .m(3)
23024 .n(4)
23025 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080023026 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023027 }
23028
23029 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, strided_cn) {
23030 GemmMicrokernelTester()
23031 .mr(3)
23032 .nr(4)
23033 .kr(8)
23034 .sr(1)
23035 .m(3)
23036 .n(4)
23037 .k(8)
23038 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023039 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023040 }
23041
23042 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_eq_8_strided_a) {
23043 GemmMicrokernelTester()
23044 .mr(3)
23045 .nr(4)
23046 .kr(8)
23047 .sr(1)
23048 .m(3)
23049 .n(4)
23050 .k(8)
23051 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023052 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023053 }
23054
23055 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023056 for (uint32_t n = 1; n <= 4; n++) {
23057 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023058 GemmMicrokernelTester()
23059 .mr(3)
23060 .nr(4)
23061 .kr(8)
23062 .sr(1)
23063 .m(m)
23064 .n(n)
23065 .k(8)
23066 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023067 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023068 }
23069 }
23070 }
23071
23072 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_eq_8_subtile_m) {
23073 for (uint32_t m = 1; m <= 3; m++) {
23074 GemmMicrokernelTester()
23075 .mr(3)
23076 .nr(4)
23077 .kr(8)
23078 .sr(1)
23079 .m(m)
23080 .n(4)
23081 .k(8)
23082 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023083 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023084 }
23085 }
23086
23087 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_eq_8_subtile_n) {
23088 for (uint32_t n = 1; n <= 4; n++) {
23089 GemmMicrokernelTester()
23090 .mr(3)
23091 .nr(4)
23092 .kr(8)
23093 .sr(1)
23094 .m(3)
23095 .n(n)
23096 .k(8)
23097 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023098 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023099 }
23100 }
23101
23102 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_lt_8) {
23103 for (size_t k = 1; k < 8; k++) {
23104 GemmMicrokernelTester()
23105 .mr(3)
23106 .nr(4)
23107 .kr(8)
23108 .sr(1)
23109 .m(3)
23110 .n(4)
23111 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023112 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023113 }
23114 }
23115
23116 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_lt_8_strided_a) {
23117 for (size_t k = 1; k < 8; k++) {
23118 GemmMicrokernelTester()
23119 .mr(3)
23120 .nr(4)
23121 .kr(8)
23122 .sr(1)
23123 .m(3)
23124 .n(4)
23125 .k(k)
23126 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023127 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023128 }
23129 }
23130
23131 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_lt_8_subtile) {
23132 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023133 for (uint32_t n = 1; n <= 4; n++) {
23134 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023135 GemmMicrokernelTester()
23136 .mr(3)
23137 .nr(4)
23138 .kr(8)
23139 .sr(1)
23140 .m(m)
23141 .n(n)
23142 .k(k)
23143 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023144 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023145 }
23146 }
23147 }
23148 }
23149
23150 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_gt_8) {
23151 for (size_t k = 9; k < 16; k++) {
23152 GemmMicrokernelTester()
23153 .mr(3)
23154 .nr(4)
23155 .kr(8)
23156 .sr(1)
23157 .m(3)
23158 .n(4)
23159 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023160 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023161 }
23162 }
23163
23164 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_gt_8_strided_a) {
23165 for (size_t k = 9; k < 16; k++) {
23166 GemmMicrokernelTester()
23167 .mr(3)
23168 .nr(4)
23169 .kr(8)
23170 .sr(1)
23171 .m(3)
23172 .n(4)
23173 .k(k)
23174 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080023175 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023176 }
23177 }
23178
23179 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_gt_8_subtile) {
23180 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023181 for (uint32_t n = 1; n <= 4; n++) {
23182 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023183 GemmMicrokernelTester()
23184 .mr(3)
23185 .nr(4)
23186 .kr(8)
23187 .sr(1)
23188 .m(m)
23189 .n(n)
23190 .k(k)
23191 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023192 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023193 }
23194 }
23195 }
23196 }
23197
23198 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_div_8) {
23199 for (size_t k = 16; k <= 80; k += 8) {
23200 GemmMicrokernelTester()
23201 .mr(3)
23202 .nr(4)
23203 .kr(8)
23204 .sr(1)
23205 .m(3)
23206 .n(4)
23207 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023208 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023209 }
23210 }
23211
23212 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_div_8_strided_a) {
23213 for (size_t k = 16; k <= 80; k += 8) {
23214 GemmMicrokernelTester()
23215 .mr(3)
23216 .nr(4)
23217 .kr(8)
23218 .sr(1)
23219 .m(3)
23220 .n(4)
23221 .k(k)
23222 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080023223 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023224 }
23225 }
23226
23227 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_div_8_subtile) {
23228 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023229 for (uint32_t n = 1; n <= 4; n++) {
23230 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023231 GemmMicrokernelTester()
23232 .mr(3)
23233 .nr(4)
23234 .kr(8)
23235 .sr(1)
23236 .m(m)
23237 .n(n)
23238 .k(k)
23239 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023240 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023241 }
23242 }
23243 }
23244 }
23245
23246 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_gt_4) {
23247 for (uint32_t n = 5; n < 8; n++) {
23248 for (size_t k = 1; k <= 40; k += 9) {
23249 GemmMicrokernelTester()
23250 .mr(3)
23251 .nr(4)
23252 .kr(8)
23253 .sr(1)
23254 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023255 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023256 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023257 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023258 }
23259 }
23260 }
23261
23262 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_gt_4_strided_cn) {
23263 for (uint32_t n = 5; n < 8; n++) {
23264 for (size_t k = 1; k <= 40; k += 9) {
23265 GemmMicrokernelTester()
23266 .mr(3)
23267 .nr(4)
23268 .kr(8)
23269 .sr(1)
23270 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023271 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023272 .k(k)
23273 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023274 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023275 }
23276 }
23277 }
23278
23279 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_gt_4_strided_a) {
23280 for (uint32_t n = 5; n < 8; n++) {
23281 for (size_t k = 1; k <= 40; k += 9) {
23282 GemmMicrokernelTester()
23283 .mr(3)
23284 .nr(4)
23285 .kr(8)
23286 .sr(1)
23287 .m(3)
23288 .n(n)
23289 .k(k)
23290 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080023291 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023292 }
23293 }
23294 }
23295
23296 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_gt_4_subtile) {
23297 for (uint32_t n = 5; n < 8; n++) {
23298 for (size_t k = 1; k <= 40; k += 9) {
23299 for (uint32_t m = 1; m <= 3; m++) {
23300 GemmMicrokernelTester()
23301 .mr(3)
23302 .nr(4)
23303 .kr(8)
23304 .sr(1)
23305 .m(m)
23306 .n(n)
23307 .k(k)
23308 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023309 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023310 }
23311 }
23312 }
23313 }
23314
23315 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_div_4) {
23316 for (uint32_t n = 8; n <= 12; n += 4) {
23317 for (size_t k = 1; k <= 40; k += 9) {
23318 GemmMicrokernelTester()
23319 .mr(3)
23320 .nr(4)
23321 .kr(8)
23322 .sr(1)
23323 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023324 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023325 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023326 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023327 }
23328 }
23329 }
23330
23331 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_div_4_strided_cn) {
23332 for (uint32_t n = 8; n <= 12; n += 4) {
23333 for (size_t k = 1; k <= 40; k += 9) {
23334 GemmMicrokernelTester()
23335 .mr(3)
23336 .nr(4)
23337 .kr(8)
23338 .sr(1)
23339 .m(3)
23340 .n(n)
23341 .k(k)
23342 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023343 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023344 }
23345 }
23346 }
23347
23348 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_div_4_strided_a) {
23349 for (uint32_t n = 8; n <= 12; n += 4) {
23350 for (size_t k = 1; k <= 40; k += 9) {
23351 GemmMicrokernelTester()
23352 .mr(3)
23353 .nr(4)
23354 .kr(8)
23355 .sr(1)
23356 .m(3)
23357 .n(n)
23358 .k(k)
23359 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080023360 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023361 }
23362 }
23363 }
23364
23365 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_div_4_subtile) {
23366 for (uint32_t n = 8; n <= 12; n += 4) {
23367 for (size_t k = 1; k <= 40; k += 9) {
23368 for (uint32_t m = 1; m <= 3; m++) {
23369 GemmMicrokernelTester()
23370 .mr(3)
23371 .nr(4)
23372 .kr(8)
23373 .sr(1)
23374 .m(m)
23375 .n(n)
23376 .k(k)
23377 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023378 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023379 }
23380 }
23381 }
23382 }
23383
23384 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, strided_cm_subtile) {
23385 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023386 for (uint32_t n = 1; n <= 4; n++) {
23387 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023388 GemmMicrokernelTester()
23389 .mr(3)
23390 .nr(4)
23391 .kr(8)
23392 .sr(1)
23393 .m(m)
23394 .n(n)
23395 .k(k)
23396 .cm_stride(7)
23397 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023398 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023399 }
23400 }
23401 }
23402 }
23403
23404 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, qmin) {
23405 GemmMicrokernelTester()
23406 .mr(3)
23407 .nr(4)
23408 .kr(8)
23409 .sr(1)
23410 .m(3)
23411 .n(4)
23412 .k(8)
23413 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023414 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023415 }
23416
23417 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, qmax) {
23418 GemmMicrokernelTester()
23419 .mr(3)
23420 .nr(4)
23421 .kr(8)
23422 .sr(1)
23423 .m(3)
23424 .n(4)
23425 .k(8)
23426 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023427 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023428 }
23429
23430 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, strided_cm) {
23431 GemmMicrokernelTester()
23432 .mr(3)
23433 .nr(4)
23434 .kr(8)
23435 .sr(1)
23436 .m(3)
23437 .n(4)
23438 .k(8)
23439 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023440 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023441 }
23442
23443 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, no_a_zero_point) {
23444 for (size_t k = 1; k <= 40; k += 9) {
23445 GemmMicrokernelTester()
23446 .mr(3)
23447 .nr(4)
23448 .kr(8)
23449 .sr(1)
23450 .m(3)
23451 .n(4)
23452 .k(k)
23453 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080023454 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023455 }
23456 }
23457
23458 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, no_b_zero_point) {
23459 for (size_t k = 1; k <= 40; k += 9) {
23460 GemmMicrokernelTester()
23461 .mr(3)
23462 .nr(4)
23463 .kr(8)
23464 .sr(1)
23465 .m(3)
23466 .n(4)
23467 .k(k)
23468 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080023469 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023470 }
23471 }
23472
23473 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, no_zero_point) {
23474 for (size_t k = 1; k <= 40; k += 9) {
23475 GemmMicrokernelTester()
23476 .mr(3)
23477 .nr(4)
23478 .kr(8)
23479 .sr(1)
23480 .m(3)
23481 .n(4)
23482 .k(k)
23483 .a_zero_point(0)
23484 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080023485 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023486 }
23487 }
23488#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23489
23490
23491#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23492 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_eq_8) {
23493 GemmMicrokernelTester()
23494 .mr(1)
23495 .nr(4)
23496 .kr(8)
23497 .sr(1)
23498 .m(1)
23499 .n(4)
23500 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080023501 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023502 }
23503
23504 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, strided_cn) {
23505 GemmMicrokernelTester()
23506 .mr(1)
23507 .nr(4)
23508 .kr(8)
23509 .sr(1)
23510 .m(1)
23511 .n(4)
23512 .k(8)
23513 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023514 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023515 }
23516
23517 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_eq_8_strided_a) {
23518 GemmMicrokernelTester()
23519 .mr(1)
23520 .nr(4)
23521 .kr(8)
23522 .sr(1)
23523 .m(1)
23524 .n(4)
23525 .k(8)
23526 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023527 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023528 }
23529
23530 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023531 for (uint32_t n = 1; n <= 4; n++) {
23532 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023533 GemmMicrokernelTester()
23534 .mr(1)
23535 .nr(4)
23536 .kr(8)
23537 .sr(1)
23538 .m(m)
23539 .n(n)
23540 .k(8)
23541 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023542 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023543 }
23544 }
23545 }
23546
23547 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_eq_8_subtile_m) {
23548 for (uint32_t m = 1; m <= 1; m++) {
23549 GemmMicrokernelTester()
23550 .mr(1)
23551 .nr(4)
23552 .kr(8)
23553 .sr(1)
23554 .m(m)
23555 .n(4)
23556 .k(8)
23557 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023558 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023559 }
23560 }
23561
23562 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_eq_8_subtile_n) {
23563 for (uint32_t n = 1; n <= 4; n++) {
23564 GemmMicrokernelTester()
23565 .mr(1)
23566 .nr(4)
23567 .kr(8)
23568 .sr(1)
23569 .m(1)
23570 .n(n)
23571 .k(8)
23572 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023573 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023574 }
23575 }
23576
23577 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_lt_8) {
23578 for (size_t k = 1; k < 8; k++) {
23579 GemmMicrokernelTester()
23580 .mr(1)
23581 .nr(4)
23582 .kr(8)
23583 .sr(1)
23584 .m(1)
23585 .n(4)
23586 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023587 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023588 }
23589 }
23590
23591 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_lt_8_strided_a) {
23592 for (size_t k = 1; k < 8; k++) {
23593 GemmMicrokernelTester()
23594 .mr(1)
23595 .nr(4)
23596 .kr(8)
23597 .sr(1)
23598 .m(1)
23599 .n(4)
23600 .k(k)
23601 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023602 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023603 }
23604 }
23605
23606 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_lt_8_subtile) {
23607 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023608 for (uint32_t n = 1; n <= 4; n++) {
23609 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023610 GemmMicrokernelTester()
23611 .mr(1)
23612 .nr(4)
23613 .kr(8)
23614 .sr(1)
23615 .m(m)
23616 .n(n)
23617 .k(k)
23618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023619 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023620 }
23621 }
23622 }
23623 }
23624
23625 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_gt_8) {
23626 for (size_t k = 9; k < 16; k++) {
23627 GemmMicrokernelTester()
23628 .mr(1)
23629 .nr(4)
23630 .kr(8)
23631 .sr(1)
23632 .m(1)
23633 .n(4)
23634 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023635 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023636 }
23637 }
23638
23639 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_gt_8_strided_a) {
23640 for (size_t k = 9; k < 16; k++) {
23641 GemmMicrokernelTester()
23642 .mr(1)
23643 .nr(4)
23644 .kr(8)
23645 .sr(1)
23646 .m(1)
23647 .n(4)
23648 .k(k)
23649 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080023650 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023651 }
23652 }
23653
23654 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_gt_8_subtile) {
23655 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023656 for (uint32_t n = 1; n <= 4; n++) {
23657 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023658 GemmMicrokernelTester()
23659 .mr(1)
23660 .nr(4)
23661 .kr(8)
23662 .sr(1)
23663 .m(m)
23664 .n(n)
23665 .k(k)
23666 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023667 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023668 }
23669 }
23670 }
23671 }
23672
23673 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_div_8) {
23674 for (size_t k = 16; k <= 80; k += 8) {
23675 GemmMicrokernelTester()
23676 .mr(1)
23677 .nr(4)
23678 .kr(8)
23679 .sr(1)
23680 .m(1)
23681 .n(4)
23682 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023683 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023684 }
23685 }
23686
23687 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_div_8_strided_a) {
23688 for (size_t k = 16; k <= 80; k += 8) {
23689 GemmMicrokernelTester()
23690 .mr(1)
23691 .nr(4)
23692 .kr(8)
23693 .sr(1)
23694 .m(1)
23695 .n(4)
23696 .k(k)
23697 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080023698 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023699 }
23700 }
23701
23702 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_div_8_subtile) {
23703 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023704 for (uint32_t n = 1; n <= 4; n++) {
23705 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023706 GemmMicrokernelTester()
23707 .mr(1)
23708 .nr(4)
23709 .kr(8)
23710 .sr(1)
23711 .m(m)
23712 .n(n)
23713 .k(k)
23714 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023715 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023716 }
23717 }
23718 }
23719 }
23720
23721 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_gt_4) {
23722 for (uint32_t n = 5; n < 8; n++) {
23723 for (size_t k = 1; k <= 40; k += 9) {
23724 GemmMicrokernelTester()
23725 .mr(1)
23726 .nr(4)
23727 .kr(8)
23728 .sr(1)
23729 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023730 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023731 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023732 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023733 }
23734 }
23735 }
23736
23737 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_gt_4_strided_cn) {
23738 for (uint32_t n = 5; n < 8; n++) {
23739 for (size_t k = 1; k <= 40; k += 9) {
23740 GemmMicrokernelTester()
23741 .mr(1)
23742 .nr(4)
23743 .kr(8)
23744 .sr(1)
23745 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023746 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023747 .k(k)
23748 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023749 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023750 }
23751 }
23752 }
23753
23754 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_gt_4_strided_a) {
23755 for (uint32_t n = 5; n < 8; n++) {
23756 for (size_t k = 1; k <= 40; k += 9) {
23757 GemmMicrokernelTester()
23758 .mr(1)
23759 .nr(4)
23760 .kr(8)
23761 .sr(1)
23762 .m(1)
23763 .n(n)
23764 .k(k)
23765 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080023766 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023767 }
23768 }
23769 }
23770
23771 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_gt_4_subtile) {
23772 for (uint32_t n = 5; n < 8; n++) {
23773 for (size_t k = 1; k <= 40; k += 9) {
23774 for (uint32_t m = 1; m <= 1; m++) {
23775 GemmMicrokernelTester()
23776 .mr(1)
23777 .nr(4)
23778 .kr(8)
23779 .sr(1)
23780 .m(m)
23781 .n(n)
23782 .k(k)
23783 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023784 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023785 }
23786 }
23787 }
23788 }
23789
23790 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_div_4) {
23791 for (uint32_t n = 8; n <= 12; n += 4) {
23792 for (size_t k = 1; k <= 40; k += 9) {
23793 GemmMicrokernelTester()
23794 .mr(1)
23795 .nr(4)
23796 .kr(8)
23797 .sr(1)
23798 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023799 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023800 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023801 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023802 }
23803 }
23804 }
23805
23806 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_div_4_strided_cn) {
23807 for (uint32_t n = 8; n <= 12; n += 4) {
23808 for (size_t k = 1; k <= 40; k += 9) {
23809 GemmMicrokernelTester()
23810 .mr(1)
23811 .nr(4)
23812 .kr(8)
23813 .sr(1)
23814 .m(1)
23815 .n(n)
23816 .k(k)
23817 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023818 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023819 }
23820 }
23821 }
23822
23823 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_div_4_strided_a) {
23824 for (uint32_t n = 8; n <= 12; n += 4) {
23825 for (size_t k = 1; k <= 40; k += 9) {
23826 GemmMicrokernelTester()
23827 .mr(1)
23828 .nr(4)
23829 .kr(8)
23830 .sr(1)
23831 .m(1)
23832 .n(n)
23833 .k(k)
23834 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080023835 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023836 }
23837 }
23838 }
23839
23840 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_div_4_subtile) {
23841 for (uint32_t n = 8; n <= 12; n += 4) {
23842 for (size_t k = 1; k <= 40; k += 9) {
23843 for (uint32_t m = 1; m <= 1; m++) {
23844 GemmMicrokernelTester()
23845 .mr(1)
23846 .nr(4)
23847 .kr(8)
23848 .sr(1)
23849 .m(m)
23850 .n(n)
23851 .k(k)
23852 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023853 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023854 }
23855 }
23856 }
23857 }
23858
23859 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, strided_cm_subtile) {
23860 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023861 for (uint32_t n = 1; n <= 4; n++) {
23862 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023863 GemmMicrokernelTester()
23864 .mr(1)
23865 .nr(4)
23866 .kr(8)
23867 .sr(1)
23868 .m(m)
23869 .n(n)
23870 .k(k)
23871 .cm_stride(7)
23872 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023873 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023874 }
23875 }
23876 }
23877 }
23878
23879 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, qmin) {
23880 GemmMicrokernelTester()
23881 .mr(1)
23882 .nr(4)
23883 .kr(8)
23884 .sr(1)
23885 .m(1)
23886 .n(4)
23887 .k(8)
23888 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023889 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023890 }
23891
23892 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, qmax) {
23893 GemmMicrokernelTester()
23894 .mr(1)
23895 .nr(4)
23896 .kr(8)
23897 .sr(1)
23898 .m(1)
23899 .n(4)
23900 .k(8)
23901 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023902 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023903 }
23904
23905 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, strided_cm) {
23906 GemmMicrokernelTester()
23907 .mr(1)
23908 .nr(4)
23909 .kr(8)
23910 .sr(1)
23911 .m(1)
23912 .n(4)
23913 .k(8)
23914 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023915 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023916 }
23917
23918 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, no_a_zero_point) {
23919 for (size_t k = 1; k <= 40; k += 9) {
23920 GemmMicrokernelTester()
23921 .mr(1)
23922 .nr(4)
23923 .kr(8)
23924 .sr(1)
23925 .m(1)
23926 .n(4)
23927 .k(k)
23928 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080023929 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023930 }
23931 }
23932
23933 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, no_b_zero_point) {
23934 for (size_t k = 1; k <= 40; k += 9) {
23935 GemmMicrokernelTester()
23936 .mr(1)
23937 .nr(4)
23938 .kr(8)
23939 .sr(1)
23940 .m(1)
23941 .n(4)
23942 .k(k)
23943 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080023944 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023945 }
23946 }
23947
23948 TEST(QU8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, no_zero_point) {
23949 for (size_t k = 1; k <= 40; k += 9) {
23950 GemmMicrokernelTester()
23951 .mr(1)
23952 .nr(4)
23953 .kr(8)
23954 .sr(1)
23955 .m(1)
23956 .n(4)
23957 .k(k)
23958 .a_zero_point(0)
23959 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080023960 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023961 }
23962 }
23963#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23964
23965
23966#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23967 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_eq_8) {
23968 GemmMicrokernelTester()
23969 .mr(3)
23970 .nr(4)
23971 .kr(8)
23972 .sr(1)
23973 .m(3)
23974 .n(4)
23975 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080023976 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023977 }
23978
23979 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, strided_cn) {
23980 GemmMicrokernelTester()
23981 .mr(3)
23982 .nr(4)
23983 .kr(8)
23984 .sr(1)
23985 .m(3)
23986 .n(4)
23987 .k(8)
23988 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023989 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023990 }
23991
23992 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_eq_8_strided_a) {
23993 GemmMicrokernelTester()
23994 .mr(3)
23995 .nr(4)
23996 .kr(8)
23997 .sr(1)
23998 .m(3)
23999 .n(4)
24000 .k(8)
24001 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024002 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024003 }
24004
24005 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024006 for (uint32_t n = 1; n <= 4; n++) {
24007 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024008 GemmMicrokernelTester()
24009 .mr(3)
24010 .nr(4)
24011 .kr(8)
24012 .sr(1)
24013 .m(m)
24014 .n(n)
24015 .k(8)
24016 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024017 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024018 }
24019 }
24020 }
24021
24022 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_eq_8_subtile_m) {
24023 for (uint32_t m = 1; m <= 3; m++) {
24024 GemmMicrokernelTester()
24025 .mr(3)
24026 .nr(4)
24027 .kr(8)
24028 .sr(1)
24029 .m(m)
24030 .n(4)
24031 .k(8)
24032 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024033 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024034 }
24035 }
24036
24037 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_eq_8_subtile_n) {
24038 for (uint32_t n = 1; n <= 4; n++) {
24039 GemmMicrokernelTester()
24040 .mr(3)
24041 .nr(4)
24042 .kr(8)
24043 .sr(1)
24044 .m(3)
24045 .n(n)
24046 .k(8)
24047 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024048 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024049 }
24050 }
24051
24052 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_lt_8) {
24053 for (size_t k = 1; k < 8; k++) {
24054 GemmMicrokernelTester()
24055 .mr(3)
24056 .nr(4)
24057 .kr(8)
24058 .sr(1)
24059 .m(3)
24060 .n(4)
24061 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024062 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024063 }
24064 }
24065
24066 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_lt_8_strided_a) {
24067 for (size_t k = 1; k < 8; k++) {
24068 GemmMicrokernelTester()
24069 .mr(3)
24070 .nr(4)
24071 .kr(8)
24072 .sr(1)
24073 .m(3)
24074 .n(4)
24075 .k(k)
24076 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024077 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024078 }
24079 }
24080
24081 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_lt_8_subtile) {
24082 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024083 for (uint32_t n = 1; n <= 4; n++) {
24084 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024085 GemmMicrokernelTester()
24086 .mr(3)
24087 .nr(4)
24088 .kr(8)
24089 .sr(1)
24090 .m(m)
24091 .n(n)
24092 .k(k)
24093 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024094 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024095 }
24096 }
24097 }
24098 }
24099
24100 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_gt_8) {
24101 for (size_t k = 9; k < 16; k++) {
24102 GemmMicrokernelTester()
24103 .mr(3)
24104 .nr(4)
24105 .kr(8)
24106 .sr(1)
24107 .m(3)
24108 .n(4)
24109 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024110 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024111 }
24112 }
24113
24114 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_gt_8_strided_a) {
24115 for (size_t k = 9; k < 16; k++) {
24116 GemmMicrokernelTester()
24117 .mr(3)
24118 .nr(4)
24119 .kr(8)
24120 .sr(1)
24121 .m(3)
24122 .n(4)
24123 .k(k)
24124 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080024125 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024126 }
24127 }
24128
24129 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_gt_8_subtile) {
24130 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024131 for (uint32_t n = 1; n <= 4; n++) {
24132 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024133 GemmMicrokernelTester()
24134 .mr(3)
24135 .nr(4)
24136 .kr(8)
24137 .sr(1)
24138 .m(m)
24139 .n(n)
24140 .k(k)
24141 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024142 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024143 }
24144 }
24145 }
24146 }
24147
24148 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_div_8) {
24149 for (size_t k = 16; k <= 80; k += 8) {
24150 GemmMicrokernelTester()
24151 .mr(3)
24152 .nr(4)
24153 .kr(8)
24154 .sr(1)
24155 .m(3)
24156 .n(4)
24157 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024158 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024159 }
24160 }
24161
24162 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_div_8_strided_a) {
24163 for (size_t k = 16; k <= 80; k += 8) {
24164 GemmMicrokernelTester()
24165 .mr(3)
24166 .nr(4)
24167 .kr(8)
24168 .sr(1)
24169 .m(3)
24170 .n(4)
24171 .k(k)
24172 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080024173 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024174 }
24175 }
24176
24177 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_div_8_subtile) {
24178 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024179 for (uint32_t n = 1; n <= 4; n++) {
24180 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024181 GemmMicrokernelTester()
24182 .mr(3)
24183 .nr(4)
24184 .kr(8)
24185 .sr(1)
24186 .m(m)
24187 .n(n)
24188 .k(k)
24189 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024190 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024191 }
24192 }
24193 }
24194 }
24195
24196 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_gt_4) {
24197 for (uint32_t n = 5; n < 8; n++) {
24198 for (size_t k = 1; k <= 40; k += 9) {
24199 GemmMicrokernelTester()
24200 .mr(3)
24201 .nr(4)
24202 .kr(8)
24203 .sr(1)
24204 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024205 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024206 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024207 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024208 }
24209 }
24210 }
24211
24212 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_gt_4_strided_cn) {
24213 for (uint32_t n = 5; n < 8; n++) {
24214 for (size_t k = 1; k <= 40; k += 9) {
24215 GemmMicrokernelTester()
24216 .mr(3)
24217 .nr(4)
24218 .kr(8)
24219 .sr(1)
24220 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024221 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024222 .k(k)
24223 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024224 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024225 }
24226 }
24227 }
24228
24229 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_gt_4_strided_a) {
24230 for (uint32_t n = 5; n < 8; n++) {
24231 for (size_t k = 1; k <= 40; k += 9) {
24232 GemmMicrokernelTester()
24233 .mr(3)
24234 .nr(4)
24235 .kr(8)
24236 .sr(1)
24237 .m(3)
24238 .n(n)
24239 .k(k)
24240 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080024241 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024242 }
24243 }
24244 }
24245
24246 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_gt_4_subtile) {
24247 for (uint32_t n = 5; n < 8; n++) {
24248 for (size_t k = 1; k <= 40; k += 9) {
24249 for (uint32_t m = 1; m <= 3; m++) {
24250 GemmMicrokernelTester()
24251 .mr(3)
24252 .nr(4)
24253 .kr(8)
24254 .sr(1)
24255 .m(m)
24256 .n(n)
24257 .k(k)
24258 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024259 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024260 }
24261 }
24262 }
24263 }
24264
24265 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_div_4) {
24266 for (uint32_t n = 8; n <= 12; n += 4) {
24267 for (size_t k = 1; k <= 40; k += 9) {
24268 GemmMicrokernelTester()
24269 .mr(3)
24270 .nr(4)
24271 .kr(8)
24272 .sr(1)
24273 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024274 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024275 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024276 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024277 }
24278 }
24279 }
24280
24281 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_div_4_strided_cn) {
24282 for (uint32_t n = 8; n <= 12; n += 4) {
24283 for (size_t k = 1; k <= 40; k += 9) {
24284 GemmMicrokernelTester()
24285 .mr(3)
24286 .nr(4)
24287 .kr(8)
24288 .sr(1)
24289 .m(3)
24290 .n(n)
24291 .k(k)
24292 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024293 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024294 }
24295 }
24296 }
24297
24298 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_div_4_strided_a) {
24299 for (uint32_t n = 8; n <= 12; n += 4) {
24300 for (size_t k = 1; k <= 40; k += 9) {
24301 GemmMicrokernelTester()
24302 .mr(3)
24303 .nr(4)
24304 .kr(8)
24305 .sr(1)
24306 .m(3)
24307 .n(n)
24308 .k(k)
24309 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080024310 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024311 }
24312 }
24313 }
24314
24315 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_div_4_subtile) {
24316 for (uint32_t n = 8; n <= 12; n += 4) {
24317 for (size_t k = 1; k <= 40; k += 9) {
24318 for (uint32_t m = 1; m <= 3; m++) {
24319 GemmMicrokernelTester()
24320 .mr(3)
24321 .nr(4)
24322 .kr(8)
24323 .sr(1)
24324 .m(m)
24325 .n(n)
24326 .k(k)
24327 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024328 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024329 }
24330 }
24331 }
24332 }
24333
24334 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, strided_cm_subtile) {
24335 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024336 for (uint32_t n = 1; n <= 4; n++) {
24337 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024338 GemmMicrokernelTester()
24339 .mr(3)
24340 .nr(4)
24341 .kr(8)
24342 .sr(1)
24343 .m(m)
24344 .n(n)
24345 .k(k)
24346 .cm_stride(7)
24347 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024348 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024349 }
24350 }
24351 }
24352 }
24353
24354 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, qmin) {
24355 GemmMicrokernelTester()
24356 .mr(3)
24357 .nr(4)
24358 .kr(8)
24359 .sr(1)
24360 .m(3)
24361 .n(4)
24362 .k(8)
24363 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024364 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024365 }
24366
24367 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, qmax) {
24368 GemmMicrokernelTester()
24369 .mr(3)
24370 .nr(4)
24371 .kr(8)
24372 .sr(1)
24373 .m(3)
24374 .n(4)
24375 .k(8)
24376 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024377 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024378 }
24379
24380 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, strided_cm) {
24381 GemmMicrokernelTester()
24382 .mr(3)
24383 .nr(4)
24384 .kr(8)
24385 .sr(1)
24386 .m(3)
24387 .n(4)
24388 .k(8)
24389 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024390 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024391 }
24392
24393 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, no_a_zero_point) {
24394 for (size_t k = 1; k <= 40; k += 9) {
24395 GemmMicrokernelTester()
24396 .mr(3)
24397 .nr(4)
24398 .kr(8)
24399 .sr(1)
24400 .m(3)
24401 .n(4)
24402 .k(k)
24403 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080024404 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024405 }
24406 }
24407
24408 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, no_b_zero_point) {
24409 for (size_t k = 1; k <= 40; k += 9) {
24410 GemmMicrokernelTester()
24411 .mr(3)
24412 .nr(4)
24413 .kr(8)
24414 .sr(1)
24415 .m(3)
24416 .n(4)
24417 .k(k)
24418 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080024419 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024420 }
24421 }
24422
24423 TEST(QU8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, no_zero_point) {
24424 for (size_t k = 1; k <= 40; k += 9) {
24425 GemmMicrokernelTester()
24426 .mr(3)
24427 .nr(4)
24428 .kr(8)
24429 .sr(1)
24430 .m(3)
24431 .n(4)
24432 .k(k)
24433 .a_zero_point(0)
24434 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080024435 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024436 }
24437 }
24438#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24439
24440
24441#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24442 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1) {
24443 GemmMicrokernelTester()
24444 .mr(1)
24445 .nr(2)
24446 .kr(1)
24447 .sr(1)
24448 .m(1)
24449 .n(2)
24450 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024451 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024452 }
24453
24454 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cn) {
24455 GemmMicrokernelTester()
24456 .mr(1)
24457 .nr(2)
24458 .kr(1)
24459 .sr(1)
24460 .m(1)
24461 .n(2)
24462 .k(1)
24463 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080024464 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024465 }
24466
24467 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_strided_a) {
24468 GemmMicrokernelTester()
24469 .mr(1)
24470 .nr(2)
24471 .kr(1)
24472 .sr(1)
24473 .m(1)
24474 .n(2)
24475 .k(1)
24476 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024477 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024478 }
24479
24480 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024481 for (uint32_t n = 1; n <= 2; n++) {
24482 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024483 GemmMicrokernelTester()
24484 .mr(1)
24485 .nr(2)
24486 .kr(1)
24487 .sr(1)
24488 .m(m)
24489 .n(n)
24490 .k(1)
24491 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024492 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024493 }
24494 }
24495 }
24496
24497 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_m) {
24498 for (uint32_t m = 1; m <= 1; m++) {
24499 GemmMicrokernelTester()
24500 .mr(1)
24501 .nr(2)
24502 .kr(1)
24503 .sr(1)
24504 .m(m)
24505 .n(2)
24506 .k(1)
24507 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024508 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024509 }
24510 }
24511
24512 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_n) {
24513 for (uint32_t n = 1; n <= 2; n++) {
24514 GemmMicrokernelTester()
24515 .mr(1)
24516 .nr(2)
24517 .kr(1)
24518 .sr(1)
24519 .m(1)
24520 .n(n)
24521 .k(1)
24522 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024523 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024524 }
24525 }
24526
24527 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1) {
24528 for (size_t k = 2; k < 10; k++) {
24529 GemmMicrokernelTester()
24530 .mr(1)
24531 .nr(2)
24532 .kr(1)
24533 .sr(1)
24534 .m(1)
24535 .n(2)
24536 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024537 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024538 }
24539 }
24540
24541 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1_strided_a) {
24542 for (size_t k = 2; k < 10; k++) {
24543 GemmMicrokernelTester()
24544 .mr(1)
24545 .nr(2)
24546 .kr(1)
24547 .sr(1)
24548 .m(1)
24549 .n(2)
24550 .k(k)
24551 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024552 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024553 }
24554 }
24555
24556 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1_subtile) {
24557 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024558 for (uint32_t n = 1; n <= 2; n++) {
24559 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024560 GemmMicrokernelTester()
24561 .mr(1)
24562 .nr(2)
24563 .kr(1)
24564 .sr(1)
24565 .m(m)
24566 .n(n)
24567 .k(k)
24568 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024569 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024570 }
24571 }
24572 }
24573 }
24574
24575 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2) {
24576 for (uint32_t n = 3; n < 4; n++) {
24577 for (size_t k = 1; k <= 5; k += 2) {
24578 GemmMicrokernelTester()
24579 .mr(1)
24580 .nr(2)
24581 .kr(1)
24582 .sr(1)
24583 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024584 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024585 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024586 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024587 }
24588 }
24589 }
24590
24591 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_strided_cn) {
24592 for (uint32_t n = 3; n < 4; n++) {
24593 for (size_t k = 1; k <= 5; k += 2) {
24594 GemmMicrokernelTester()
24595 .mr(1)
24596 .nr(2)
24597 .kr(1)
24598 .sr(1)
24599 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024600 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024601 .k(k)
24602 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080024603 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024604 }
24605 }
24606 }
24607
24608 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_strided_a) {
24609 for (uint32_t n = 3; n < 4; n++) {
24610 for (size_t k = 1; k <= 5; k += 2) {
24611 GemmMicrokernelTester()
24612 .mr(1)
24613 .nr(2)
24614 .kr(1)
24615 .sr(1)
24616 .m(1)
24617 .n(n)
24618 .k(k)
24619 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024620 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024621 }
24622 }
24623 }
24624
24625 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_subtile) {
24626 for (uint32_t n = 3; n < 4; n++) {
24627 for (size_t k = 1; k <= 5; k += 2) {
24628 for (uint32_t m = 1; m <= 1; m++) {
24629 GemmMicrokernelTester()
24630 .mr(1)
24631 .nr(2)
24632 .kr(1)
24633 .sr(1)
24634 .m(m)
24635 .n(n)
24636 .k(k)
24637 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024638 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024639 }
24640 }
24641 }
24642 }
24643
24644 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2) {
24645 for (uint32_t n = 4; n <= 6; n += 2) {
24646 for (size_t k = 1; k <= 5; k += 2) {
24647 GemmMicrokernelTester()
24648 .mr(1)
24649 .nr(2)
24650 .kr(1)
24651 .sr(1)
24652 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024653 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024654 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024655 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024656 }
24657 }
24658 }
24659
24660 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_strided_cn) {
24661 for (uint32_t n = 4; n <= 6; n += 2) {
24662 for (size_t k = 1; k <= 5; k += 2) {
24663 GemmMicrokernelTester()
24664 .mr(1)
24665 .nr(2)
24666 .kr(1)
24667 .sr(1)
24668 .m(1)
24669 .n(n)
24670 .k(k)
24671 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080024672 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024673 }
24674 }
24675 }
24676
24677 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_strided_a) {
24678 for (uint32_t n = 4; n <= 6; n += 2) {
24679 for (size_t k = 1; k <= 5; k += 2) {
24680 GemmMicrokernelTester()
24681 .mr(1)
24682 .nr(2)
24683 .kr(1)
24684 .sr(1)
24685 .m(1)
24686 .n(n)
24687 .k(k)
24688 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024689 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024690 }
24691 }
24692 }
24693
24694 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_subtile) {
24695 for (uint32_t n = 4; n <= 6; n += 2) {
24696 for (size_t k = 1; k <= 5; k += 2) {
24697 for (uint32_t m = 1; m <= 1; m++) {
24698 GemmMicrokernelTester()
24699 .mr(1)
24700 .nr(2)
24701 .kr(1)
24702 .sr(1)
24703 .m(m)
24704 .n(n)
24705 .k(k)
24706 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024707 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024708 }
24709 }
24710 }
24711 }
24712
24713 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm_subtile) {
24714 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024715 for (uint32_t n = 1; n <= 2; n++) {
24716 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024717 GemmMicrokernelTester()
24718 .mr(1)
24719 .nr(2)
24720 .kr(1)
24721 .sr(1)
24722 .m(m)
24723 .n(n)
24724 .k(k)
24725 .cm_stride(5)
24726 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024727 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024728 }
24729 }
24730 }
24731 }
24732
24733 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmin) {
24734 GemmMicrokernelTester()
24735 .mr(1)
24736 .nr(2)
24737 .kr(1)
24738 .sr(1)
24739 .m(1)
24740 .n(2)
24741 .k(1)
24742 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024743 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024744 }
24745
24746 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmax) {
24747 GemmMicrokernelTester()
24748 .mr(1)
24749 .nr(2)
24750 .kr(1)
24751 .sr(1)
24752 .m(1)
24753 .n(2)
24754 .k(1)
24755 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024756 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024757 }
24758
24759 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm) {
24760 GemmMicrokernelTester()
24761 .mr(1)
24762 .nr(2)
24763 .kr(1)
24764 .sr(1)
24765 .m(1)
24766 .n(2)
24767 .k(1)
24768 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080024769 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024770 }
24771
24772 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_a_zero_point) {
24773 for (size_t k = 1; k <= 5; k += 2) {
24774 GemmMicrokernelTester()
24775 .mr(1)
24776 .nr(2)
24777 .kr(1)
24778 .sr(1)
24779 .m(1)
24780 .n(2)
24781 .k(k)
24782 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080024783 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024784 }
24785 }
24786
24787 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_b_zero_point) {
24788 for (size_t k = 1; k <= 5; k += 2) {
24789 GemmMicrokernelTester()
24790 .mr(1)
24791 .nr(2)
24792 .kr(1)
24793 .sr(1)
24794 .m(1)
24795 .n(2)
24796 .k(k)
24797 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080024798 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024799 }
24800 }
24801
24802 TEST(QU8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_zero_point) {
24803 for (size_t k = 1; k <= 5; k += 2) {
24804 GemmMicrokernelTester()
24805 .mr(1)
24806 .nr(2)
24807 .kr(1)
24808 .sr(1)
24809 .m(1)
24810 .n(2)
24811 .k(k)
24812 .a_zero_point(0)
24813 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080024814 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024815 }
24816 }
24817#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24818
24819
24820#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24821 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1) {
24822 GemmMicrokernelTester()
24823 .mr(2)
24824 .nr(2)
24825 .kr(1)
24826 .sr(1)
24827 .m(2)
24828 .n(2)
24829 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024830 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024831 }
24832
24833 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cn) {
24834 GemmMicrokernelTester()
24835 .mr(2)
24836 .nr(2)
24837 .kr(1)
24838 .sr(1)
24839 .m(2)
24840 .n(2)
24841 .k(1)
24842 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080024843 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024844 }
24845
24846 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_strided_a) {
24847 GemmMicrokernelTester()
24848 .mr(2)
24849 .nr(2)
24850 .kr(1)
24851 .sr(1)
24852 .m(2)
24853 .n(2)
24854 .k(1)
24855 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024856 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024857 }
24858
24859 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024860 for (uint32_t n = 1; n <= 2; n++) {
24861 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024862 GemmMicrokernelTester()
24863 .mr(2)
24864 .nr(2)
24865 .kr(1)
24866 .sr(1)
24867 .m(m)
24868 .n(n)
24869 .k(1)
24870 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024871 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024872 }
24873 }
24874 }
24875
24876 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_m) {
24877 for (uint32_t m = 1; m <= 2; m++) {
24878 GemmMicrokernelTester()
24879 .mr(2)
24880 .nr(2)
24881 .kr(1)
24882 .sr(1)
24883 .m(m)
24884 .n(2)
24885 .k(1)
24886 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024887 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024888 }
24889 }
24890
24891 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_n) {
24892 for (uint32_t n = 1; n <= 2; n++) {
24893 GemmMicrokernelTester()
24894 .mr(2)
24895 .nr(2)
24896 .kr(1)
24897 .sr(1)
24898 .m(2)
24899 .n(n)
24900 .k(1)
24901 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024902 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024903 }
24904 }
24905
24906 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1) {
24907 for (size_t k = 2; k < 10; k++) {
24908 GemmMicrokernelTester()
24909 .mr(2)
24910 .nr(2)
24911 .kr(1)
24912 .sr(1)
24913 .m(2)
24914 .n(2)
24915 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024916 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024917 }
24918 }
24919
24920 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1_strided_a) {
24921 for (size_t k = 2; k < 10; k++) {
24922 GemmMicrokernelTester()
24923 .mr(2)
24924 .nr(2)
24925 .kr(1)
24926 .sr(1)
24927 .m(2)
24928 .n(2)
24929 .k(k)
24930 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024931 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024932 }
24933 }
24934
24935 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1_subtile) {
24936 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024937 for (uint32_t n = 1; n <= 2; n++) {
24938 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024939 GemmMicrokernelTester()
24940 .mr(2)
24941 .nr(2)
24942 .kr(1)
24943 .sr(1)
24944 .m(m)
24945 .n(n)
24946 .k(k)
24947 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024948 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024949 }
24950 }
24951 }
24952 }
24953
24954 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2) {
24955 for (uint32_t n = 3; n < 4; n++) {
24956 for (size_t k = 1; k <= 5; k += 2) {
24957 GemmMicrokernelTester()
24958 .mr(2)
24959 .nr(2)
24960 .kr(1)
24961 .sr(1)
24962 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024963 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024964 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024965 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024966 }
24967 }
24968 }
24969
24970 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_strided_cn) {
24971 for (uint32_t n = 3; n < 4; n++) {
24972 for (size_t k = 1; k <= 5; k += 2) {
24973 GemmMicrokernelTester()
24974 .mr(2)
24975 .nr(2)
24976 .kr(1)
24977 .sr(1)
24978 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024979 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024980 .k(k)
24981 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080024982 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024983 }
24984 }
24985 }
24986
24987 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_strided_a) {
24988 for (uint32_t n = 3; n < 4; n++) {
24989 for (size_t k = 1; k <= 5; k += 2) {
24990 GemmMicrokernelTester()
24991 .mr(2)
24992 .nr(2)
24993 .kr(1)
24994 .sr(1)
24995 .m(2)
24996 .n(n)
24997 .k(k)
24998 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024999 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025000 }
25001 }
25002 }
25003
25004 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_subtile) {
25005 for (uint32_t n = 3; n < 4; n++) {
25006 for (size_t k = 1; k <= 5; k += 2) {
25007 for (uint32_t m = 1; m <= 2; m++) {
25008 GemmMicrokernelTester()
25009 .mr(2)
25010 .nr(2)
25011 .kr(1)
25012 .sr(1)
25013 .m(m)
25014 .n(n)
25015 .k(k)
25016 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025017 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025018 }
25019 }
25020 }
25021 }
25022
25023 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2) {
25024 for (uint32_t n = 4; n <= 6; n += 2) {
25025 for (size_t k = 1; k <= 5; k += 2) {
25026 GemmMicrokernelTester()
25027 .mr(2)
25028 .nr(2)
25029 .kr(1)
25030 .sr(1)
25031 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025032 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025033 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025034 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025035 }
25036 }
25037 }
25038
25039 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_strided_cn) {
25040 for (uint32_t n = 4; n <= 6; n += 2) {
25041 for (size_t k = 1; k <= 5; k += 2) {
25042 GemmMicrokernelTester()
25043 .mr(2)
25044 .nr(2)
25045 .kr(1)
25046 .sr(1)
25047 .m(2)
25048 .n(n)
25049 .k(k)
25050 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080025051 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025052 }
25053 }
25054 }
25055
25056 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_strided_a) {
25057 for (uint32_t n = 4; n <= 6; n += 2) {
25058 for (size_t k = 1; k <= 5; k += 2) {
25059 GemmMicrokernelTester()
25060 .mr(2)
25061 .nr(2)
25062 .kr(1)
25063 .sr(1)
25064 .m(2)
25065 .n(n)
25066 .k(k)
25067 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025068 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025069 }
25070 }
25071 }
25072
25073 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_subtile) {
25074 for (uint32_t n = 4; n <= 6; n += 2) {
25075 for (size_t k = 1; k <= 5; k += 2) {
25076 for (uint32_t m = 1; m <= 2; m++) {
25077 GemmMicrokernelTester()
25078 .mr(2)
25079 .nr(2)
25080 .kr(1)
25081 .sr(1)
25082 .m(m)
25083 .n(n)
25084 .k(k)
25085 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025086 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025087 }
25088 }
25089 }
25090 }
25091
25092 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm_subtile) {
25093 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025094 for (uint32_t n = 1; n <= 2; n++) {
25095 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025096 GemmMicrokernelTester()
25097 .mr(2)
25098 .nr(2)
25099 .kr(1)
25100 .sr(1)
25101 .m(m)
25102 .n(n)
25103 .k(k)
25104 .cm_stride(5)
25105 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025106 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025107 }
25108 }
25109 }
25110 }
25111
25112 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmin) {
25113 GemmMicrokernelTester()
25114 .mr(2)
25115 .nr(2)
25116 .kr(1)
25117 .sr(1)
25118 .m(2)
25119 .n(2)
25120 .k(1)
25121 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025122 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025123 }
25124
25125 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmax) {
25126 GemmMicrokernelTester()
25127 .mr(2)
25128 .nr(2)
25129 .kr(1)
25130 .sr(1)
25131 .m(2)
25132 .n(2)
25133 .k(1)
25134 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025135 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025136 }
25137
25138 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm) {
25139 GemmMicrokernelTester()
25140 .mr(2)
25141 .nr(2)
25142 .kr(1)
25143 .sr(1)
25144 .m(2)
25145 .n(2)
25146 .k(1)
25147 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080025148 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025149 }
25150
25151 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_a_zero_point) {
25152 for (size_t k = 1; k <= 5; k += 2) {
25153 GemmMicrokernelTester()
25154 .mr(2)
25155 .nr(2)
25156 .kr(1)
25157 .sr(1)
25158 .m(2)
25159 .n(2)
25160 .k(k)
25161 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080025162 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025163 }
25164 }
25165
25166 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_b_zero_point) {
25167 for (size_t k = 1; k <= 5; k += 2) {
25168 GemmMicrokernelTester()
25169 .mr(2)
25170 .nr(2)
25171 .kr(1)
25172 .sr(1)
25173 .m(2)
25174 .n(2)
25175 .k(k)
25176 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080025177 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025178 }
25179 }
25180
25181 TEST(QU8_GEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_zero_point) {
25182 for (size_t k = 1; k <= 5; k += 2) {
25183 GemmMicrokernelTester()
25184 .mr(2)
25185 .nr(2)
25186 .kr(1)
25187 .sr(1)
25188 .m(2)
25189 .n(2)
25190 .k(k)
25191 .a_zero_point(0)
25192 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080025193 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025194 }
25195 }
25196#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25197
25198
25199#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25200 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1) {
25201 GemmMicrokernelTester()
25202 .mr(4)
25203 .nr(2)
25204 .kr(1)
25205 .sr(1)
25206 .m(4)
25207 .n(2)
25208 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025209 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025210 }
25211
25212 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cn) {
25213 GemmMicrokernelTester()
25214 .mr(4)
25215 .nr(2)
25216 .kr(1)
25217 .sr(1)
25218 .m(4)
25219 .n(2)
25220 .k(1)
25221 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080025222 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025223 }
25224
25225 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_strided_a) {
25226 GemmMicrokernelTester()
25227 .mr(4)
25228 .nr(2)
25229 .kr(1)
25230 .sr(1)
25231 .m(4)
25232 .n(2)
25233 .k(1)
25234 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025235 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025236 }
25237
25238 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025239 for (uint32_t n = 1; n <= 2; n++) {
25240 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025241 GemmMicrokernelTester()
25242 .mr(4)
25243 .nr(2)
25244 .kr(1)
25245 .sr(1)
25246 .m(m)
25247 .n(n)
25248 .k(1)
25249 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025250 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025251 }
25252 }
25253 }
25254
25255 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_m) {
25256 for (uint32_t m = 1; m <= 4; m++) {
25257 GemmMicrokernelTester()
25258 .mr(4)
25259 .nr(2)
25260 .kr(1)
25261 .sr(1)
25262 .m(m)
25263 .n(2)
25264 .k(1)
25265 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025266 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025267 }
25268 }
25269
25270 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_n) {
25271 for (uint32_t n = 1; n <= 2; n++) {
25272 GemmMicrokernelTester()
25273 .mr(4)
25274 .nr(2)
25275 .kr(1)
25276 .sr(1)
25277 .m(4)
25278 .n(n)
25279 .k(1)
25280 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025281 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025282 }
25283 }
25284
25285 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1) {
25286 for (size_t k = 2; k < 10; k++) {
25287 GemmMicrokernelTester()
25288 .mr(4)
25289 .nr(2)
25290 .kr(1)
25291 .sr(1)
25292 .m(4)
25293 .n(2)
25294 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025295 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025296 }
25297 }
25298
25299 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_strided_a) {
25300 for (size_t k = 2; k < 10; k++) {
25301 GemmMicrokernelTester()
25302 .mr(4)
25303 .nr(2)
25304 .kr(1)
25305 .sr(1)
25306 .m(4)
25307 .n(2)
25308 .k(k)
25309 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025310 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025311 }
25312 }
25313
25314 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_subtile) {
25315 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025316 for (uint32_t n = 1; n <= 2; n++) {
25317 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025318 GemmMicrokernelTester()
25319 .mr(4)
25320 .nr(2)
25321 .kr(1)
25322 .sr(1)
25323 .m(m)
25324 .n(n)
25325 .k(k)
25326 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025327 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025328 }
25329 }
25330 }
25331 }
25332
25333 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2) {
25334 for (uint32_t n = 3; n < 4; n++) {
25335 for (size_t k = 1; k <= 5; k += 2) {
25336 GemmMicrokernelTester()
25337 .mr(4)
25338 .nr(2)
25339 .kr(1)
25340 .sr(1)
25341 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025342 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025343 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025344 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025345 }
25346 }
25347 }
25348
25349 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_cn) {
25350 for (uint32_t n = 3; n < 4; n++) {
25351 for (size_t k = 1; k <= 5; k += 2) {
25352 GemmMicrokernelTester()
25353 .mr(4)
25354 .nr(2)
25355 .kr(1)
25356 .sr(1)
25357 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025358 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025359 .k(k)
25360 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080025361 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025362 }
25363 }
25364 }
25365
25366 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_a) {
25367 for (uint32_t n = 3; n < 4; n++) {
25368 for (size_t k = 1; k <= 5; k += 2) {
25369 GemmMicrokernelTester()
25370 .mr(4)
25371 .nr(2)
25372 .kr(1)
25373 .sr(1)
25374 .m(4)
25375 .n(n)
25376 .k(k)
25377 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025378 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025379 }
25380 }
25381 }
25382
25383 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_subtile) {
25384 for (uint32_t n = 3; n < 4; n++) {
25385 for (size_t k = 1; k <= 5; k += 2) {
25386 for (uint32_t m = 1; m <= 4; m++) {
25387 GemmMicrokernelTester()
25388 .mr(4)
25389 .nr(2)
25390 .kr(1)
25391 .sr(1)
25392 .m(m)
25393 .n(n)
25394 .k(k)
25395 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025396 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025397 }
25398 }
25399 }
25400 }
25401
25402 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2) {
25403 for (uint32_t n = 4; n <= 6; n += 2) {
25404 for (size_t k = 1; k <= 5; k += 2) {
25405 GemmMicrokernelTester()
25406 .mr(4)
25407 .nr(2)
25408 .kr(1)
25409 .sr(1)
25410 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025411 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025412 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025413 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025414 }
25415 }
25416 }
25417
25418 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_cn) {
25419 for (uint32_t n = 4; n <= 6; n += 2) {
25420 for (size_t k = 1; k <= 5; k += 2) {
25421 GemmMicrokernelTester()
25422 .mr(4)
25423 .nr(2)
25424 .kr(1)
25425 .sr(1)
25426 .m(4)
25427 .n(n)
25428 .k(k)
25429 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080025430 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025431 }
25432 }
25433 }
25434
25435 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_a) {
25436 for (uint32_t n = 4; n <= 6; n += 2) {
25437 for (size_t k = 1; k <= 5; k += 2) {
25438 GemmMicrokernelTester()
25439 .mr(4)
25440 .nr(2)
25441 .kr(1)
25442 .sr(1)
25443 .m(4)
25444 .n(n)
25445 .k(k)
25446 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025447 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025448 }
25449 }
25450 }
25451
25452 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_subtile) {
25453 for (uint32_t n = 4; n <= 6; n += 2) {
25454 for (size_t k = 1; k <= 5; k += 2) {
25455 for (uint32_t m = 1; m <= 4; m++) {
25456 GemmMicrokernelTester()
25457 .mr(4)
25458 .nr(2)
25459 .kr(1)
25460 .sr(1)
25461 .m(m)
25462 .n(n)
25463 .k(k)
25464 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025465 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025466 }
25467 }
25468 }
25469 }
25470
25471 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm_subtile) {
25472 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025473 for (uint32_t n = 1; n <= 2; n++) {
25474 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025475 GemmMicrokernelTester()
25476 .mr(4)
25477 .nr(2)
25478 .kr(1)
25479 .sr(1)
25480 .m(m)
25481 .n(n)
25482 .k(k)
25483 .cm_stride(5)
25484 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025485 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025486 }
25487 }
25488 }
25489 }
25490
25491 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmin) {
25492 GemmMicrokernelTester()
25493 .mr(4)
25494 .nr(2)
25495 .kr(1)
25496 .sr(1)
25497 .m(4)
25498 .n(2)
25499 .k(1)
25500 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025501 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025502 }
25503
25504 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmax) {
25505 GemmMicrokernelTester()
25506 .mr(4)
25507 .nr(2)
25508 .kr(1)
25509 .sr(1)
25510 .m(4)
25511 .n(2)
25512 .k(1)
25513 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025514 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025515 }
25516
25517 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm) {
25518 GemmMicrokernelTester()
25519 .mr(4)
25520 .nr(2)
25521 .kr(1)
25522 .sr(1)
25523 .m(4)
25524 .n(2)
25525 .k(1)
25526 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080025527 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025528 }
25529
25530 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_a_zero_point) {
25531 for (size_t k = 1; k <= 5; k += 2) {
25532 GemmMicrokernelTester()
25533 .mr(4)
25534 .nr(2)
25535 .kr(1)
25536 .sr(1)
25537 .m(4)
25538 .n(2)
25539 .k(k)
25540 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080025541 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025542 }
25543 }
25544
25545 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_b_zero_point) {
25546 for (size_t k = 1; k <= 5; k += 2) {
25547 GemmMicrokernelTester()
25548 .mr(4)
25549 .nr(2)
25550 .kr(1)
25551 .sr(1)
25552 .m(4)
25553 .n(2)
25554 .k(k)
25555 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080025556 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025557 }
25558 }
25559
25560 TEST(QU8_GEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_zero_point) {
25561 for (size_t k = 1; k <= 5; k += 2) {
25562 GemmMicrokernelTester()
25563 .mr(4)
25564 .nr(2)
25565 .kr(1)
25566 .sr(1)
25567 .m(4)
25568 .n(2)
25569 .k(k)
25570 .a_zero_point(0)
25571 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080025572 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025573 }
25574 }
25575#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25576
25577
25578#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25579 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1) {
25580 GemmMicrokernelTester()
25581 .mr(1)
25582 .nr(4)
25583 .kr(1)
25584 .sr(1)
25585 .m(1)
25586 .n(4)
25587 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025588 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025589 }
25590
25591 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cn) {
25592 GemmMicrokernelTester()
25593 .mr(1)
25594 .nr(4)
25595 .kr(1)
25596 .sr(1)
25597 .m(1)
25598 .n(4)
25599 .k(1)
25600 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025601 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025602 }
25603
25604 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_strided_a) {
25605 GemmMicrokernelTester()
25606 .mr(1)
25607 .nr(4)
25608 .kr(1)
25609 .sr(1)
25610 .m(1)
25611 .n(4)
25612 .k(1)
25613 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025614 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025615 }
25616
25617 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025618 for (uint32_t n = 1; n <= 4; n++) {
25619 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025620 GemmMicrokernelTester()
25621 .mr(1)
25622 .nr(4)
25623 .kr(1)
25624 .sr(1)
25625 .m(m)
25626 .n(n)
25627 .k(1)
25628 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025629 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025630 }
25631 }
25632 }
25633
25634 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_m) {
25635 for (uint32_t m = 1; m <= 1; m++) {
25636 GemmMicrokernelTester()
25637 .mr(1)
25638 .nr(4)
25639 .kr(1)
25640 .sr(1)
25641 .m(m)
25642 .n(4)
25643 .k(1)
25644 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025645 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025646 }
25647 }
25648
25649 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_n) {
25650 for (uint32_t n = 1; n <= 4; n++) {
25651 GemmMicrokernelTester()
25652 .mr(1)
25653 .nr(4)
25654 .kr(1)
25655 .sr(1)
25656 .m(1)
25657 .n(n)
25658 .k(1)
25659 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025660 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025661 }
25662 }
25663
25664 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1) {
25665 for (size_t k = 2; k < 10; k++) {
25666 GemmMicrokernelTester()
25667 .mr(1)
25668 .nr(4)
25669 .kr(1)
25670 .sr(1)
25671 .m(1)
25672 .n(4)
25673 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025674 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025675 }
25676 }
25677
25678 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1_strided_a) {
25679 for (size_t k = 2; k < 10; k++) {
25680 GemmMicrokernelTester()
25681 .mr(1)
25682 .nr(4)
25683 .kr(1)
25684 .sr(1)
25685 .m(1)
25686 .n(4)
25687 .k(k)
25688 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025689 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025690 }
25691 }
25692
25693 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1_subtile) {
25694 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025695 for (uint32_t n = 1; n <= 4; n++) {
25696 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025697 GemmMicrokernelTester()
25698 .mr(1)
25699 .nr(4)
25700 .kr(1)
25701 .sr(1)
25702 .m(m)
25703 .n(n)
25704 .k(k)
25705 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025706 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025707 }
25708 }
25709 }
25710 }
25711
25712 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4) {
25713 for (uint32_t n = 5; n < 8; n++) {
25714 for (size_t k = 1; k <= 5; k += 2) {
25715 GemmMicrokernelTester()
25716 .mr(1)
25717 .nr(4)
25718 .kr(1)
25719 .sr(1)
25720 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025721 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025722 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025723 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025724 }
25725 }
25726 }
25727
25728 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_strided_cn) {
25729 for (uint32_t n = 5; n < 8; n++) {
25730 for (size_t k = 1; k <= 5; k += 2) {
25731 GemmMicrokernelTester()
25732 .mr(1)
25733 .nr(4)
25734 .kr(1)
25735 .sr(1)
25736 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025737 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025738 .k(k)
25739 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025740 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025741 }
25742 }
25743 }
25744
25745 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_strided_a) {
25746 for (uint32_t n = 5; n < 8; n++) {
25747 for (size_t k = 1; k <= 5; k += 2) {
25748 GemmMicrokernelTester()
25749 .mr(1)
25750 .nr(4)
25751 .kr(1)
25752 .sr(1)
25753 .m(1)
25754 .n(n)
25755 .k(k)
25756 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025757 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025758 }
25759 }
25760 }
25761
25762 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_subtile) {
25763 for (uint32_t n = 5; n < 8; n++) {
25764 for (size_t k = 1; k <= 5; k += 2) {
25765 for (uint32_t m = 1; m <= 1; m++) {
25766 GemmMicrokernelTester()
25767 .mr(1)
25768 .nr(4)
25769 .kr(1)
25770 .sr(1)
25771 .m(m)
25772 .n(n)
25773 .k(k)
25774 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025775 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025776 }
25777 }
25778 }
25779 }
25780
25781 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4) {
25782 for (uint32_t n = 8; n <= 12; n += 4) {
25783 for (size_t k = 1; k <= 5; k += 2) {
25784 GemmMicrokernelTester()
25785 .mr(1)
25786 .nr(4)
25787 .kr(1)
25788 .sr(1)
25789 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025790 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025791 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025792 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025793 }
25794 }
25795 }
25796
25797 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_strided_cn) {
25798 for (uint32_t n = 8; n <= 12; n += 4) {
25799 for (size_t k = 1; k <= 5; k += 2) {
25800 GemmMicrokernelTester()
25801 .mr(1)
25802 .nr(4)
25803 .kr(1)
25804 .sr(1)
25805 .m(1)
25806 .n(n)
25807 .k(k)
25808 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025809 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025810 }
25811 }
25812 }
25813
25814 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_strided_a) {
25815 for (uint32_t n = 8; n <= 12; n += 4) {
25816 for (size_t k = 1; k <= 5; k += 2) {
25817 GemmMicrokernelTester()
25818 .mr(1)
25819 .nr(4)
25820 .kr(1)
25821 .sr(1)
25822 .m(1)
25823 .n(n)
25824 .k(k)
25825 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025826 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025827 }
25828 }
25829 }
25830
25831 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_subtile) {
25832 for (uint32_t n = 8; n <= 12; n += 4) {
25833 for (size_t k = 1; k <= 5; k += 2) {
25834 for (uint32_t m = 1; m <= 1; m++) {
25835 GemmMicrokernelTester()
25836 .mr(1)
25837 .nr(4)
25838 .kr(1)
25839 .sr(1)
25840 .m(m)
25841 .n(n)
25842 .k(k)
25843 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025844 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025845 }
25846 }
25847 }
25848 }
25849
25850 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm_subtile) {
25851 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025852 for (uint32_t n = 1; n <= 4; n++) {
25853 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025854 GemmMicrokernelTester()
25855 .mr(1)
25856 .nr(4)
25857 .kr(1)
25858 .sr(1)
25859 .m(m)
25860 .n(n)
25861 .k(k)
25862 .cm_stride(7)
25863 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025864 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025865 }
25866 }
25867 }
25868 }
25869
25870 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmin) {
25871 GemmMicrokernelTester()
25872 .mr(1)
25873 .nr(4)
25874 .kr(1)
25875 .sr(1)
25876 .m(1)
25877 .n(4)
25878 .k(1)
25879 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025880 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025881 }
25882
25883 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmax) {
25884 GemmMicrokernelTester()
25885 .mr(1)
25886 .nr(4)
25887 .kr(1)
25888 .sr(1)
25889 .m(1)
25890 .n(4)
25891 .k(1)
25892 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025893 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025894 }
25895
25896 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm) {
25897 GemmMicrokernelTester()
25898 .mr(1)
25899 .nr(4)
25900 .kr(1)
25901 .sr(1)
25902 .m(1)
25903 .n(4)
25904 .k(1)
25905 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025906 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025907 }
25908
25909 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_a_zero_point) {
25910 for (size_t k = 1; k <= 5; k += 2) {
25911 GemmMicrokernelTester()
25912 .mr(1)
25913 .nr(4)
25914 .kr(1)
25915 .sr(1)
25916 .m(1)
25917 .n(4)
25918 .k(k)
25919 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080025920 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025921 }
25922 }
25923
25924 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_b_zero_point) {
25925 for (size_t k = 1; k <= 5; k += 2) {
25926 GemmMicrokernelTester()
25927 .mr(1)
25928 .nr(4)
25929 .kr(1)
25930 .sr(1)
25931 .m(1)
25932 .n(4)
25933 .k(k)
25934 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080025935 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025936 }
25937 }
25938
25939 TEST(QU8_GEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_zero_point) {
25940 for (size_t k = 1; k <= 5; k += 2) {
25941 GemmMicrokernelTester()
25942 .mr(1)
25943 .nr(4)
25944 .kr(1)
25945 .sr(1)
25946 .m(1)
25947 .n(4)
25948 .k(k)
25949 .a_zero_point(0)
25950 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080025951 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025952 }
25953 }
25954#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25955
25956
25957#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25958 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1) {
25959 GemmMicrokernelTester()
25960 .mr(2)
25961 .nr(4)
25962 .kr(1)
25963 .sr(1)
25964 .m(2)
25965 .n(4)
25966 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025967 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025968 }
25969
25970 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cn) {
25971 GemmMicrokernelTester()
25972 .mr(2)
25973 .nr(4)
25974 .kr(1)
25975 .sr(1)
25976 .m(2)
25977 .n(4)
25978 .k(1)
25979 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025980 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025981 }
25982
25983 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_strided_a) {
25984 GemmMicrokernelTester()
25985 .mr(2)
25986 .nr(4)
25987 .kr(1)
25988 .sr(1)
25989 .m(2)
25990 .n(4)
25991 .k(1)
25992 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025993 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025994 }
25995
25996 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025997 for (uint32_t n = 1; n <= 4; n++) {
25998 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025999 GemmMicrokernelTester()
26000 .mr(2)
26001 .nr(4)
26002 .kr(1)
26003 .sr(1)
26004 .m(m)
26005 .n(n)
26006 .k(1)
26007 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026008 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026009 }
26010 }
26011 }
26012
26013 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_m) {
26014 for (uint32_t m = 1; m <= 2; m++) {
26015 GemmMicrokernelTester()
26016 .mr(2)
26017 .nr(4)
26018 .kr(1)
26019 .sr(1)
26020 .m(m)
26021 .n(4)
26022 .k(1)
26023 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026024 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026025 }
26026 }
26027
26028 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_n) {
26029 for (uint32_t n = 1; n <= 4; n++) {
26030 GemmMicrokernelTester()
26031 .mr(2)
26032 .nr(4)
26033 .kr(1)
26034 .sr(1)
26035 .m(2)
26036 .n(n)
26037 .k(1)
26038 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026039 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026040 }
26041 }
26042
26043 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1) {
26044 for (size_t k = 2; k < 10; k++) {
26045 GemmMicrokernelTester()
26046 .mr(2)
26047 .nr(4)
26048 .kr(1)
26049 .sr(1)
26050 .m(2)
26051 .n(4)
26052 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026053 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026054 }
26055 }
26056
26057 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1_strided_a) {
26058 for (size_t k = 2; k < 10; k++) {
26059 GemmMicrokernelTester()
26060 .mr(2)
26061 .nr(4)
26062 .kr(1)
26063 .sr(1)
26064 .m(2)
26065 .n(4)
26066 .k(k)
26067 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080026068 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026069 }
26070 }
26071
26072 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1_subtile) {
26073 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026074 for (uint32_t n = 1; n <= 4; n++) {
26075 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026076 GemmMicrokernelTester()
26077 .mr(2)
26078 .nr(4)
26079 .kr(1)
26080 .sr(1)
26081 .m(m)
26082 .n(n)
26083 .k(k)
26084 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026085 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026086 }
26087 }
26088 }
26089 }
26090
26091 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4) {
26092 for (uint32_t n = 5; n < 8; n++) {
26093 for (size_t k = 1; k <= 5; k += 2) {
26094 GemmMicrokernelTester()
26095 .mr(2)
26096 .nr(4)
26097 .kr(1)
26098 .sr(1)
26099 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026100 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026101 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026102 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026103 }
26104 }
26105 }
26106
26107 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_strided_cn) {
26108 for (uint32_t n = 5; n < 8; n++) {
26109 for (size_t k = 1; k <= 5; k += 2) {
26110 GemmMicrokernelTester()
26111 .mr(2)
26112 .nr(4)
26113 .kr(1)
26114 .sr(1)
26115 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026116 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026117 .k(k)
26118 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026119 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026120 }
26121 }
26122 }
26123
26124 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_strided_a) {
26125 for (uint32_t n = 5; n < 8; n++) {
26126 for (size_t k = 1; k <= 5; k += 2) {
26127 GemmMicrokernelTester()
26128 .mr(2)
26129 .nr(4)
26130 .kr(1)
26131 .sr(1)
26132 .m(2)
26133 .n(n)
26134 .k(k)
26135 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026136 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026137 }
26138 }
26139 }
26140
26141 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_subtile) {
26142 for (uint32_t n = 5; n < 8; n++) {
26143 for (size_t k = 1; k <= 5; k += 2) {
26144 for (uint32_t m = 1; m <= 2; m++) {
26145 GemmMicrokernelTester()
26146 .mr(2)
26147 .nr(4)
26148 .kr(1)
26149 .sr(1)
26150 .m(m)
26151 .n(n)
26152 .k(k)
26153 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026154 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026155 }
26156 }
26157 }
26158 }
26159
26160 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4) {
26161 for (uint32_t n = 8; n <= 12; n += 4) {
26162 for (size_t k = 1; k <= 5; k += 2) {
26163 GemmMicrokernelTester()
26164 .mr(2)
26165 .nr(4)
26166 .kr(1)
26167 .sr(1)
26168 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026169 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026170 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026171 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026172 }
26173 }
26174 }
26175
26176 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_strided_cn) {
26177 for (uint32_t n = 8; n <= 12; n += 4) {
26178 for (size_t k = 1; k <= 5; k += 2) {
26179 GemmMicrokernelTester()
26180 .mr(2)
26181 .nr(4)
26182 .kr(1)
26183 .sr(1)
26184 .m(2)
26185 .n(n)
26186 .k(k)
26187 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026188 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026189 }
26190 }
26191 }
26192
26193 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_strided_a) {
26194 for (uint32_t n = 8; n <= 12; n += 4) {
26195 for (size_t k = 1; k <= 5; k += 2) {
26196 GemmMicrokernelTester()
26197 .mr(2)
26198 .nr(4)
26199 .kr(1)
26200 .sr(1)
26201 .m(2)
26202 .n(n)
26203 .k(k)
26204 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026205 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026206 }
26207 }
26208 }
26209
26210 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_subtile) {
26211 for (uint32_t n = 8; n <= 12; n += 4) {
26212 for (size_t k = 1; k <= 5; k += 2) {
26213 for (uint32_t m = 1; m <= 2; m++) {
26214 GemmMicrokernelTester()
26215 .mr(2)
26216 .nr(4)
26217 .kr(1)
26218 .sr(1)
26219 .m(m)
26220 .n(n)
26221 .k(k)
26222 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026223 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026224 }
26225 }
26226 }
26227 }
26228
26229 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm_subtile) {
26230 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026231 for (uint32_t n = 1; n <= 4; n++) {
26232 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026233 GemmMicrokernelTester()
26234 .mr(2)
26235 .nr(4)
26236 .kr(1)
26237 .sr(1)
26238 .m(m)
26239 .n(n)
26240 .k(k)
26241 .cm_stride(7)
26242 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026243 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026244 }
26245 }
26246 }
26247 }
26248
26249 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmin) {
26250 GemmMicrokernelTester()
26251 .mr(2)
26252 .nr(4)
26253 .kr(1)
26254 .sr(1)
26255 .m(2)
26256 .n(4)
26257 .k(1)
26258 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026259 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026260 }
26261
26262 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmax) {
26263 GemmMicrokernelTester()
26264 .mr(2)
26265 .nr(4)
26266 .kr(1)
26267 .sr(1)
26268 .m(2)
26269 .n(4)
26270 .k(1)
26271 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026272 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026273 }
26274
26275 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm) {
26276 GemmMicrokernelTester()
26277 .mr(2)
26278 .nr(4)
26279 .kr(1)
26280 .sr(1)
26281 .m(2)
26282 .n(4)
26283 .k(1)
26284 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026285 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026286 }
26287
26288 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_a_zero_point) {
26289 for (size_t k = 1; k <= 5; k += 2) {
26290 GemmMicrokernelTester()
26291 .mr(2)
26292 .nr(4)
26293 .kr(1)
26294 .sr(1)
26295 .m(2)
26296 .n(4)
26297 .k(k)
26298 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080026299 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026300 }
26301 }
26302
26303 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_b_zero_point) {
26304 for (size_t k = 1; k <= 5; k += 2) {
26305 GemmMicrokernelTester()
26306 .mr(2)
26307 .nr(4)
26308 .kr(1)
26309 .sr(1)
26310 .m(2)
26311 .n(4)
26312 .k(k)
26313 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080026314 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026315 }
26316 }
26317
26318 TEST(QU8_GEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_zero_point) {
26319 for (size_t k = 1; k <= 5; k += 2) {
26320 GemmMicrokernelTester()
26321 .mr(2)
26322 .nr(4)
26323 .kr(1)
26324 .sr(1)
26325 .m(2)
26326 .n(4)
26327 .k(k)
26328 .a_zero_point(0)
26329 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080026330 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026331 }
26332 }
26333#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26334
26335
26336#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26337 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1) {
26338 GemmMicrokernelTester()
26339 .mr(4)
26340 .nr(4)
26341 .kr(1)
26342 .sr(1)
26343 .m(4)
26344 .n(4)
26345 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026346 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026347 }
26348
26349 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cn) {
26350 GemmMicrokernelTester()
26351 .mr(4)
26352 .nr(4)
26353 .kr(1)
26354 .sr(1)
26355 .m(4)
26356 .n(4)
26357 .k(1)
26358 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026359 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026360 }
26361
26362 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_strided_a) {
26363 GemmMicrokernelTester()
26364 .mr(4)
26365 .nr(4)
26366 .kr(1)
26367 .sr(1)
26368 .m(4)
26369 .n(4)
26370 .k(1)
26371 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026372 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026373 }
26374
26375 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026376 for (uint32_t n = 1; n <= 4; n++) {
26377 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026378 GemmMicrokernelTester()
26379 .mr(4)
26380 .nr(4)
26381 .kr(1)
26382 .sr(1)
26383 .m(m)
26384 .n(n)
26385 .k(1)
26386 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026387 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026388 }
26389 }
26390 }
26391
26392 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_m) {
26393 for (uint32_t m = 1; m <= 4; m++) {
26394 GemmMicrokernelTester()
26395 .mr(4)
26396 .nr(4)
26397 .kr(1)
26398 .sr(1)
26399 .m(m)
26400 .n(4)
26401 .k(1)
26402 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026403 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026404 }
26405 }
26406
26407 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_n) {
26408 for (uint32_t n = 1; n <= 4; n++) {
26409 GemmMicrokernelTester()
26410 .mr(4)
26411 .nr(4)
26412 .kr(1)
26413 .sr(1)
26414 .m(4)
26415 .n(n)
26416 .k(1)
26417 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026418 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026419 }
26420 }
26421
26422 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1) {
26423 for (size_t k = 2; k < 10; k++) {
26424 GemmMicrokernelTester()
26425 .mr(4)
26426 .nr(4)
26427 .kr(1)
26428 .sr(1)
26429 .m(4)
26430 .n(4)
26431 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026432 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026433 }
26434 }
26435
26436 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1_strided_a) {
26437 for (size_t k = 2; k < 10; k++) {
26438 GemmMicrokernelTester()
26439 .mr(4)
26440 .nr(4)
26441 .kr(1)
26442 .sr(1)
26443 .m(4)
26444 .n(4)
26445 .k(k)
26446 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080026447 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026448 }
26449 }
26450
26451 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1_subtile) {
26452 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026453 for (uint32_t n = 1; n <= 4; n++) {
26454 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026455 GemmMicrokernelTester()
26456 .mr(4)
26457 .nr(4)
26458 .kr(1)
26459 .sr(1)
26460 .m(m)
26461 .n(n)
26462 .k(k)
26463 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026464 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026465 }
26466 }
26467 }
26468 }
26469
26470 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4) {
26471 for (uint32_t n = 5; n < 8; n++) {
26472 for (size_t k = 1; k <= 5; k += 2) {
26473 GemmMicrokernelTester()
26474 .mr(4)
26475 .nr(4)
26476 .kr(1)
26477 .sr(1)
26478 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026479 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026480 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026481 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026482 }
26483 }
26484 }
26485
26486 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_strided_cn) {
26487 for (uint32_t n = 5; n < 8; n++) {
26488 for (size_t k = 1; k <= 5; k += 2) {
26489 GemmMicrokernelTester()
26490 .mr(4)
26491 .nr(4)
26492 .kr(1)
26493 .sr(1)
26494 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026495 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026496 .k(k)
26497 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026498 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026499 }
26500 }
26501 }
26502
26503 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_strided_a) {
26504 for (uint32_t n = 5; n < 8; n++) {
26505 for (size_t k = 1; k <= 5; k += 2) {
26506 GemmMicrokernelTester()
26507 .mr(4)
26508 .nr(4)
26509 .kr(1)
26510 .sr(1)
26511 .m(4)
26512 .n(n)
26513 .k(k)
26514 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026515 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026516 }
26517 }
26518 }
26519
26520 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_subtile) {
26521 for (uint32_t n = 5; n < 8; n++) {
26522 for (size_t k = 1; k <= 5; k += 2) {
26523 for (uint32_t m = 1; m <= 4; m++) {
26524 GemmMicrokernelTester()
26525 .mr(4)
26526 .nr(4)
26527 .kr(1)
26528 .sr(1)
26529 .m(m)
26530 .n(n)
26531 .k(k)
26532 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026533 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026534 }
26535 }
26536 }
26537 }
26538
26539 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4) {
26540 for (uint32_t n = 8; n <= 12; n += 4) {
26541 for (size_t k = 1; k <= 5; k += 2) {
26542 GemmMicrokernelTester()
26543 .mr(4)
26544 .nr(4)
26545 .kr(1)
26546 .sr(1)
26547 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026548 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026549 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026550 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026551 }
26552 }
26553 }
26554
26555 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_strided_cn) {
26556 for (uint32_t n = 8; n <= 12; n += 4) {
26557 for (size_t k = 1; k <= 5; k += 2) {
26558 GemmMicrokernelTester()
26559 .mr(4)
26560 .nr(4)
26561 .kr(1)
26562 .sr(1)
26563 .m(4)
26564 .n(n)
26565 .k(k)
26566 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026567 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026568 }
26569 }
26570 }
26571
26572 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_strided_a) {
26573 for (uint32_t n = 8; n <= 12; n += 4) {
26574 for (size_t k = 1; k <= 5; k += 2) {
26575 GemmMicrokernelTester()
26576 .mr(4)
26577 .nr(4)
26578 .kr(1)
26579 .sr(1)
26580 .m(4)
26581 .n(n)
26582 .k(k)
26583 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026584 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026585 }
26586 }
26587 }
26588
26589 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_subtile) {
26590 for (uint32_t n = 8; n <= 12; n += 4) {
26591 for (size_t k = 1; k <= 5; k += 2) {
26592 for (uint32_t m = 1; m <= 4; m++) {
26593 GemmMicrokernelTester()
26594 .mr(4)
26595 .nr(4)
26596 .kr(1)
26597 .sr(1)
26598 .m(m)
26599 .n(n)
26600 .k(k)
26601 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026602 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026603 }
26604 }
26605 }
26606 }
26607
26608 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm_subtile) {
26609 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026610 for (uint32_t n = 1; n <= 4; n++) {
26611 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026612 GemmMicrokernelTester()
26613 .mr(4)
26614 .nr(4)
26615 .kr(1)
26616 .sr(1)
26617 .m(m)
26618 .n(n)
26619 .k(k)
26620 .cm_stride(7)
26621 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026622 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026623 }
26624 }
26625 }
26626 }
26627
26628 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmin) {
26629 GemmMicrokernelTester()
26630 .mr(4)
26631 .nr(4)
26632 .kr(1)
26633 .sr(1)
26634 .m(4)
26635 .n(4)
26636 .k(1)
26637 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026638 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026639 }
26640
26641 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmax) {
26642 GemmMicrokernelTester()
26643 .mr(4)
26644 .nr(4)
26645 .kr(1)
26646 .sr(1)
26647 .m(4)
26648 .n(4)
26649 .k(1)
26650 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026651 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026652 }
26653
26654 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm) {
26655 GemmMicrokernelTester()
26656 .mr(4)
26657 .nr(4)
26658 .kr(1)
26659 .sr(1)
26660 .m(4)
26661 .n(4)
26662 .k(1)
26663 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026664 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026665 }
26666
26667 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_a_zero_point) {
26668 for (size_t k = 1; k <= 5; k += 2) {
26669 GemmMicrokernelTester()
26670 .mr(4)
26671 .nr(4)
26672 .kr(1)
26673 .sr(1)
26674 .m(4)
26675 .n(4)
26676 .k(k)
26677 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080026678 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026679 }
26680 }
26681
26682 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_b_zero_point) {
26683 for (size_t k = 1; k <= 5; k += 2) {
26684 GemmMicrokernelTester()
26685 .mr(4)
26686 .nr(4)
26687 .kr(1)
26688 .sr(1)
26689 .m(4)
26690 .n(4)
26691 .k(k)
26692 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080026693 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026694 }
26695 }
26696
26697 TEST(QU8_GEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_zero_point) {
26698 for (size_t k = 1; k <= 5; k += 2) {
26699 GemmMicrokernelTester()
26700 .mr(4)
26701 .nr(4)
26702 .kr(1)
26703 .sr(1)
26704 .m(4)
26705 .n(4)
26706 .k(k)
26707 .a_zero_point(0)
26708 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080026709 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026710 }
26711 }
26712#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26713
26714
26715TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
26716 GemmMicrokernelTester()
26717 .mr(1)
26718 .nr(2)
26719 .kr(1)
26720 .sr(1)
26721 .m(1)
26722 .n(2)
26723 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026724 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026725}
26726
26727TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cn) {
26728 GemmMicrokernelTester()
26729 .mr(1)
26730 .nr(2)
26731 .kr(1)
26732 .sr(1)
26733 .m(1)
26734 .n(2)
26735 .k(1)
26736 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080026737 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026738}
26739
26740TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_strided_a) {
26741 GemmMicrokernelTester()
26742 .mr(1)
26743 .nr(2)
26744 .kr(1)
26745 .sr(1)
26746 .m(1)
26747 .n(2)
26748 .k(1)
26749 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026750 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026751}
26752
26753TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026754 for (uint32_t n = 1; n <= 2; n++) {
26755 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026756 GemmMicrokernelTester()
26757 .mr(1)
26758 .nr(2)
26759 .kr(1)
26760 .sr(1)
26761 .m(m)
26762 .n(n)
26763 .k(1)
26764 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026765 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026766 }
26767 }
26768}
26769
26770TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
26771 for (uint32_t m = 1; m <= 1; m++) {
26772 GemmMicrokernelTester()
26773 .mr(1)
26774 .nr(2)
26775 .kr(1)
26776 .sr(1)
26777 .m(m)
26778 .n(2)
26779 .k(1)
26780 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026781 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026782 }
26783}
26784
26785TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
26786 for (uint32_t n = 1; n <= 2; n++) {
26787 GemmMicrokernelTester()
26788 .mr(1)
26789 .nr(2)
26790 .kr(1)
26791 .sr(1)
26792 .m(1)
26793 .n(n)
26794 .k(1)
26795 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026796 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026797 }
26798}
26799
26800TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1) {
26801 for (size_t k = 2; k < 10; k++) {
26802 GemmMicrokernelTester()
26803 .mr(1)
26804 .nr(2)
26805 .kr(1)
26806 .sr(1)
26807 .m(1)
26808 .n(2)
26809 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026810 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026811 }
26812}
26813
26814TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1_strided_a) {
26815 for (size_t k = 2; k < 10; k++) {
26816 GemmMicrokernelTester()
26817 .mr(1)
26818 .nr(2)
26819 .kr(1)
26820 .sr(1)
26821 .m(1)
26822 .n(2)
26823 .k(k)
26824 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080026825 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026826 }
26827}
26828
26829TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1_subtile) {
26830 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026831 for (uint32_t n = 1; n <= 2; n++) {
26832 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026833 GemmMicrokernelTester()
26834 .mr(1)
26835 .nr(2)
26836 .kr(1)
26837 .sr(1)
26838 .m(m)
26839 .n(n)
26840 .k(k)
26841 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026842 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026843 }
26844 }
26845 }
26846}
26847
26848TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2) {
26849 for (uint32_t n = 3; n < 4; n++) {
26850 for (size_t k = 1; k <= 5; k += 2) {
26851 GemmMicrokernelTester()
26852 .mr(1)
26853 .nr(2)
26854 .kr(1)
26855 .sr(1)
26856 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026857 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026858 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026859 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026860 }
26861 }
26862}
26863
26864TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
26865 for (uint32_t n = 3; n < 4; n++) {
26866 for (size_t k = 1; k <= 5; k += 2) {
26867 GemmMicrokernelTester()
26868 .mr(1)
26869 .nr(2)
26870 .kr(1)
26871 .sr(1)
26872 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026873 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026874 .k(k)
26875 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080026876 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026877 }
26878 }
26879}
26880
26881TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_strided_a) {
26882 for (uint32_t n = 3; n < 4; n++) {
26883 for (size_t k = 1; k <= 5; k += 2) {
26884 GemmMicrokernelTester()
26885 .mr(1)
26886 .nr(2)
26887 .kr(1)
26888 .sr(1)
26889 .m(1)
26890 .n(n)
26891 .k(k)
26892 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026893 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026894 }
26895 }
26896}
26897
26898TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_subtile) {
26899 for (uint32_t n = 3; n < 4; n++) {
26900 for (size_t k = 1; k <= 5; k += 2) {
26901 for (uint32_t m = 1; m <= 1; m++) {
26902 GemmMicrokernelTester()
26903 .mr(1)
26904 .nr(2)
26905 .kr(1)
26906 .sr(1)
26907 .m(m)
26908 .n(n)
26909 .k(k)
26910 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026911 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026912 }
26913 }
26914 }
26915}
26916
26917TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2) {
26918 for (uint32_t n = 4; n <= 6; n += 2) {
26919 for (size_t k = 1; k <= 5; k += 2) {
26920 GemmMicrokernelTester()
26921 .mr(1)
26922 .nr(2)
26923 .kr(1)
26924 .sr(1)
26925 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026926 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026927 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026928 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026929 }
26930 }
26931}
26932
26933TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
26934 for (uint32_t n = 4; n <= 6; n += 2) {
26935 for (size_t k = 1; k <= 5; k += 2) {
26936 GemmMicrokernelTester()
26937 .mr(1)
26938 .nr(2)
26939 .kr(1)
26940 .sr(1)
26941 .m(1)
26942 .n(n)
26943 .k(k)
26944 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080026945 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026946 }
26947 }
26948}
26949
26950TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_strided_a) {
26951 for (uint32_t n = 4; n <= 6; n += 2) {
26952 for (size_t k = 1; k <= 5; k += 2) {
26953 GemmMicrokernelTester()
26954 .mr(1)
26955 .nr(2)
26956 .kr(1)
26957 .sr(1)
26958 .m(1)
26959 .n(n)
26960 .k(k)
26961 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026962 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026963 }
26964 }
26965}
26966
26967TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_subtile) {
26968 for (uint32_t n = 4; n <= 6; n += 2) {
26969 for (size_t k = 1; k <= 5; k += 2) {
26970 for (uint32_t m = 1; m <= 1; m++) {
26971 GemmMicrokernelTester()
26972 .mr(1)
26973 .nr(2)
26974 .kr(1)
26975 .sr(1)
26976 .m(m)
26977 .n(n)
26978 .k(k)
26979 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026980 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026981 }
26982 }
26983 }
26984}
26985
26986TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm_subtile) {
26987 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026988 for (uint32_t n = 1; n <= 2; n++) {
26989 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026990 GemmMicrokernelTester()
26991 .mr(1)
26992 .nr(2)
26993 .kr(1)
26994 .sr(1)
26995 .m(m)
26996 .n(n)
26997 .k(k)
26998 .cm_stride(5)
26999 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027000 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027001 }
27002 }
27003 }
27004}
27005
27006TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmin) {
27007 GemmMicrokernelTester()
27008 .mr(1)
27009 .nr(2)
27010 .kr(1)
27011 .sr(1)
27012 .m(1)
27013 .n(2)
27014 .k(1)
27015 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027016 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027017}
27018
27019TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmax) {
27020 GemmMicrokernelTester()
27021 .mr(1)
27022 .nr(2)
27023 .kr(1)
27024 .sr(1)
27025 .m(1)
27026 .n(2)
27027 .k(1)
27028 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027029 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027030}
27031
27032TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm) {
27033 GemmMicrokernelTester()
27034 .mr(1)
27035 .nr(2)
27036 .kr(1)
27037 .sr(1)
27038 .m(1)
27039 .n(2)
27040 .k(1)
27041 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080027042 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027043}
27044
27045TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_a_zero_point) {
27046 for (size_t k = 1; k <= 5; k += 2) {
27047 GemmMicrokernelTester()
27048 .mr(1)
27049 .nr(2)
27050 .kr(1)
27051 .sr(1)
27052 .m(1)
27053 .n(2)
27054 .k(k)
27055 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027056 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027057 }
27058}
27059
27060TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_b_zero_point) {
27061 for (size_t k = 1; k <= 5; k += 2) {
27062 GemmMicrokernelTester()
27063 .mr(1)
27064 .nr(2)
27065 .kr(1)
27066 .sr(1)
27067 .m(1)
27068 .n(2)
27069 .k(k)
27070 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027071 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027072 }
27073}
27074
27075TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_zero_point) {
27076 for (size_t k = 1; k <= 5; k += 2) {
27077 GemmMicrokernelTester()
27078 .mr(1)
27079 .nr(2)
27080 .kr(1)
27081 .sr(1)
27082 .m(1)
27083 .n(2)
27084 .k(k)
27085 .a_zero_point(0)
27086 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027087 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027088 }
27089}
27090
27091TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1) {
27092 GemmMicrokernelTester()
27093 .mr(2)
27094 .nr(2)
27095 .kr(1)
27096 .sr(1)
27097 .m(2)
27098 .n(2)
27099 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027100 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027101}
27102
27103TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cn) {
27104 GemmMicrokernelTester()
27105 .mr(2)
27106 .nr(2)
27107 .kr(1)
27108 .sr(1)
27109 .m(2)
27110 .n(2)
27111 .k(1)
27112 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080027113 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027114}
27115
27116TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_strided_a) {
27117 GemmMicrokernelTester()
27118 .mr(2)
27119 .nr(2)
27120 .kr(1)
27121 .sr(1)
27122 .m(2)
27123 .n(2)
27124 .k(1)
27125 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027126 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027127}
27128
27129TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027130 for (uint32_t n = 1; n <= 2; n++) {
27131 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027132 GemmMicrokernelTester()
27133 .mr(2)
27134 .nr(2)
27135 .kr(1)
27136 .sr(1)
27137 .m(m)
27138 .n(n)
27139 .k(1)
27140 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027141 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027142 }
27143 }
27144}
27145
27146TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
27147 for (uint32_t m = 1; m <= 2; m++) {
27148 GemmMicrokernelTester()
27149 .mr(2)
27150 .nr(2)
27151 .kr(1)
27152 .sr(1)
27153 .m(m)
27154 .n(2)
27155 .k(1)
27156 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027157 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027158 }
27159}
27160
27161TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
27162 for (uint32_t n = 1; n <= 2; n++) {
27163 GemmMicrokernelTester()
27164 .mr(2)
27165 .nr(2)
27166 .kr(1)
27167 .sr(1)
27168 .m(2)
27169 .n(n)
27170 .k(1)
27171 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027172 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027173 }
27174}
27175
27176TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1) {
27177 for (size_t k = 2; k < 10; k++) {
27178 GemmMicrokernelTester()
27179 .mr(2)
27180 .nr(2)
27181 .kr(1)
27182 .sr(1)
27183 .m(2)
27184 .n(2)
27185 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027186 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027187 }
27188}
27189
27190TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_strided_a) {
27191 for (size_t k = 2; k < 10; k++) {
27192 GemmMicrokernelTester()
27193 .mr(2)
27194 .nr(2)
27195 .kr(1)
27196 .sr(1)
27197 .m(2)
27198 .n(2)
27199 .k(k)
27200 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080027201 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027202 }
27203}
27204
27205TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_subtile) {
27206 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027207 for (uint32_t n = 1; n <= 2; n++) {
27208 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027209 GemmMicrokernelTester()
27210 .mr(2)
27211 .nr(2)
27212 .kr(1)
27213 .sr(1)
27214 .m(m)
27215 .n(n)
27216 .k(k)
27217 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027218 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027219 }
27220 }
27221 }
27222}
27223
27224TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2) {
27225 for (uint32_t n = 3; n < 4; n++) {
27226 for (size_t k = 1; k <= 5; k += 2) {
27227 GemmMicrokernelTester()
27228 .mr(2)
27229 .nr(2)
27230 .kr(1)
27231 .sr(1)
27232 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027233 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027234 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027235 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027236 }
27237 }
27238}
27239
27240TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
27241 for (uint32_t n = 3; n < 4; n++) {
27242 for (size_t k = 1; k <= 5; k += 2) {
27243 GemmMicrokernelTester()
27244 .mr(2)
27245 .nr(2)
27246 .kr(1)
27247 .sr(1)
27248 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027249 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027250 .k(k)
27251 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080027252 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027253 }
27254 }
27255}
27256
27257TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_a) {
27258 for (uint32_t n = 3; n < 4; n++) {
27259 for (size_t k = 1; k <= 5; k += 2) {
27260 GemmMicrokernelTester()
27261 .mr(2)
27262 .nr(2)
27263 .kr(1)
27264 .sr(1)
27265 .m(2)
27266 .n(n)
27267 .k(k)
27268 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027269 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027270 }
27271 }
27272}
27273
27274TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_subtile) {
27275 for (uint32_t n = 3; n < 4; n++) {
27276 for (size_t k = 1; k <= 5; k += 2) {
27277 for (uint32_t m = 1; m <= 2; m++) {
27278 GemmMicrokernelTester()
27279 .mr(2)
27280 .nr(2)
27281 .kr(1)
27282 .sr(1)
27283 .m(m)
27284 .n(n)
27285 .k(k)
27286 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027287 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027288 }
27289 }
27290 }
27291}
27292
27293TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2) {
27294 for (uint32_t n = 4; n <= 6; n += 2) {
27295 for (size_t k = 1; k <= 5; k += 2) {
27296 GemmMicrokernelTester()
27297 .mr(2)
27298 .nr(2)
27299 .kr(1)
27300 .sr(1)
27301 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027302 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027303 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027304 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027305 }
27306 }
27307}
27308
27309TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
27310 for (uint32_t n = 4; n <= 6; n += 2) {
27311 for (size_t k = 1; k <= 5; k += 2) {
27312 GemmMicrokernelTester()
27313 .mr(2)
27314 .nr(2)
27315 .kr(1)
27316 .sr(1)
27317 .m(2)
27318 .n(n)
27319 .k(k)
27320 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080027321 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027322 }
27323 }
27324}
27325
27326TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_a) {
27327 for (uint32_t n = 4; n <= 6; n += 2) {
27328 for (size_t k = 1; k <= 5; k += 2) {
27329 GemmMicrokernelTester()
27330 .mr(2)
27331 .nr(2)
27332 .kr(1)
27333 .sr(1)
27334 .m(2)
27335 .n(n)
27336 .k(k)
27337 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027338 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027339 }
27340 }
27341}
27342
27343TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_subtile) {
27344 for (uint32_t n = 4; n <= 6; n += 2) {
27345 for (size_t k = 1; k <= 5; k += 2) {
27346 for (uint32_t m = 1; m <= 2; m++) {
27347 GemmMicrokernelTester()
27348 .mr(2)
27349 .nr(2)
27350 .kr(1)
27351 .sr(1)
27352 .m(m)
27353 .n(n)
27354 .k(k)
27355 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027356 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027357 }
27358 }
27359 }
27360}
27361
27362TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm_subtile) {
27363 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027364 for (uint32_t n = 1; n <= 2; n++) {
27365 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027366 GemmMicrokernelTester()
27367 .mr(2)
27368 .nr(2)
27369 .kr(1)
27370 .sr(1)
27371 .m(m)
27372 .n(n)
27373 .k(k)
27374 .cm_stride(5)
27375 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027376 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027377 }
27378 }
27379 }
27380}
27381
27382TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmin) {
27383 GemmMicrokernelTester()
27384 .mr(2)
27385 .nr(2)
27386 .kr(1)
27387 .sr(1)
27388 .m(2)
27389 .n(2)
27390 .k(1)
27391 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027392 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027393}
27394
27395TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmax) {
27396 GemmMicrokernelTester()
27397 .mr(2)
27398 .nr(2)
27399 .kr(1)
27400 .sr(1)
27401 .m(2)
27402 .n(2)
27403 .k(1)
27404 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027405 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027406}
27407
27408TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm) {
27409 GemmMicrokernelTester()
27410 .mr(2)
27411 .nr(2)
27412 .kr(1)
27413 .sr(1)
27414 .m(2)
27415 .n(2)
27416 .k(1)
27417 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080027418 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027419}
27420
27421TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_a_zero_point) {
27422 for (size_t k = 1; k <= 5; k += 2) {
27423 GemmMicrokernelTester()
27424 .mr(2)
27425 .nr(2)
27426 .kr(1)
27427 .sr(1)
27428 .m(2)
27429 .n(2)
27430 .k(k)
27431 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027432 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027433 }
27434}
27435
27436TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_b_zero_point) {
27437 for (size_t k = 1; k <= 5; k += 2) {
27438 GemmMicrokernelTester()
27439 .mr(2)
27440 .nr(2)
27441 .kr(1)
27442 .sr(1)
27443 .m(2)
27444 .n(2)
27445 .k(k)
27446 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027447 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027448 }
27449}
27450
27451TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_zero_point) {
27452 for (size_t k = 1; k <= 5; k += 2) {
27453 GemmMicrokernelTester()
27454 .mr(2)
27455 .nr(2)
27456 .kr(1)
27457 .sr(1)
27458 .m(2)
27459 .n(2)
27460 .k(k)
27461 .a_zero_point(0)
27462 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027463 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027464 }
27465}
27466
27467TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1) {
27468 GemmMicrokernelTester()
27469 .mr(1)
27470 .nr(4)
27471 .kr(1)
27472 .sr(1)
27473 .m(1)
27474 .n(4)
27475 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027476 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027477}
27478
27479TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cn) {
27480 GemmMicrokernelTester()
27481 .mr(1)
27482 .nr(4)
27483 .kr(1)
27484 .sr(1)
27485 .m(1)
27486 .n(4)
27487 .k(1)
27488 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027489 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027490}
27491
27492TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_strided_a) {
27493 GemmMicrokernelTester()
27494 .mr(1)
27495 .nr(4)
27496 .kr(1)
27497 .sr(1)
27498 .m(1)
27499 .n(4)
27500 .k(1)
27501 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027502 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027503}
27504
27505TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027506 for (uint32_t n = 1; n <= 4; n++) {
27507 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027508 GemmMicrokernelTester()
27509 .mr(1)
27510 .nr(4)
27511 .kr(1)
27512 .sr(1)
27513 .m(m)
27514 .n(n)
27515 .k(1)
27516 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027517 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027518 }
27519 }
27520}
27521
27522TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
27523 for (uint32_t m = 1; m <= 1; m++) {
27524 GemmMicrokernelTester()
27525 .mr(1)
27526 .nr(4)
27527 .kr(1)
27528 .sr(1)
27529 .m(m)
27530 .n(4)
27531 .k(1)
27532 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027533 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027534 }
27535}
27536
27537TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
27538 for (uint32_t n = 1; n <= 4; n++) {
27539 GemmMicrokernelTester()
27540 .mr(1)
27541 .nr(4)
27542 .kr(1)
27543 .sr(1)
27544 .m(1)
27545 .n(n)
27546 .k(1)
27547 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027548 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027549 }
27550}
27551
27552TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1) {
27553 for (size_t k = 2; k < 10; k++) {
27554 GemmMicrokernelTester()
27555 .mr(1)
27556 .nr(4)
27557 .kr(1)
27558 .sr(1)
27559 .m(1)
27560 .n(4)
27561 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027562 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027563 }
27564}
27565
27566TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1_strided_a) {
27567 for (size_t k = 2; k < 10; k++) {
27568 GemmMicrokernelTester()
27569 .mr(1)
27570 .nr(4)
27571 .kr(1)
27572 .sr(1)
27573 .m(1)
27574 .n(4)
27575 .k(k)
27576 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080027577 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027578 }
27579}
27580
27581TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1_subtile) {
27582 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027583 for (uint32_t n = 1; n <= 4; n++) {
27584 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027585 GemmMicrokernelTester()
27586 .mr(1)
27587 .nr(4)
27588 .kr(1)
27589 .sr(1)
27590 .m(m)
27591 .n(n)
27592 .k(k)
27593 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027594 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027595 }
27596 }
27597 }
27598}
27599
27600TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4) {
27601 for (uint32_t n = 5; n < 8; n++) {
27602 for (size_t k = 1; k <= 5; k += 2) {
27603 GemmMicrokernelTester()
27604 .mr(1)
27605 .nr(4)
27606 .kr(1)
27607 .sr(1)
27608 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027609 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027610 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027611 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027612 }
27613 }
27614}
27615
27616TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
27617 for (uint32_t n = 5; n < 8; n++) {
27618 for (size_t k = 1; k <= 5; k += 2) {
27619 GemmMicrokernelTester()
27620 .mr(1)
27621 .nr(4)
27622 .kr(1)
27623 .sr(1)
27624 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027625 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027626 .k(k)
27627 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027628 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027629 }
27630 }
27631}
27632
27633TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_strided_a) {
27634 for (uint32_t n = 5; n < 8; n++) {
27635 for (size_t k = 1; k <= 5; k += 2) {
27636 GemmMicrokernelTester()
27637 .mr(1)
27638 .nr(4)
27639 .kr(1)
27640 .sr(1)
27641 .m(1)
27642 .n(n)
27643 .k(k)
27644 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027645 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027646 }
27647 }
27648}
27649
27650TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_subtile) {
27651 for (uint32_t n = 5; n < 8; n++) {
27652 for (size_t k = 1; k <= 5; k += 2) {
27653 for (uint32_t m = 1; m <= 1; m++) {
27654 GemmMicrokernelTester()
27655 .mr(1)
27656 .nr(4)
27657 .kr(1)
27658 .sr(1)
27659 .m(m)
27660 .n(n)
27661 .k(k)
27662 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027663 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027664 }
27665 }
27666 }
27667}
27668
27669TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4) {
27670 for (uint32_t n = 8; n <= 12; n += 4) {
27671 for (size_t k = 1; k <= 5; k += 2) {
27672 GemmMicrokernelTester()
27673 .mr(1)
27674 .nr(4)
27675 .kr(1)
27676 .sr(1)
27677 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027678 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027679 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027680 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027681 }
27682 }
27683}
27684
27685TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
27686 for (uint32_t n = 8; n <= 12; n += 4) {
27687 for (size_t k = 1; k <= 5; k += 2) {
27688 GemmMicrokernelTester()
27689 .mr(1)
27690 .nr(4)
27691 .kr(1)
27692 .sr(1)
27693 .m(1)
27694 .n(n)
27695 .k(k)
27696 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027697 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027698 }
27699 }
27700}
27701
27702TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_strided_a) {
27703 for (uint32_t n = 8; n <= 12; n += 4) {
27704 for (size_t k = 1; k <= 5; k += 2) {
27705 GemmMicrokernelTester()
27706 .mr(1)
27707 .nr(4)
27708 .kr(1)
27709 .sr(1)
27710 .m(1)
27711 .n(n)
27712 .k(k)
27713 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027714 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027715 }
27716 }
27717}
27718
27719TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_subtile) {
27720 for (uint32_t n = 8; n <= 12; n += 4) {
27721 for (size_t k = 1; k <= 5; k += 2) {
27722 for (uint32_t m = 1; m <= 1; m++) {
27723 GemmMicrokernelTester()
27724 .mr(1)
27725 .nr(4)
27726 .kr(1)
27727 .sr(1)
27728 .m(m)
27729 .n(n)
27730 .k(k)
27731 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027732 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027733 }
27734 }
27735 }
27736}
27737
27738TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm_subtile) {
27739 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027740 for (uint32_t n = 1; n <= 4; n++) {
27741 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027742 GemmMicrokernelTester()
27743 .mr(1)
27744 .nr(4)
27745 .kr(1)
27746 .sr(1)
27747 .m(m)
27748 .n(n)
27749 .k(k)
27750 .cm_stride(7)
27751 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027752 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027753 }
27754 }
27755 }
27756}
27757
27758TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmin) {
27759 GemmMicrokernelTester()
27760 .mr(1)
27761 .nr(4)
27762 .kr(1)
27763 .sr(1)
27764 .m(1)
27765 .n(4)
27766 .k(1)
27767 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027768 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027769}
27770
27771TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmax) {
27772 GemmMicrokernelTester()
27773 .mr(1)
27774 .nr(4)
27775 .kr(1)
27776 .sr(1)
27777 .m(1)
27778 .n(4)
27779 .k(1)
27780 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027781 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027782}
27783
27784TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm) {
27785 GemmMicrokernelTester()
27786 .mr(1)
27787 .nr(4)
27788 .kr(1)
27789 .sr(1)
27790 .m(1)
27791 .n(4)
27792 .k(1)
27793 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027794 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027795}
27796
27797TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_a_zero_point) {
27798 for (size_t k = 1; k <= 5; k += 2) {
27799 GemmMicrokernelTester()
27800 .mr(1)
27801 .nr(4)
27802 .kr(1)
27803 .sr(1)
27804 .m(1)
27805 .n(4)
27806 .k(k)
27807 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027808 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027809 }
27810}
27811
27812TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_b_zero_point) {
27813 for (size_t k = 1; k <= 5; k += 2) {
27814 GemmMicrokernelTester()
27815 .mr(1)
27816 .nr(4)
27817 .kr(1)
27818 .sr(1)
27819 .m(1)
27820 .n(4)
27821 .k(k)
27822 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027823 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027824 }
27825}
27826
27827TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_zero_point) {
27828 for (size_t k = 1; k <= 5; k += 2) {
27829 GemmMicrokernelTester()
27830 .mr(1)
27831 .nr(4)
27832 .kr(1)
27833 .sr(1)
27834 .m(1)
27835 .n(4)
27836 .k(k)
27837 .a_zero_point(0)
27838 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027839 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027840 }
27841}
27842
27843TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1) {
27844 GemmMicrokernelTester()
27845 .mr(2)
27846 .nr(4)
27847 .kr(1)
27848 .sr(1)
27849 .m(2)
27850 .n(4)
27851 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027852 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027853}
27854
27855TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cn) {
27856 GemmMicrokernelTester()
27857 .mr(2)
27858 .nr(4)
27859 .kr(1)
27860 .sr(1)
27861 .m(2)
27862 .n(4)
27863 .k(1)
27864 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027865 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027866}
27867
27868TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_strided_a) {
27869 GemmMicrokernelTester()
27870 .mr(2)
27871 .nr(4)
27872 .kr(1)
27873 .sr(1)
27874 .m(2)
27875 .n(4)
27876 .k(1)
27877 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027878 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027879}
27880
27881TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027882 for (uint32_t n = 1; n <= 4; n++) {
27883 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027884 GemmMicrokernelTester()
27885 .mr(2)
27886 .nr(4)
27887 .kr(1)
27888 .sr(1)
27889 .m(m)
27890 .n(n)
27891 .k(1)
27892 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027893 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027894 }
27895 }
27896}
27897
27898TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
27899 for (uint32_t m = 1; m <= 2; m++) {
27900 GemmMicrokernelTester()
27901 .mr(2)
27902 .nr(4)
27903 .kr(1)
27904 .sr(1)
27905 .m(m)
27906 .n(4)
27907 .k(1)
27908 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027909 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027910 }
27911}
27912
27913TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
27914 for (uint32_t n = 1; n <= 4; n++) {
27915 GemmMicrokernelTester()
27916 .mr(2)
27917 .nr(4)
27918 .kr(1)
27919 .sr(1)
27920 .m(2)
27921 .n(n)
27922 .k(1)
27923 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027924 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027925 }
27926}
27927
27928TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1) {
27929 for (size_t k = 2; k < 10; k++) {
27930 GemmMicrokernelTester()
27931 .mr(2)
27932 .nr(4)
27933 .kr(1)
27934 .sr(1)
27935 .m(2)
27936 .n(4)
27937 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027938 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027939 }
27940}
27941
27942TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1_strided_a) {
27943 for (size_t k = 2; k < 10; k++) {
27944 GemmMicrokernelTester()
27945 .mr(2)
27946 .nr(4)
27947 .kr(1)
27948 .sr(1)
27949 .m(2)
27950 .n(4)
27951 .k(k)
27952 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080027953 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027954 }
27955}
27956
27957TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1_subtile) {
27958 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027959 for (uint32_t n = 1; n <= 4; n++) {
27960 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027961 GemmMicrokernelTester()
27962 .mr(2)
27963 .nr(4)
27964 .kr(1)
27965 .sr(1)
27966 .m(m)
27967 .n(n)
27968 .k(k)
27969 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027970 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027971 }
27972 }
27973 }
27974}
27975
27976TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4) {
27977 for (uint32_t n = 5; n < 8; n++) {
27978 for (size_t k = 1; k <= 5; k += 2) {
27979 GemmMicrokernelTester()
27980 .mr(2)
27981 .nr(4)
27982 .kr(1)
27983 .sr(1)
27984 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027985 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027986 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027987 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027988 }
27989 }
27990}
27991
27992TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
27993 for (uint32_t n = 5; n < 8; n++) {
27994 for (size_t k = 1; k <= 5; k += 2) {
27995 GemmMicrokernelTester()
27996 .mr(2)
27997 .nr(4)
27998 .kr(1)
27999 .sr(1)
28000 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028001 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028002 .k(k)
28003 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028004 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028005 }
28006 }
28007}
28008
28009TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_strided_a) {
28010 for (uint32_t n = 5; n < 8; n++) {
28011 for (size_t k = 1; k <= 5; k += 2) {
28012 GemmMicrokernelTester()
28013 .mr(2)
28014 .nr(4)
28015 .kr(1)
28016 .sr(1)
28017 .m(2)
28018 .n(n)
28019 .k(k)
28020 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028021 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028022 }
28023 }
28024}
28025
28026TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_subtile) {
28027 for (uint32_t n = 5; n < 8; n++) {
28028 for (size_t k = 1; k <= 5; k += 2) {
28029 for (uint32_t m = 1; m <= 2; m++) {
28030 GemmMicrokernelTester()
28031 .mr(2)
28032 .nr(4)
28033 .kr(1)
28034 .sr(1)
28035 .m(m)
28036 .n(n)
28037 .k(k)
28038 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028039 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028040 }
28041 }
28042 }
28043}
28044
28045TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4) {
28046 for (uint32_t n = 8; n <= 12; n += 4) {
28047 for (size_t k = 1; k <= 5; k += 2) {
28048 GemmMicrokernelTester()
28049 .mr(2)
28050 .nr(4)
28051 .kr(1)
28052 .sr(1)
28053 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028054 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028055 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028056 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028057 }
28058 }
28059}
28060
28061TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
28062 for (uint32_t n = 8; n <= 12; n += 4) {
28063 for (size_t k = 1; k <= 5; k += 2) {
28064 GemmMicrokernelTester()
28065 .mr(2)
28066 .nr(4)
28067 .kr(1)
28068 .sr(1)
28069 .m(2)
28070 .n(n)
28071 .k(k)
28072 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028073 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028074 }
28075 }
28076}
28077
28078TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_strided_a) {
28079 for (uint32_t n = 8; n <= 12; n += 4) {
28080 for (size_t k = 1; k <= 5; k += 2) {
28081 GemmMicrokernelTester()
28082 .mr(2)
28083 .nr(4)
28084 .kr(1)
28085 .sr(1)
28086 .m(2)
28087 .n(n)
28088 .k(k)
28089 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028090 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028091 }
28092 }
28093}
28094
28095TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_subtile) {
28096 for (uint32_t n = 8; n <= 12; n += 4) {
28097 for (size_t k = 1; k <= 5; k += 2) {
28098 for (uint32_t m = 1; m <= 2; m++) {
28099 GemmMicrokernelTester()
28100 .mr(2)
28101 .nr(4)
28102 .kr(1)
28103 .sr(1)
28104 .m(m)
28105 .n(n)
28106 .k(k)
28107 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028108 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028109 }
28110 }
28111 }
28112}
28113
28114TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm_subtile) {
28115 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028116 for (uint32_t n = 1; n <= 4; n++) {
28117 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028118 GemmMicrokernelTester()
28119 .mr(2)
28120 .nr(4)
28121 .kr(1)
28122 .sr(1)
28123 .m(m)
28124 .n(n)
28125 .k(k)
28126 .cm_stride(7)
28127 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028128 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028129 }
28130 }
28131 }
28132}
28133
28134TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmin) {
28135 GemmMicrokernelTester()
28136 .mr(2)
28137 .nr(4)
28138 .kr(1)
28139 .sr(1)
28140 .m(2)
28141 .n(4)
28142 .k(1)
28143 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028144 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028145}
28146
28147TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmax) {
28148 GemmMicrokernelTester()
28149 .mr(2)
28150 .nr(4)
28151 .kr(1)
28152 .sr(1)
28153 .m(2)
28154 .n(4)
28155 .k(1)
28156 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028157 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028158}
28159
28160TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm) {
28161 GemmMicrokernelTester()
28162 .mr(2)
28163 .nr(4)
28164 .kr(1)
28165 .sr(1)
28166 .m(2)
28167 .n(4)
28168 .k(1)
28169 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028170 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028171}
28172
28173TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_a_zero_point) {
28174 for (size_t k = 1; k <= 5; k += 2) {
28175 GemmMicrokernelTester()
28176 .mr(2)
28177 .nr(4)
28178 .kr(1)
28179 .sr(1)
28180 .m(2)
28181 .n(4)
28182 .k(k)
28183 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028184 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028185 }
28186}
28187
28188TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_b_zero_point) {
28189 for (size_t k = 1; k <= 5; k += 2) {
28190 GemmMicrokernelTester()
28191 .mr(2)
28192 .nr(4)
28193 .kr(1)
28194 .sr(1)
28195 .m(2)
28196 .n(4)
28197 .k(k)
28198 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028199 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028200 }
28201}
28202
28203TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_zero_point) {
28204 for (size_t k = 1; k <= 5; k += 2) {
28205 GemmMicrokernelTester()
28206 .mr(2)
28207 .nr(4)
28208 .kr(1)
28209 .sr(1)
28210 .m(2)
28211 .n(4)
28212 .k(k)
28213 .a_zero_point(0)
28214 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028215 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028216 }
28217}
28218
28219TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
28220 GemmMicrokernelTester()
28221 .mr(3)
28222 .nr(2)
28223 .kr(1)
28224 .sr(1)
28225 .m(3)
28226 .n(2)
28227 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028228 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028229}
28230
28231TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
28232 GemmMicrokernelTester()
28233 .mr(3)
28234 .nr(2)
28235 .kr(1)
28236 .sr(1)
28237 .m(3)
28238 .n(2)
28239 .k(1)
28240 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028241 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028242}
28243
28244TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
28245 GemmMicrokernelTester()
28246 .mr(3)
28247 .nr(2)
28248 .kr(1)
28249 .sr(1)
28250 .m(3)
28251 .n(2)
28252 .k(1)
28253 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028254 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028255}
28256
28257TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028258 for (uint32_t n = 1; n <= 2; n++) {
28259 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028260 GemmMicrokernelTester()
28261 .mr(3)
28262 .nr(2)
28263 .kr(1)
28264 .sr(1)
28265 .m(m)
28266 .n(n)
28267 .k(1)
28268 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028269 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028270 }
28271 }
28272}
28273
28274TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
28275 for (uint32_t m = 1; m <= 3; m++) {
28276 GemmMicrokernelTester()
28277 .mr(3)
28278 .nr(2)
28279 .kr(1)
28280 .sr(1)
28281 .m(m)
28282 .n(2)
28283 .k(1)
28284 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028285 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028286 }
28287}
28288
28289TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
28290 for (uint32_t n = 1; n <= 2; n++) {
28291 GemmMicrokernelTester()
28292 .mr(3)
28293 .nr(2)
28294 .kr(1)
28295 .sr(1)
28296 .m(3)
28297 .n(n)
28298 .k(1)
28299 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028300 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028301 }
28302}
28303
28304TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
28305 for (size_t k = 2; k < 10; k++) {
28306 GemmMicrokernelTester()
28307 .mr(3)
28308 .nr(2)
28309 .kr(1)
28310 .sr(1)
28311 .m(3)
28312 .n(2)
28313 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028314 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028315 }
28316}
28317
28318TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
28319 for (size_t k = 2; k < 10; k++) {
28320 GemmMicrokernelTester()
28321 .mr(3)
28322 .nr(2)
28323 .kr(1)
28324 .sr(1)
28325 .m(3)
28326 .n(2)
28327 .k(k)
28328 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080028329 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028330 }
28331}
28332
28333TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
28334 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028335 for (uint32_t n = 1; n <= 2; n++) {
28336 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028337 GemmMicrokernelTester()
28338 .mr(3)
28339 .nr(2)
28340 .kr(1)
28341 .sr(1)
28342 .m(m)
28343 .n(n)
28344 .k(k)
28345 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028346 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028347 }
28348 }
28349 }
28350}
28351
28352TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
28353 for (uint32_t n = 3; n < 4; n++) {
28354 for (size_t k = 1; k <= 5; k += 2) {
28355 GemmMicrokernelTester()
28356 .mr(3)
28357 .nr(2)
28358 .kr(1)
28359 .sr(1)
28360 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028361 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028362 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028363 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028364 }
28365 }
28366}
28367
28368TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
28369 for (uint32_t n = 3; n < 4; n++) {
28370 for (size_t k = 1; k <= 5; k += 2) {
28371 GemmMicrokernelTester()
28372 .mr(3)
28373 .nr(2)
28374 .kr(1)
28375 .sr(1)
28376 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028377 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028378 .k(k)
28379 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028380 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028381 }
28382 }
28383}
28384
28385TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
28386 for (uint32_t n = 3; n < 4; n++) {
28387 for (size_t k = 1; k <= 5; k += 2) {
28388 GemmMicrokernelTester()
28389 .mr(3)
28390 .nr(2)
28391 .kr(1)
28392 .sr(1)
28393 .m(3)
28394 .n(n)
28395 .k(k)
28396 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028397 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028398 }
28399 }
28400}
28401
28402TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
28403 for (uint32_t n = 3; n < 4; n++) {
28404 for (size_t k = 1; k <= 5; k += 2) {
28405 for (uint32_t m = 1; m <= 3; m++) {
28406 GemmMicrokernelTester()
28407 .mr(3)
28408 .nr(2)
28409 .kr(1)
28410 .sr(1)
28411 .m(m)
28412 .n(n)
28413 .k(k)
28414 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028415 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028416 }
28417 }
28418 }
28419}
28420
28421TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
28422 for (uint32_t n = 4; n <= 6; n += 2) {
28423 for (size_t k = 1; k <= 5; k += 2) {
28424 GemmMicrokernelTester()
28425 .mr(3)
28426 .nr(2)
28427 .kr(1)
28428 .sr(1)
28429 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028430 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028431 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028432 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028433 }
28434 }
28435}
28436
28437TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
28438 for (uint32_t n = 4; n <= 6; n += 2) {
28439 for (size_t k = 1; k <= 5; k += 2) {
28440 GemmMicrokernelTester()
28441 .mr(3)
28442 .nr(2)
28443 .kr(1)
28444 .sr(1)
28445 .m(3)
28446 .n(n)
28447 .k(k)
28448 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028449 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028450 }
28451 }
28452}
28453
28454TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_a) {
28455 for (uint32_t n = 4; n <= 6; n += 2) {
28456 for (size_t k = 1; k <= 5; k += 2) {
28457 GemmMicrokernelTester()
28458 .mr(3)
28459 .nr(2)
28460 .kr(1)
28461 .sr(1)
28462 .m(3)
28463 .n(n)
28464 .k(k)
28465 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028466 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028467 }
28468 }
28469}
28470
28471TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
28472 for (uint32_t n = 4; n <= 6; n += 2) {
28473 for (size_t k = 1; k <= 5; k += 2) {
28474 for (uint32_t m = 1; m <= 3; m++) {
28475 GemmMicrokernelTester()
28476 .mr(3)
28477 .nr(2)
28478 .kr(1)
28479 .sr(1)
28480 .m(m)
28481 .n(n)
28482 .k(k)
28483 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028484 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028485 }
28486 }
28487 }
28488}
28489
28490TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
28491 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028492 for (uint32_t n = 1; n <= 2; n++) {
28493 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028494 GemmMicrokernelTester()
28495 .mr(3)
28496 .nr(2)
28497 .kr(1)
28498 .sr(1)
28499 .m(m)
28500 .n(n)
28501 .k(k)
28502 .cm_stride(5)
28503 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028504 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028505 }
28506 }
28507 }
28508}
28509
28510TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
28511 GemmMicrokernelTester()
28512 .mr(3)
28513 .nr(2)
28514 .kr(1)
28515 .sr(1)
28516 .m(3)
28517 .n(2)
28518 .k(1)
28519 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028520 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028521}
28522
28523TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
28524 GemmMicrokernelTester()
28525 .mr(3)
28526 .nr(2)
28527 .kr(1)
28528 .sr(1)
28529 .m(3)
28530 .n(2)
28531 .k(1)
28532 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028533 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028534}
28535
28536TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
28537 GemmMicrokernelTester()
28538 .mr(3)
28539 .nr(2)
28540 .kr(1)
28541 .sr(1)
28542 .m(3)
28543 .n(2)
28544 .k(1)
28545 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028546 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028547}
28548
28549TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_a_zero_point) {
28550 for (size_t k = 1; k <= 5; k += 2) {
28551 GemmMicrokernelTester()
28552 .mr(3)
28553 .nr(2)
28554 .kr(1)
28555 .sr(1)
28556 .m(3)
28557 .n(2)
28558 .k(k)
28559 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028560 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028561 }
28562}
28563
28564TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_b_zero_point) {
28565 for (size_t k = 1; k <= 5; k += 2) {
28566 GemmMicrokernelTester()
28567 .mr(3)
28568 .nr(2)
28569 .kr(1)
28570 .sr(1)
28571 .m(3)
28572 .n(2)
28573 .k(k)
28574 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028575 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028576 }
28577}
28578
28579TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_zero_point) {
28580 for (size_t k = 1; k <= 5; k += 2) {
28581 GemmMicrokernelTester()
28582 .mr(3)
28583 .nr(2)
28584 .kr(1)
28585 .sr(1)
28586 .m(3)
28587 .n(2)
28588 .k(k)
28589 .a_zero_point(0)
28590 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028591 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028592 }
28593}
28594
28595TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
28596 GemmMicrokernelTester()
28597 .mr(4)
28598 .nr(2)
28599 .kr(1)
28600 .sr(1)
28601 .m(4)
28602 .n(2)
28603 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028604 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028605}
28606
28607TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
28608 GemmMicrokernelTester()
28609 .mr(4)
28610 .nr(2)
28611 .kr(1)
28612 .sr(1)
28613 .m(4)
28614 .n(2)
28615 .k(1)
28616 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028617 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028618}
28619
28620TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
28621 GemmMicrokernelTester()
28622 .mr(4)
28623 .nr(2)
28624 .kr(1)
28625 .sr(1)
28626 .m(4)
28627 .n(2)
28628 .k(1)
28629 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028630 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028631}
28632
28633TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028634 for (uint32_t n = 1; n <= 2; n++) {
28635 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028636 GemmMicrokernelTester()
28637 .mr(4)
28638 .nr(2)
28639 .kr(1)
28640 .sr(1)
28641 .m(m)
28642 .n(n)
28643 .k(1)
28644 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028645 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028646 }
28647 }
28648}
28649
28650TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
28651 for (uint32_t m = 1; m <= 4; m++) {
28652 GemmMicrokernelTester()
28653 .mr(4)
28654 .nr(2)
28655 .kr(1)
28656 .sr(1)
28657 .m(m)
28658 .n(2)
28659 .k(1)
28660 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028661 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028662 }
28663}
28664
28665TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
28666 for (uint32_t n = 1; n <= 2; n++) {
28667 GemmMicrokernelTester()
28668 .mr(4)
28669 .nr(2)
28670 .kr(1)
28671 .sr(1)
28672 .m(4)
28673 .n(n)
28674 .k(1)
28675 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028676 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028677 }
28678}
28679
28680TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
28681 for (size_t k = 2; k < 10; k++) {
28682 GemmMicrokernelTester()
28683 .mr(4)
28684 .nr(2)
28685 .kr(1)
28686 .sr(1)
28687 .m(4)
28688 .n(2)
28689 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028690 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028691 }
28692}
28693
28694TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
28695 for (size_t k = 2; k < 10; k++) {
28696 GemmMicrokernelTester()
28697 .mr(4)
28698 .nr(2)
28699 .kr(1)
28700 .sr(1)
28701 .m(4)
28702 .n(2)
28703 .k(k)
28704 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080028705 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028706 }
28707}
28708
28709TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
28710 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028711 for (uint32_t n = 1; n <= 2; n++) {
28712 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028713 GemmMicrokernelTester()
28714 .mr(4)
28715 .nr(2)
28716 .kr(1)
28717 .sr(1)
28718 .m(m)
28719 .n(n)
28720 .k(k)
28721 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028722 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028723 }
28724 }
28725 }
28726}
28727
28728TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
28729 for (uint32_t n = 3; n < 4; n++) {
28730 for (size_t k = 1; k <= 5; k += 2) {
28731 GemmMicrokernelTester()
28732 .mr(4)
28733 .nr(2)
28734 .kr(1)
28735 .sr(1)
28736 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028737 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028738 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028739 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028740 }
28741 }
28742}
28743
28744TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
28745 for (uint32_t n = 3; n < 4; n++) {
28746 for (size_t k = 1; k <= 5; k += 2) {
28747 GemmMicrokernelTester()
28748 .mr(4)
28749 .nr(2)
28750 .kr(1)
28751 .sr(1)
28752 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028753 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028754 .k(k)
28755 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028756 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028757 }
28758 }
28759}
28760
28761TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
28762 for (uint32_t n = 3; n < 4; n++) {
28763 for (size_t k = 1; k <= 5; k += 2) {
28764 GemmMicrokernelTester()
28765 .mr(4)
28766 .nr(2)
28767 .kr(1)
28768 .sr(1)
28769 .m(4)
28770 .n(n)
28771 .k(k)
28772 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028773 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028774 }
28775 }
28776}
28777
28778TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
28779 for (uint32_t n = 3; n < 4; n++) {
28780 for (size_t k = 1; k <= 5; k += 2) {
28781 for (uint32_t m = 1; m <= 4; m++) {
28782 GemmMicrokernelTester()
28783 .mr(4)
28784 .nr(2)
28785 .kr(1)
28786 .sr(1)
28787 .m(m)
28788 .n(n)
28789 .k(k)
28790 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028791 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028792 }
28793 }
28794 }
28795}
28796
28797TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
28798 for (uint32_t n = 4; n <= 6; n += 2) {
28799 for (size_t k = 1; k <= 5; k += 2) {
28800 GemmMicrokernelTester()
28801 .mr(4)
28802 .nr(2)
28803 .kr(1)
28804 .sr(1)
28805 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028806 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028807 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028808 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028809 }
28810 }
28811}
28812
28813TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
28814 for (uint32_t n = 4; n <= 6; n += 2) {
28815 for (size_t k = 1; k <= 5; k += 2) {
28816 GemmMicrokernelTester()
28817 .mr(4)
28818 .nr(2)
28819 .kr(1)
28820 .sr(1)
28821 .m(4)
28822 .n(n)
28823 .k(k)
28824 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028825 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028826 }
28827 }
28828}
28829
28830TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_a) {
28831 for (uint32_t n = 4; n <= 6; n += 2) {
28832 for (size_t k = 1; k <= 5; k += 2) {
28833 GemmMicrokernelTester()
28834 .mr(4)
28835 .nr(2)
28836 .kr(1)
28837 .sr(1)
28838 .m(4)
28839 .n(n)
28840 .k(k)
28841 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028842 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028843 }
28844 }
28845}
28846
28847TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
28848 for (uint32_t n = 4; n <= 6; n += 2) {
28849 for (size_t k = 1; k <= 5; k += 2) {
28850 for (uint32_t m = 1; m <= 4; m++) {
28851 GemmMicrokernelTester()
28852 .mr(4)
28853 .nr(2)
28854 .kr(1)
28855 .sr(1)
28856 .m(m)
28857 .n(n)
28858 .k(k)
28859 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028860 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028861 }
28862 }
28863 }
28864}
28865
28866TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
28867 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028868 for (uint32_t n = 1; n <= 2; n++) {
28869 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028870 GemmMicrokernelTester()
28871 .mr(4)
28872 .nr(2)
28873 .kr(1)
28874 .sr(1)
28875 .m(m)
28876 .n(n)
28877 .k(k)
28878 .cm_stride(5)
28879 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028880 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028881 }
28882 }
28883 }
28884}
28885
28886TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
28887 GemmMicrokernelTester()
28888 .mr(4)
28889 .nr(2)
28890 .kr(1)
28891 .sr(1)
28892 .m(4)
28893 .n(2)
28894 .k(1)
28895 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028896 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028897}
28898
28899TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
28900 GemmMicrokernelTester()
28901 .mr(4)
28902 .nr(2)
28903 .kr(1)
28904 .sr(1)
28905 .m(4)
28906 .n(2)
28907 .k(1)
28908 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028909 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028910}
28911
28912TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
28913 GemmMicrokernelTester()
28914 .mr(4)
28915 .nr(2)
28916 .kr(1)
28917 .sr(1)
28918 .m(4)
28919 .n(2)
28920 .k(1)
28921 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028922 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028923}
28924
28925TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_a_zero_point) {
28926 for (size_t k = 1; k <= 5; k += 2) {
28927 GemmMicrokernelTester()
28928 .mr(4)
28929 .nr(2)
28930 .kr(1)
28931 .sr(1)
28932 .m(4)
28933 .n(2)
28934 .k(k)
28935 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028936 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028937 }
28938}
28939
28940TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_b_zero_point) {
28941 for (size_t k = 1; k <= 5; k += 2) {
28942 GemmMicrokernelTester()
28943 .mr(4)
28944 .nr(2)
28945 .kr(1)
28946 .sr(1)
28947 .m(4)
28948 .n(2)
28949 .k(k)
28950 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028951 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028952 }
28953}
28954
28955TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_zero_point) {
28956 for (size_t k = 1; k <= 5; k += 2) {
28957 GemmMicrokernelTester()
28958 .mr(4)
28959 .nr(2)
28960 .kr(1)
28961 .sr(1)
28962 .m(4)
28963 .n(2)
28964 .k(k)
28965 .a_zero_point(0)
28966 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028967 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028968 }
28969}
28970
28971TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
28972 GemmMicrokernelTester()
28973 .mr(3)
28974 .nr(4)
28975 .kr(1)
28976 .sr(1)
28977 .m(3)
28978 .n(4)
28979 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028980 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028981}
28982
28983TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
28984 GemmMicrokernelTester()
28985 .mr(3)
28986 .nr(4)
28987 .kr(1)
28988 .sr(1)
28989 .m(3)
28990 .n(4)
28991 .k(1)
28992 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028993 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028994}
28995
28996TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
28997 GemmMicrokernelTester()
28998 .mr(3)
28999 .nr(4)
29000 .kr(1)
29001 .sr(1)
29002 .m(3)
29003 .n(4)
29004 .k(1)
29005 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029006 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029007}
29008
29009TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029010 for (uint32_t n = 1; n <= 4; n++) {
29011 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029012 GemmMicrokernelTester()
29013 .mr(3)
29014 .nr(4)
29015 .kr(1)
29016 .sr(1)
29017 .m(m)
29018 .n(n)
29019 .k(1)
29020 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029021 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029022 }
29023 }
29024}
29025
29026TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
29027 for (uint32_t m = 1; m <= 3; m++) {
29028 GemmMicrokernelTester()
29029 .mr(3)
29030 .nr(4)
29031 .kr(1)
29032 .sr(1)
29033 .m(m)
29034 .n(4)
29035 .k(1)
29036 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029037 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029038 }
29039}
29040
29041TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
29042 for (uint32_t n = 1; n <= 4; n++) {
29043 GemmMicrokernelTester()
29044 .mr(3)
29045 .nr(4)
29046 .kr(1)
29047 .sr(1)
29048 .m(3)
29049 .n(n)
29050 .k(1)
29051 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029052 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029053 }
29054}
29055
29056TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
29057 for (size_t k = 2; k < 10; k++) {
29058 GemmMicrokernelTester()
29059 .mr(3)
29060 .nr(4)
29061 .kr(1)
29062 .sr(1)
29063 .m(3)
29064 .n(4)
29065 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029066 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029067 }
29068}
29069
29070TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
29071 for (size_t k = 2; k < 10; k++) {
29072 GemmMicrokernelTester()
29073 .mr(3)
29074 .nr(4)
29075 .kr(1)
29076 .sr(1)
29077 .m(3)
29078 .n(4)
29079 .k(k)
29080 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080029081 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029082 }
29083}
29084
29085TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
29086 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029087 for (uint32_t n = 1; n <= 4; n++) {
29088 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029089 GemmMicrokernelTester()
29090 .mr(3)
29091 .nr(4)
29092 .kr(1)
29093 .sr(1)
29094 .m(m)
29095 .n(n)
29096 .k(k)
29097 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029098 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029099 }
29100 }
29101 }
29102}
29103
29104TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
29105 for (uint32_t n = 5; n < 8; n++) {
29106 for (size_t k = 1; k <= 5; k += 2) {
29107 GemmMicrokernelTester()
29108 .mr(3)
29109 .nr(4)
29110 .kr(1)
29111 .sr(1)
29112 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029113 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029114 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029115 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029116 }
29117 }
29118}
29119
29120TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
29121 for (uint32_t n = 5; n < 8; n++) {
29122 for (size_t k = 1; k <= 5; k += 2) {
29123 GemmMicrokernelTester()
29124 .mr(3)
29125 .nr(4)
29126 .kr(1)
29127 .sr(1)
29128 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029129 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029130 .k(k)
29131 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029132 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029133 }
29134 }
29135}
29136
29137TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
29138 for (uint32_t n = 5; n < 8; n++) {
29139 for (size_t k = 1; k <= 5; k += 2) {
29140 GemmMicrokernelTester()
29141 .mr(3)
29142 .nr(4)
29143 .kr(1)
29144 .sr(1)
29145 .m(3)
29146 .n(n)
29147 .k(k)
29148 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029149 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029150 }
29151 }
29152}
29153
29154TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
29155 for (uint32_t n = 5; n < 8; n++) {
29156 for (size_t k = 1; k <= 5; k += 2) {
29157 for (uint32_t m = 1; m <= 3; m++) {
29158 GemmMicrokernelTester()
29159 .mr(3)
29160 .nr(4)
29161 .kr(1)
29162 .sr(1)
29163 .m(m)
29164 .n(n)
29165 .k(k)
29166 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029167 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029168 }
29169 }
29170 }
29171}
29172
29173TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
29174 for (uint32_t n = 8; n <= 12; n += 4) {
29175 for (size_t k = 1; k <= 5; k += 2) {
29176 GemmMicrokernelTester()
29177 .mr(3)
29178 .nr(4)
29179 .kr(1)
29180 .sr(1)
29181 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029182 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029183 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029184 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029185 }
29186 }
29187}
29188
29189TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
29190 for (uint32_t n = 8; n <= 12; n += 4) {
29191 for (size_t k = 1; k <= 5; k += 2) {
29192 GemmMicrokernelTester()
29193 .mr(3)
29194 .nr(4)
29195 .kr(1)
29196 .sr(1)
29197 .m(3)
29198 .n(n)
29199 .k(k)
29200 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029201 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029202 }
29203 }
29204}
29205
29206TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_a) {
29207 for (uint32_t n = 8; n <= 12; n += 4) {
29208 for (size_t k = 1; k <= 5; k += 2) {
29209 GemmMicrokernelTester()
29210 .mr(3)
29211 .nr(4)
29212 .kr(1)
29213 .sr(1)
29214 .m(3)
29215 .n(n)
29216 .k(k)
29217 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029218 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029219 }
29220 }
29221}
29222
29223TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
29224 for (uint32_t n = 8; n <= 12; n += 4) {
29225 for (size_t k = 1; k <= 5; k += 2) {
29226 for (uint32_t m = 1; m <= 3; m++) {
29227 GemmMicrokernelTester()
29228 .mr(3)
29229 .nr(4)
29230 .kr(1)
29231 .sr(1)
29232 .m(m)
29233 .n(n)
29234 .k(k)
29235 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029236 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029237 }
29238 }
29239 }
29240}
29241
29242TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
29243 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029244 for (uint32_t n = 1; n <= 4; n++) {
29245 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029246 GemmMicrokernelTester()
29247 .mr(3)
29248 .nr(4)
29249 .kr(1)
29250 .sr(1)
29251 .m(m)
29252 .n(n)
29253 .k(k)
29254 .cm_stride(7)
29255 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029256 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029257 }
29258 }
29259 }
29260}
29261
29262TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
29263 GemmMicrokernelTester()
29264 .mr(3)
29265 .nr(4)
29266 .kr(1)
29267 .sr(1)
29268 .m(3)
29269 .n(4)
29270 .k(1)
29271 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029272 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029273}
29274
29275TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
29276 GemmMicrokernelTester()
29277 .mr(3)
29278 .nr(4)
29279 .kr(1)
29280 .sr(1)
29281 .m(3)
29282 .n(4)
29283 .k(1)
29284 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029285 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029286}
29287
29288TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
29289 GemmMicrokernelTester()
29290 .mr(3)
29291 .nr(4)
29292 .kr(1)
29293 .sr(1)
29294 .m(3)
29295 .n(4)
29296 .k(1)
29297 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029298 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029299}
29300
29301TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_a_zero_point) {
29302 for (size_t k = 1; k <= 5; k += 2) {
29303 GemmMicrokernelTester()
29304 .mr(3)
29305 .nr(4)
29306 .kr(1)
29307 .sr(1)
29308 .m(3)
29309 .n(4)
29310 .k(k)
29311 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080029312 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029313 }
29314}
29315
29316TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_b_zero_point) {
29317 for (size_t k = 1; k <= 5; k += 2) {
29318 GemmMicrokernelTester()
29319 .mr(3)
29320 .nr(4)
29321 .kr(1)
29322 .sr(1)
29323 .m(3)
29324 .n(4)
29325 .k(k)
29326 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080029327 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029328 }
29329}
29330
29331TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_zero_point) {
29332 for (size_t k = 1; k <= 5; k += 2) {
29333 GemmMicrokernelTester()
29334 .mr(3)
29335 .nr(4)
29336 .kr(1)
29337 .sr(1)
29338 .m(3)
29339 .n(4)
29340 .k(k)
29341 .a_zero_point(0)
29342 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080029343 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029344 }
29345}
29346
29347TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
29348 GemmMicrokernelTester()
29349 .mr(4)
29350 .nr(4)
29351 .kr(1)
29352 .sr(1)
29353 .m(4)
29354 .n(4)
29355 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029356 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029357}
29358
29359TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
29360 GemmMicrokernelTester()
29361 .mr(4)
29362 .nr(4)
29363 .kr(1)
29364 .sr(1)
29365 .m(4)
29366 .n(4)
29367 .k(1)
29368 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029369 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029370}
29371
29372TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
29373 GemmMicrokernelTester()
29374 .mr(4)
29375 .nr(4)
29376 .kr(1)
29377 .sr(1)
29378 .m(4)
29379 .n(4)
29380 .k(1)
29381 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029382 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029383}
29384
29385TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029386 for (uint32_t n = 1; n <= 4; n++) {
29387 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029388 GemmMicrokernelTester()
29389 .mr(4)
29390 .nr(4)
29391 .kr(1)
29392 .sr(1)
29393 .m(m)
29394 .n(n)
29395 .k(1)
29396 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029397 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029398 }
29399 }
29400}
29401
29402TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
29403 for (uint32_t m = 1; m <= 4; m++) {
29404 GemmMicrokernelTester()
29405 .mr(4)
29406 .nr(4)
29407 .kr(1)
29408 .sr(1)
29409 .m(m)
29410 .n(4)
29411 .k(1)
29412 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029413 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029414 }
29415}
29416
29417TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
29418 for (uint32_t n = 1; n <= 4; n++) {
29419 GemmMicrokernelTester()
29420 .mr(4)
29421 .nr(4)
29422 .kr(1)
29423 .sr(1)
29424 .m(4)
29425 .n(n)
29426 .k(1)
29427 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029428 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029429 }
29430}
29431
29432TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
29433 for (size_t k = 2; k < 10; k++) {
29434 GemmMicrokernelTester()
29435 .mr(4)
29436 .nr(4)
29437 .kr(1)
29438 .sr(1)
29439 .m(4)
29440 .n(4)
29441 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029442 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029443 }
29444}
29445
29446TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
29447 for (size_t k = 2; k < 10; k++) {
29448 GemmMicrokernelTester()
29449 .mr(4)
29450 .nr(4)
29451 .kr(1)
29452 .sr(1)
29453 .m(4)
29454 .n(4)
29455 .k(k)
29456 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080029457 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029458 }
29459}
29460
29461TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
29462 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029463 for (uint32_t n = 1; n <= 4; n++) {
29464 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029465 GemmMicrokernelTester()
29466 .mr(4)
29467 .nr(4)
29468 .kr(1)
29469 .sr(1)
29470 .m(m)
29471 .n(n)
29472 .k(k)
29473 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029474 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029475 }
29476 }
29477 }
29478}
29479
29480TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
29481 for (uint32_t n = 5; n < 8; n++) {
29482 for (size_t k = 1; k <= 5; k += 2) {
29483 GemmMicrokernelTester()
29484 .mr(4)
29485 .nr(4)
29486 .kr(1)
29487 .sr(1)
29488 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029489 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029490 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029491 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029492 }
29493 }
29494}
29495
29496TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
29497 for (uint32_t n = 5; n < 8; n++) {
29498 for (size_t k = 1; k <= 5; k += 2) {
29499 GemmMicrokernelTester()
29500 .mr(4)
29501 .nr(4)
29502 .kr(1)
29503 .sr(1)
29504 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029505 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029506 .k(k)
29507 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029508 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029509 }
29510 }
29511}
29512
29513TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
29514 for (uint32_t n = 5; n < 8; n++) {
29515 for (size_t k = 1; k <= 5; k += 2) {
29516 GemmMicrokernelTester()
29517 .mr(4)
29518 .nr(4)
29519 .kr(1)
29520 .sr(1)
29521 .m(4)
29522 .n(n)
29523 .k(k)
29524 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029525 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029526 }
29527 }
29528}
29529
29530TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
29531 for (uint32_t n = 5; n < 8; n++) {
29532 for (size_t k = 1; k <= 5; k += 2) {
29533 for (uint32_t m = 1; m <= 4; m++) {
29534 GemmMicrokernelTester()
29535 .mr(4)
29536 .nr(4)
29537 .kr(1)
29538 .sr(1)
29539 .m(m)
29540 .n(n)
29541 .k(k)
29542 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029543 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029544 }
29545 }
29546 }
29547}
29548
29549TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
29550 for (uint32_t n = 8; n <= 12; n += 4) {
29551 for (size_t k = 1; k <= 5; k += 2) {
29552 GemmMicrokernelTester()
29553 .mr(4)
29554 .nr(4)
29555 .kr(1)
29556 .sr(1)
29557 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029558 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029559 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029560 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029561 }
29562 }
29563}
29564
29565TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
29566 for (uint32_t n = 8; n <= 12; n += 4) {
29567 for (size_t k = 1; k <= 5; k += 2) {
29568 GemmMicrokernelTester()
29569 .mr(4)
29570 .nr(4)
29571 .kr(1)
29572 .sr(1)
29573 .m(4)
29574 .n(n)
29575 .k(k)
29576 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029577 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029578 }
29579 }
29580}
29581
29582TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_a) {
29583 for (uint32_t n = 8; n <= 12; n += 4) {
29584 for (size_t k = 1; k <= 5; k += 2) {
29585 GemmMicrokernelTester()
29586 .mr(4)
29587 .nr(4)
29588 .kr(1)
29589 .sr(1)
29590 .m(4)
29591 .n(n)
29592 .k(k)
29593 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029594 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029595 }
29596 }
29597}
29598
29599TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
29600 for (uint32_t n = 8; n <= 12; n += 4) {
29601 for (size_t k = 1; k <= 5; k += 2) {
29602 for (uint32_t m = 1; m <= 4; m++) {
29603 GemmMicrokernelTester()
29604 .mr(4)
29605 .nr(4)
29606 .kr(1)
29607 .sr(1)
29608 .m(m)
29609 .n(n)
29610 .k(k)
29611 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029612 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029613 }
29614 }
29615 }
29616}
29617
29618TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
29619 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029620 for (uint32_t n = 1; n <= 4; n++) {
29621 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029622 GemmMicrokernelTester()
29623 .mr(4)
29624 .nr(4)
29625 .kr(1)
29626 .sr(1)
29627 .m(m)
29628 .n(n)
29629 .k(k)
29630 .cm_stride(7)
29631 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029632 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029633 }
29634 }
29635 }
29636}
29637
29638TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
29639 GemmMicrokernelTester()
29640 .mr(4)
29641 .nr(4)
29642 .kr(1)
29643 .sr(1)
29644 .m(4)
29645 .n(4)
29646 .k(1)
29647 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029648 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029649}
29650
29651TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
29652 GemmMicrokernelTester()
29653 .mr(4)
29654 .nr(4)
29655 .kr(1)
29656 .sr(1)
29657 .m(4)
29658 .n(4)
29659 .k(1)
29660 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029661 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029662}
29663
29664TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
29665 GemmMicrokernelTester()
29666 .mr(4)
29667 .nr(4)
29668 .kr(1)
29669 .sr(1)
29670 .m(4)
29671 .n(4)
29672 .k(1)
29673 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029674 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029675}
29676
29677TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_a_zero_point) {
29678 for (size_t k = 1; k <= 5; k += 2) {
29679 GemmMicrokernelTester()
29680 .mr(4)
29681 .nr(4)
29682 .kr(1)
29683 .sr(1)
29684 .m(4)
29685 .n(4)
29686 .k(k)
29687 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080029688 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029689 }
29690}
29691
29692TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_b_zero_point) {
29693 for (size_t k = 1; k <= 5; k += 2) {
29694 GemmMicrokernelTester()
29695 .mr(4)
29696 .nr(4)
29697 .kr(1)
29698 .sr(1)
29699 .m(4)
29700 .n(4)
29701 .k(k)
29702 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080029703 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029704 }
29705}
29706
29707TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_zero_point) {
29708 for (size_t k = 1; k <= 5; k += 2) {
29709 GemmMicrokernelTester()
29710 .mr(4)
29711 .nr(4)
29712 .kr(1)
29713 .sr(1)
29714 .m(4)
29715 .n(4)
29716 .k(k)
29717 .a_zero_point(0)
29718 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080029719 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029720 }
29721}
29722
29723TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
29724 GemmMicrokernelTester()
29725 .mr(1)
29726 .nr(2)
29727 .kr(1)
29728 .sr(1)
29729 .m(1)
29730 .n(2)
29731 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029732 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029733}
29734
29735TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
29736 GemmMicrokernelTester()
29737 .mr(1)
29738 .nr(2)
29739 .kr(1)
29740 .sr(1)
29741 .m(1)
29742 .n(2)
29743 .k(1)
29744 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080029745 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029746}
29747
29748TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_strided_a) {
29749 GemmMicrokernelTester()
29750 .mr(1)
29751 .nr(2)
29752 .kr(1)
29753 .sr(1)
29754 .m(1)
29755 .n(2)
29756 .k(1)
29757 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029758 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029759}
29760
29761TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029762 for (uint32_t n = 1; n <= 2; n++) {
29763 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029764 GemmMicrokernelTester()
29765 .mr(1)
29766 .nr(2)
29767 .kr(1)
29768 .sr(1)
29769 .m(m)
29770 .n(n)
29771 .k(1)
29772 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029773 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029774 }
29775 }
29776}
29777
29778TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
29779 for (uint32_t m = 1; m <= 1; m++) {
29780 GemmMicrokernelTester()
29781 .mr(1)
29782 .nr(2)
29783 .kr(1)
29784 .sr(1)
29785 .m(m)
29786 .n(2)
29787 .k(1)
29788 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029789 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029790 }
29791}
29792
29793TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
29794 for (uint32_t n = 1; n <= 2; n++) {
29795 GemmMicrokernelTester()
29796 .mr(1)
29797 .nr(2)
29798 .kr(1)
29799 .sr(1)
29800 .m(1)
29801 .n(n)
29802 .k(1)
29803 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029804 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029805 }
29806}
29807
29808TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
29809 for (size_t k = 2; k < 10; k++) {
29810 GemmMicrokernelTester()
29811 .mr(1)
29812 .nr(2)
29813 .kr(1)
29814 .sr(1)
29815 .m(1)
29816 .n(2)
29817 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029818 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029819 }
29820}
29821
29822TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_strided_a) {
29823 for (size_t k = 2; k < 10; k++) {
29824 GemmMicrokernelTester()
29825 .mr(1)
29826 .nr(2)
29827 .kr(1)
29828 .sr(1)
29829 .m(1)
29830 .n(2)
29831 .k(k)
29832 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080029833 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029834 }
29835}
29836
29837TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
29838 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029839 for (uint32_t n = 1; n <= 2; n++) {
29840 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029841 GemmMicrokernelTester()
29842 .mr(1)
29843 .nr(2)
29844 .kr(1)
29845 .sr(1)
29846 .m(m)
29847 .n(n)
29848 .k(k)
29849 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029850 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029851 }
29852 }
29853 }
29854}
29855
29856TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
29857 for (uint32_t n = 3; n < 4; n++) {
29858 for (size_t k = 1; k <= 5; k += 2) {
29859 GemmMicrokernelTester()
29860 .mr(1)
29861 .nr(2)
29862 .kr(1)
29863 .sr(1)
29864 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029865 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029866 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029867 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029868 }
29869 }
29870}
29871
29872TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
29873 for (uint32_t n = 3; n < 4; n++) {
29874 for (size_t k = 1; k <= 5; k += 2) {
29875 GemmMicrokernelTester()
29876 .mr(1)
29877 .nr(2)
29878 .kr(1)
29879 .sr(1)
29880 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029881 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029882 .k(k)
29883 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080029884 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029885 }
29886 }
29887}
29888
29889TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_a) {
29890 for (uint32_t n = 3; n < 4; n++) {
29891 for (size_t k = 1; k <= 5; k += 2) {
29892 GemmMicrokernelTester()
29893 .mr(1)
29894 .nr(2)
29895 .kr(1)
29896 .sr(1)
29897 .m(1)
29898 .n(n)
29899 .k(k)
29900 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029901 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029902 }
29903 }
29904}
29905
29906TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
29907 for (uint32_t n = 3; n < 4; n++) {
29908 for (size_t k = 1; k <= 5; k += 2) {
29909 for (uint32_t m = 1; m <= 1; m++) {
29910 GemmMicrokernelTester()
29911 .mr(1)
29912 .nr(2)
29913 .kr(1)
29914 .sr(1)
29915 .m(m)
29916 .n(n)
29917 .k(k)
29918 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029919 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029920 }
29921 }
29922 }
29923}
29924
29925TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
29926 for (uint32_t n = 4; n <= 6; n += 2) {
29927 for (size_t k = 1; k <= 5; k += 2) {
29928 GemmMicrokernelTester()
29929 .mr(1)
29930 .nr(2)
29931 .kr(1)
29932 .sr(1)
29933 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029934 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029935 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029936 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029937 }
29938 }
29939}
29940
29941TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
29942 for (uint32_t n = 4; n <= 6; n += 2) {
29943 for (size_t k = 1; k <= 5; k += 2) {
29944 GemmMicrokernelTester()
29945 .mr(1)
29946 .nr(2)
29947 .kr(1)
29948 .sr(1)
29949 .m(1)
29950 .n(n)
29951 .k(k)
29952 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080029953 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029954 }
29955 }
29956}
29957
29958TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_a) {
29959 for (uint32_t n = 4; n <= 6; n += 2) {
29960 for (size_t k = 1; k <= 5; k += 2) {
29961 GemmMicrokernelTester()
29962 .mr(1)
29963 .nr(2)
29964 .kr(1)
29965 .sr(1)
29966 .m(1)
29967 .n(n)
29968 .k(k)
29969 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029970 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029971 }
29972 }
29973}
29974
29975TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
29976 for (uint32_t n = 4; n <= 6; n += 2) {
29977 for (size_t k = 1; k <= 5; k += 2) {
29978 for (uint32_t m = 1; m <= 1; m++) {
29979 GemmMicrokernelTester()
29980 .mr(1)
29981 .nr(2)
29982 .kr(1)
29983 .sr(1)
29984 .m(m)
29985 .n(n)
29986 .k(k)
29987 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029988 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029989 }
29990 }
29991 }
29992}
29993
29994TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
29995 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029996 for (uint32_t n = 1; n <= 2; n++) {
29997 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029998 GemmMicrokernelTester()
29999 .mr(1)
30000 .nr(2)
30001 .kr(1)
30002 .sr(1)
30003 .m(m)
30004 .n(n)
30005 .k(k)
30006 .cm_stride(5)
30007 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030008 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030009 }
30010 }
30011 }
30012}
30013
30014TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
30015 GemmMicrokernelTester()
30016 .mr(1)
30017 .nr(2)
30018 .kr(1)
30019 .sr(1)
30020 .m(1)
30021 .n(2)
30022 .k(1)
30023 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030024 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030025}
30026
30027TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
30028 GemmMicrokernelTester()
30029 .mr(1)
30030 .nr(2)
30031 .kr(1)
30032 .sr(1)
30033 .m(1)
30034 .n(2)
30035 .k(1)
30036 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030037 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030038}
30039
30040TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
30041 GemmMicrokernelTester()
30042 .mr(1)
30043 .nr(2)
30044 .kr(1)
30045 .sr(1)
30046 .m(1)
30047 .n(2)
30048 .k(1)
30049 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080030050 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030051}
30052
30053TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_a_zero_point) {
30054 for (size_t k = 1; k <= 5; k += 2) {
30055 GemmMicrokernelTester()
30056 .mr(1)
30057 .nr(2)
30058 .kr(1)
30059 .sr(1)
30060 .m(1)
30061 .n(2)
30062 .k(k)
30063 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030064 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030065 }
30066}
30067
30068TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_b_zero_point) {
30069 for (size_t k = 1; k <= 5; k += 2) {
30070 GemmMicrokernelTester()
30071 .mr(1)
30072 .nr(2)
30073 .kr(1)
30074 .sr(1)
30075 .m(1)
30076 .n(2)
30077 .k(k)
30078 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030079 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030080 }
30081}
30082
30083TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_zero_point) {
30084 for (size_t k = 1; k <= 5; k += 2) {
30085 GemmMicrokernelTester()
30086 .mr(1)
30087 .nr(2)
30088 .kr(1)
30089 .sr(1)
30090 .m(1)
30091 .n(2)
30092 .k(k)
30093 .a_zero_point(0)
30094 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030095 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030096 }
30097}
30098
30099TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
30100 GemmMicrokernelTester()
30101 .mr(2)
30102 .nr(2)
30103 .kr(1)
30104 .sr(1)
30105 .m(2)
30106 .n(2)
30107 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030108 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030109}
30110
30111TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
30112 GemmMicrokernelTester()
30113 .mr(2)
30114 .nr(2)
30115 .kr(1)
30116 .sr(1)
30117 .m(2)
30118 .n(2)
30119 .k(1)
30120 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080030121 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030122}
30123
30124TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_strided_a) {
30125 GemmMicrokernelTester()
30126 .mr(2)
30127 .nr(2)
30128 .kr(1)
30129 .sr(1)
30130 .m(2)
30131 .n(2)
30132 .k(1)
30133 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030134 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030135}
30136
30137TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030138 for (uint32_t n = 1; n <= 2; n++) {
30139 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030140 GemmMicrokernelTester()
30141 .mr(2)
30142 .nr(2)
30143 .kr(1)
30144 .sr(1)
30145 .m(m)
30146 .n(n)
30147 .k(1)
30148 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030149 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030150 }
30151 }
30152}
30153
30154TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
30155 for (uint32_t m = 1; m <= 2; m++) {
30156 GemmMicrokernelTester()
30157 .mr(2)
30158 .nr(2)
30159 .kr(1)
30160 .sr(1)
30161 .m(m)
30162 .n(2)
30163 .k(1)
30164 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030165 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030166 }
30167}
30168
30169TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
30170 for (uint32_t n = 1; n <= 2; n++) {
30171 GemmMicrokernelTester()
30172 .mr(2)
30173 .nr(2)
30174 .kr(1)
30175 .sr(1)
30176 .m(2)
30177 .n(n)
30178 .k(1)
30179 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030180 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030181 }
30182}
30183
30184TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
30185 for (size_t k = 2; k < 10; k++) {
30186 GemmMicrokernelTester()
30187 .mr(2)
30188 .nr(2)
30189 .kr(1)
30190 .sr(1)
30191 .m(2)
30192 .n(2)
30193 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030194 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030195 }
30196}
30197
30198TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_strided_a) {
30199 for (size_t k = 2; k < 10; k++) {
30200 GemmMicrokernelTester()
30201 .mr(2)
30202 .nr(2)
30203 .kr(1)
30204 .sr(1)
30205 .m(2)
30206 .n(2)
30207 .k(k)
30208 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080030209 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030210 }
30211}
30212
30213TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
30214 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030215 for (uint32_t n = 1; n <= 2; n++) {
30216 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030217 GemmMicrokernelTester()
30218 .mr(2)
30219 .nr(2)
30220 .kr(1)
30221 .sr(1)
30222 .m(m)
30223 .n(n)
30224 .k(k)
30225 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030226 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030227 }
30228 }
30229 }
30230}
30231
30232TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
30233 for (uint32_t n = 3; n < 4; n++) {
30234 for (size_t k = 1; k <= 5; k += 2) {
30235 GemmMicrokernelTester()
30236 .mr(2)
30237 .nr(2)
30238 .kr(1)
30239 .sr(1)
30240 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030241 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030242 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030243 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030244 }
30245 }
30246}
30247
30248TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
30249 for (uint32_t n = 3; n < 4; n++) {
30250 for (size_t k = 1; k <= 5; k += 2) {
30251 GemmMicrokernelTester()
30252 .mr(2)
30253 .nr(2)
30254 .kr(1)
30255 .sr(1)
30256 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030257 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030258 .k(k)
30259 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080030260 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030261 }
30262 }
30263}
30264
30265TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_a) {
30266 for (uint32_t n = 3; n < 4; n++) {
30267 for (size_t k = 1; k <= 5; k += 2) {
30268 GemmMicrokernelTester()
30269 .mr(2)
30270 .nr(2)
30271 .kr(1)
30272 .sr(1)
30273 .m(2)
30274 .n(n)
30275 .k(k)
30276 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030277 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030278 }
30279 }
30280}
30281
30282TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
30283 for (uint32_t n = 3; n < 4; n++) {
30284 for (size_t k = 1; k <= 5; k += 2) {
30285 for (uint32_t m = 1; m <= 2; m++) {
30286 GemmMicrokernelTester()
30287 .mr(2)
30288 .nr(2)
30289 .kr(1)
30290 .sr(1)
30291 .m(m)
30292 .n(n)
30293 .k(k)
30294 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030295 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030296 }
30297 }
30298 }
30299}
30300
30301TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
30302 for (uint32_t n = 4; n <= 6; n += 2) {
30303 for (size_t k = 1; k <= 5; k += 2) {
30304 GemmMicrokernelTester()
30305 .mr(2)
30306 .nr(2)
30307 .kr(1)
30308 .sr(1)
30309 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030310 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030311 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030312 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030313 }
30314 }
30315}
30316
30317TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
30318 for (uint32_t n = 4; n <= 6; n += 2) {
30319 for (size_t k = 1; k <= 5; k += 2) {
30320 GemmMicrokernelTester()
30321 .mr(2)
30322 .nr(2)
30323 .kr(1)
30324 .sr(1)
30325 .m(2)
30326 .n(n)
30327 .k(k)
30328 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080030329 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030330 }
30331 }
30332}
30333
30334TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_a) {
30335 for (uint32_t n = 4; n <= 6; n += 2) {
30336 for (size_t k = 1; k <= 5; k += 2) {
30337 GemmMicrokernelTester()
30338 .mr(2)
30339 .nr(2)
30340 .kr(1)
30341 .sr(1)
30342 .m(2)
30343 .n(n)
30344 .k(k)
30345 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030346 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030347 }
30348 }
30349}
30350
30351TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
30352 for (uint32_t n = 4; n <= 6; n += 2) {
30353 for (size_t k = 1; k <= 5; k += 2) {
30354 for (uint32_t m = 1; m <= 2; m++) {
30355 GemmMicrokernelTester()
30356 .mr(2)
30357 .nr(2)
30358 .kr(1)
30359 .sr(1)
30360 .m(m)
30361 .n(n)
30362 .k(k)
30363 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030364 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030365 }
30366 }
30367 }
30368}
30369
30370TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
30371 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030372 for (uint32_t n = 1; n <= 2; n++) {
30373 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030374 GemmMicrokernelTester()
30375 .mr(2)
30376 .nr(2)
30377 .kr(1)
30378 .sr(1)
30379 .m(m)
30380 .n(n)
30381 .k(k)
30382 .cm_stride(5)
30383 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030384 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030385 }
30386 }
30387 }
30388}
30389
30390TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
30391 GemmMicrokernelTester()
30392 .mr(2)
30393 .nr(2)
30394 .kr(1)
30395 .sr(1)
30396 .m(2)
30397 .n(2)
30398 .k(1)
30399 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030400 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030401}
30402
30403TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
30404 GemmMicrokernelTester()
30405 .mr(2)
30406 .nr(2)
30407 .kr(1)
30408 .sr(1)
30409 .m(2)
30410 .n(2)
30411 .k(1)
30412 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030413 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030414}
30415
30416TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
30417 GemmMicrokernelTester()
30418 .mr(2)
30419 .nr(2)
30420 .kr(1)
30421 .sr(1)
30422 .m(2)
30423 .n(2)
30424 .k(1)
30425 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080030426 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030427}
30428
30429TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_a_zero_point) {
30430 for (size_t k = 1; k <= 5; k += 2) {
30431 GemmMicrokernelTester()
30432 .mr(2)
30433 .nr(2)
30434 .kr(1)
30435 .sr(1)
30436 .m(2)
30437 .n(2)
30438 .k(k)
30439 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030440 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030441 }
30442}
30443
30444TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_b_zero_point) {
30445 for (size_t k = 1; k <= 5; k += 2) {
30446 GemmMicrokernelTester()
30447 .mr(2)
30448 .nr(2)
30449 .kr(1)
30450 .sr(1)
30451 .m(2)
30452 .n(2)
30453 .k(k)
30454 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030455 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030456 }
30457}
30458
30459TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_zero_point) {
30460 for (size_t k = 1; k <= 5; k += 2) {
30461 GemmMicrokernelTester()
30462 .mr(2)
30463 .nr(2)
30464 .kr(1)
30465 .sr(1)
30466 .m(2)
30467 .n(2)
30468 .k(k)
30469 .a_zero_point(0)
30470 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030471 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030472 }
30473}
30474
30475TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
30476 GemmMicrokernelTester()
30477 .mr(1)
30478 .nr(4)
30479 .kr(1)
30480 .sr(1)
30481 .m(1)
30482 .n(4)
30483 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030484 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030485}
30486
30487TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
30488 GemmMicrokernelTester()
30489 .mr(1)
30490 .nr(4)
30491 .kr(1)
30492 .sr(1)
30493 .m(1)
30494 .n(4)
30495 .k(1)
30496 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030497 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030498}
30499
30500TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_strided_a) {
30501 GemmMicrokernelTester()
30502 .mr(1)
30503 .nr(4)
30504 .kr(1)
30505 .sr(1)
30506 .m(1)
30507 .n(4)
30508 .k(1)
30509 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030510 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030511}
30512
30513TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030514 for (uint32_t n = 1; n <= 4; n++) {
30515 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030516 GemmMicrokernelTester()
30517 .mr(1)
30518 .nr(4)
30519 .kr(1)
30520 .sr(1)
30521 .m(m)
30522 .n(n)
30523 .k(1)
30524 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030525 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030526 }
30527 }
30528}
30529
30530TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
30531 for (uint32_t m = 1; m <= 1; m++) {
30532 GemmMicrokernelTester()
30533 .mr(1)
30534 .nr(4)
30535 .kr(1)
30536 .sr(1)
30537 .m(m)
30538 .n(4)
30539 .k(1)
30540 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030541 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030542 }
30543}
30544
30545TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
30546 for (uint32_t n = 1; n <= 4; n++) {
30547 GemmMicrokernelTester()
30548 .mr(1)
30549 .nr(4)
30550 .kr(1)
30551 .sr(1)
30552 .m(1)
30553 .n(n)
30554 .k(1)
30555 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030556 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030557 }
30558}
30559
30560TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
30561 for (size_t k = 2; k < 10; k++) {
30562 GemmMicrokernelTester()
30563 .mr(1)
30564 .nr(4)
30565 .kr(1)
30566 .sr(1)
30567 .m(1)
30568 .n(4)
30569 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030570 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030571 }
30572}
30573
30574TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_strided_a) {
30575 for (size_t k = 2; k < 10; k++) {
30576 GemmMicrokernelTester()
30577 .mr(1)
30578 .nr(4)
30579 .kr(1)
30580 .sr(1)
30581 .m(1)
30582 .n(4)
30583 .k(k)
30584 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080030585 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030586 }
30587}
30588
30589TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
30590 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030591 for (uint32_t n = 1; n <= 4; n++) {
30592 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030593 GemmMicrokernelTester()
30594 .mr(1)
30595 .nr(4)
30596 .kr(1)
30597 .sr(1)
30598 .m(m)
30599 .n(n)
30600 .k(k)
30601 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030602 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030603 }
30604 }
30605 }
30606}
30607
30608TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
30609 for (uint32_t n = 5; n < 8; n++) {
30610 for (size_t k = 1; k <= 5; k += 2) {
30611 GemmMicrokernelTester()
30612 .mr(1)
30613 .nr(4)
30614 .kr(1)
30615 .sr(1)
30616 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030617 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030618 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030619 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030620 }
30621 }
30622}
30623
30624TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
30625 for (uint32_t n = 5; n < 8; n++) {
30626 for (size_t k = 1; k <= 5; k += 2) {
30627 GemmMicrokernelTester()
30628 .mr(1)
30629 .nr(4)
30630 .kr(1)
30631 .sr(1)
30632 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030633 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030634 .k(k)
30635 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030636 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030637 }
30638 }
30639}
30640
30641TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_a) {
30642 for (uint32_t n = 5; n < 8; n++) {
30643 for (size_t k = 1; k <= 5; k += 2) {
30644 GemmMicrokernelTester()
30645 .mr(1)
30646 .nr(4)
30647 .kr(1)
30648 .sr(1)
30649 .m(1)
30650 .n(n)
30651 .k(k)
30652 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030653 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030654 }
30655 }
30656}
30657
30658TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
30659 for (uint32_t n = 5; n < 8; n++) {
30660 for (size_t k = 1; k <= 5; k += 2) {
30661 for (uint32_t m = 1; m <= 1; m++) {
30662 GemmMicrokernelTester()
30663 .mr(1)
30664 .nr(4)
30665 .kr(1)
30666 .sr(1)
30667 .m(m)
30668 .n(n)
30669 .k(k)
30670 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030671 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030672 }
30673 }
30674 }
30675}
30676
30677TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
30678 for (uint32_t n = 8; n <= 12; n += 4) {
30679 for (size_t k = 1; k <= 5; k += 2) {
30680 GemmMicrokernelTester()
30681 .mr(1)
30682 .nr(4)
30683 .kr(1)
30684 .sr(1)
30685 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030686 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030687 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030688 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030689 }
30690 }
30691}
30692
30693TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
30694 for (uint32_t n = 8; n <= 12; n += 4) {
30695 for (size_t k = 1; k <= 5; k += 2) {
30696 GemmMicrokernelTester()
30697 .mr(1)
30698 .nr(4)
30699 .kr(1)
30700 .sr(1)
30701 .m(1)
30702 .n(n)
30703 .k(k)
30704 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030705 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030706 }
30707 }
30708}
30709
30710TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_a) {
30711 for (uint32_t n = 8; n <= 12; n += 4) {
30712 for (size_t k = 1; k <= 5; k += 2) {
30713 GemmMicrokernelTester()
30714 .mr(1)
30715 .nr(4)
30716 .kr(1)
30717 .sr(1)
30718 .m(1)
30719 .n(n)
30720 .k(k)
30721 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030722 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030723 }
30724 }
30725}
30726
30727TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
30728 for (uint32_t n = 8; n <= 12; n += 4) {
30729 for (size_t k = 1; k <= 5; k += 2) {
30730 for (uint32_t m = 1; m <= 1; m++) {
30731 GemmMicrokernelTester()
30732 .mr(1)
30733 .nr(4)
30734 .kr(1)
30735 .sr(1)
30736 .m(m)
30737 .n(n)
30738 .k(k)
30739 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030740 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030741 }
30742 }
30743 }
30744}
30745
30746TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
30747 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030748 for (uint32_t n = 1; n <= 4; n++) {
30749 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030750 GemmMicrokernelTester()
30751 .mr(1)
30752 .nr(4)
30753 .kr(1)
30754 .sr(1)
30755 .m(m)
30756 .n(n)
30757 .k(k)
30758 .cm_stride(7)
30759 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030760 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030761 }
30762 }
30763 }
30764}
30765
30766TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
30767 GemmMicrokernelTester()
30768 .mr(1)
30769 .nr(4)
30770 .kr(1)
30771 .sr(1)
30772 .m(1)
30773 .n(4)
30774 .k(1)
30775 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030776 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030777}
30778
30779TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
30780 GemmMicrokernelTester()
30781 .mr(1)
30782 .nr(4)
30783 .kr(1)
30784 .sr(1)
30785 .m(1)
30786 .n(4)
30787 .k(1)
30788 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030789 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030790}
30791
30792TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
30793 GemmMicrokernelTester()
30794 .mr(1)
30795 .nr(4)
30796 .kr(1)
30797 .sr(1)
30798 .m(1)
30799 .n(4)
30800 .k(1)
30801 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030802 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030803}
30804
30805TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_a_zero_point) {
30806 for (size_t k = 1; k <= 5; k += 2) {
30807 GemmMicrokernelTester()
30808 .mr(1)
30809 .nr(4)
30810 .kr(1)
30811 .sr(1)
30812 .m(1)
30813 .n(4)
30814 .k(k)
30815 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030816 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030817 }
30818}
30819
30820TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_b_zero_point) {
30821 for (size_t k = 1; k <= 5; k += 2) {
30822 GemmMicrokernelTester()
30823 .mr(1)
30824 .nr(4)
30825 .kr(1)
30826 .sr(1)
30827 .m(1)
30828 .n(4)
30829 .k(k)
30830 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030831 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030832 }
30833}
30834
30835TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_zero_point) {
30836 for (size_t k = 1; k <= 5; k += 2) {
30837 GemmMicrokernelTester()
30838 .mr(1)
30839 .nr(4)
30840 .kr(1)
30841 .sr(1)
30842 .m(1)
30843 .n(4)
30844 .k(k)
30845 .a_zero_point(0)
30846 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030847 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030848 }
30849}
30850
30851TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
30852 GemmMicrokernelTester()
30853 .mr(2)
30854 .nr(4)
30855 .kr(1)
30856 .sr(1)
30857 .m(2)
30858 .n(4)
30859 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030860 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030861}
30862
30863TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
30864 GemmMicrokernelTester()
30865 .mr(2)
30866 .nr(4)
30867 .kr(1)
30868 .sr(1)
30869 .m(2)
30870 .n(4)
30871 .k(1)
30872 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030873 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030874}
30875
30876TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_strided_a) {
30877 GemmMicrokernelTester()
30878 .mr(2)
30879 .nr(4)
30880 .kr(1)
30881 .sr(1)
30882 .m(2)
30883 .n(4)
30884 .k(1)
30885 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030886 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030887}
30888
30889TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030890 for (uint32_t n = 1; n <= 4; n++) {
30891 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030892 GemmMicrokernelTester()
30893 .mr(2)
30894 .nr(4)
30895 .kr(1)
30896 .sr(1)
30897 .m(m)
30898 .n(n)
30899 .k(1)
30900 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030901 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030902 }
30903 }
30904}
30905
30906TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
30907 for (uint32_t m = 1; m <= 2; m++) {
30908 GemmMicrokernelTester()
30909 .mr(2)
30910 .nr(4)
30911 .kr(1)
30912 .sr(1)
30913 .m(m)
30914 .n(4)
30915 .k(1)
30916 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030917 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030918 }
30919}
30920
30921TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
30922 for (uint32_t n = 1; n <= 4; n++) {
30923 GemmMicrokernelTester()
30924 .mr(2)
30925 .nr(4)
30926 .kr(1)
30927 .sr(1)
30928 .m(2)
30929 .n(n)
30930 .k(1)
30931 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030932 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030933 }
30934}
30935
30936TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
30937 for (size_t k = 2; k < 10; k++) {
30938 GemmMicrokernelTester()
30939 .mr(2)
30940 .nr(4)
30941 .kr(1)
30942 .sr(1)
30943 .m(2)
30944 .n(4)
30945 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030946 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030947 }
30948}
30949
30950TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_strided_a) {
30951 for (size_t k = 2; k < 10; k++) {
30952 GemmMicrokernelTester()
30953 .mr(2)
30954 .nr(4)
30955 .kr(1)
30956 .sr(1)
30957 .m(2)
30958 .n(4)
30959 .k(k)
30960 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080030961 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030962 }
30963}
30964
30965TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
30966 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030967 for (uint32_t n = 1; n <= 4; n++) {
30968 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030969 GemmMicrokernelTester()
30970 .mr(2)
30971 .nr(4)
30972 .kr(1)
30973 .sr(1)
30974 .m(m)
30975 .n(n)
30976 .k(k)
30977 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030978 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030979 }
30980 }
30981 }
30982}
30983
30984TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
30985 for (uint32_t n = 5; n < 8; n++) {
30986 for (size_t k = 1; k <= 5; k += 2) {
30987 GemmMicrokernelTester()
30988 .mr(2)
30989 .nr(4)
30990 .kr(1)
30991 .sr(1)
30992 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030993 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030994 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030995 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030996 }
30997 }
30998}
30999
31000TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
31001 for (uint32_t n = 5; n < 8; n++) {
31002 for (size_t k = 1; k <= 5; k += 2) {
31003 GemmMicrokernelTester()
31004 .mr(2)
31005 .nr(4)
31006 .kr(1)
31007 .sr(1)
31008 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031009 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031010 .k(k)
31011 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031012 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031013 }
31014 }
31015}
31016
31017TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_a) {
31018 for (uint32_t n = 5; n < 8; n++) {
31019 for (size_t k = 1; k <= 5; k += 2) {
31020 GemmMicrokernelTester()
31021 .mr(2)
31022 .nr(4)
31023 .kr(1)
31024 .sr(1)
31025 .m(2)
31026 .n(n)
31027 .k(k)
31028 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031029 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031030 }
31031 }
31032}
31033
31034TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
31035 for (uint32_t n = 5; n < 8; n++) {
31036 for (size_t k = 1; k <= 5; k += 2) {
31037 for (uint32_t m = 1; m <= 2; m++) {
31038 GemmMicrokernelTester()
31039 .mr(2)
31040 .nr(4)
31041 .kr(1)
31042 .sr(1)
31043 .m(m)
31044 .n(n)
31045 .k(k)
31046 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031047 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031048 }
31049 }
31050 }
31051}
31052
31053TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
31054 for (uint32_t n = 8; n <= 12; n += 4) {
31055 for (size_t k = 1; k <= 5; k += 2) {
31056 GemmMicrokernelTester()
31057 .mr(2)
31058 .nr(4)
31059 .kr(1)
31060 .sr(1)
31061 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031062 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031063 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031064 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031065 }
31066 }
31067}
31068
31069TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
31070 for (uint32_t n = 8; n <= 12; n += 4) {
31071 for (size_t k = 1; k <= 5; k += 2) {
31072 GemmMicrokernelTester()
31073 .mr(2)
31074 .nr(4)
31075 .kr(1)
31076 .sr(1)
31077 .m(2)
31078 .n(n)
31079 .k(k)
31080 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031081 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031082 }
31083 }
31084}
31085
31086TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_a) {
31087 for (uint32_t n = 8; n <= 12; n += 4) {
31088 for (size_t k = 1; k <= 5; k += 2) {
31089 GemmMicrokernelTester()
31090 .mr(2)
31091 .nr(4)
31092 .kr(1)
31093 .sr(1)
31094 .m(2)
31095 .n(n)
31096 .k(k)
31097 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031098 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031099 }
31100 }
31101}
31102
31103TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
31104 for (uint32_t n = 8; n <= 12; n += 4) {
31105 for (size_t k = 1; k <= 5; k += 2) {
31106 for (uint32_t m = 1; m <= 2; m++) {
31107 GemmMicrokernelTester()
31108 .mr(2)
31109 .nr(4)
31110 .kr(1)
31111 .sr(1)
31112 .m(m)
31113 .n(n)
31114 .k(k)
31115 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031116 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031117 }
31118 }
31119 }
31120}
31121
31122TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
31123 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031124 for (uint32_t n = 1; n <= 4; n++) {
31125 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031126 GemmMicrokernelTester()
31127 .mr(2)
31128 .nr(4)
31129 .kr(1)
31130 .sr(1)
31131 .m(m)
31132 .n(n)
31133 .k(k)
31134 .cm_stride(7)
31135 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031136 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031137 }
31138 }
31139 }
31140}
31141
31142TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
31143 GemmMicrokernelTester()
31144 .mr(2)
31145 .nr(4)
31146 .kr(1)
31147 .sr(1)
31148 .m(2)
31149 .n(4)
31150 .k(1)
31151 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080031152 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031153}
31154
31155TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
31156 GemmMicrokernelTester()
31157 .mr(2)
31158 .nr(4)
31159 .kr(1)
31160 .sr(1)
31161 .m(2)
31162 .n(4)
31163 .k(1)
31164 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080031165 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031166}
31167
31168TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
31169 GemmMicrokernelTester()
31170 .mr(2)
31171 .nr(4)
31172 .kr(1)
31173 .sr(1)
31174 .m(2)
31175 .n(4)
31176 .k(1)
31177 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031178 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031179}
31180
31181TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_a_zero_point) {
31182 for (size_t k = 1; k <= 5; k += 2) {
31183 GemmMicrokernelTester()
31184 .mr(2)
31185 .nr(4)
31186 .kr(1)
31187 .sr(1)
31188 .m(2)
31189 .n(4)
31190 .k(k)
31191 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080031192 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031193 }
31194}
31195
31196TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_b_zero_point) {
31197 for (size_t k = 1; k <= 5; k += 2) {
31198 GemmMicrokernelTester()
31199 .mr(2)
31200 .nr(4)
31201 .kr(1)
31202 .sr(1)
31203 .m(2)
31204 .n(4)
31205 .k(k)
31206 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080031207 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031208 }
31209}
31210
31211TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_zero_point) {
31212 for (size_t k = 1; k <= 5; k += 2) {
31213 GemmMicrokernelTester()
31214 .mr(2)
31215 .nr(4)
31216 .kr(1)
31217 .sr(1)
31218 .m(2)
31219 .n(4)
31220 .k(k)
31221 .a_zero_point(0)
31222 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080031223 .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031224 }
31225}