blob: 5c6da29a8965eabeafe62c71354335e244f29c21 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-gemm-minmax.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
Zhi An Ngb43b47a2021-12-23 16:27:22 -080016#include <xnnpack/allocator.h>
Marat Dukhan1c587112020-04-08 20:04:28 -070017#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
26#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard3cb54f92020-04-10 10:46:08 -070027 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2) {
28 TEST_REQUIRES_ARM_NEON_FMA;
29 GemmMicrokernelTester()
30 .mr(1)
31 .nr(8)
32 .kr(1)
33 .sr(1)
34 .m(1)
35 .n(8)
36 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070037 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -070038 }
39
40 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, strided_cn) {
41 TEST_REQUIRES_ARM_NEON_FMA;
42 GemmMicrokernelTester()
43 .mr(1)
44 .nr(8)
45 .kr(1)
46 .sr(1)
47 .m(1)
48 .n(8)
49 .k(2)
50 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070051 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -070052 }
53
54 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2_strided_a) {
55 TEST_REQUIRES_ARM_NEON_FMA;
56 GemmMicrokernelTester()
57 .mr(1)
58 .nr(8)
59 .kr(1)
60 .sr(1)
61 .m(1)
62 .n(8)
63 .k(2)
64 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070065 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -070066 }
67
68 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile) {
69 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080070 for (uint32_t n = 1; n <= 8; n++) {
71 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3cb54f92020-04-10 10:46:08 -070072 GemmMicrokernelTester()
73 .mr(1)
74 .nr(8)
75 .kr(1)
76 .sr(1)
77 .m(m)
78 .n(n)
79 .k(2)
80 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070081 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -070082 }
83 }
84 }
85
86 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_m) {
87 TEST_REQUIRES_ARM_NEON_FMA;
88 for (uint32_t m = 1; m <= 1; m++) {
89 GemmMicrokernelTester()
90 .mr(1)
91 .nr(8)
92 .kr(1)
93 .sr(1)
94 .m(m)
95 .n(8)
96 .k(2)
97 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070098 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -070099 }
100 }
101
102 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_n) {
103 TEST_REQUIRES_ARM_NEON_FMA;
104 for (uint32_t n = 1; n <= 8; n++) {
105 GemmMicrokernelTester()
106 .mr(1)
107 .nr(8)
108 .kr(1)
109 .sr(1)
110 .m(1)
111 .n(n)
112 .k(2)
113 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700114 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700115 }
116 }
117
118 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_lt_2) {
119 TEST_REQUIRES_ARM_NEON_FMA;
120 for (size_t k = 1; k < 2; k++) {
121 GemmMicrokernelTester()
122 .mr(1)
123 .nr(8)
124 .kr(1)
125 .sr(1)
126 .m(1)
127 .n(8)
128 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700129 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700130 }
131 }
132
133 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_lt_2_strided_a) {
134 TEST_REQUIRES_ARM_NEON_FMA;
135 for (size_t k = 1; k < 2; k++) {
136 GemmMicrokernelTester()
137 .mr(1)
138 .nr(8)
139 .kr(1)
140 .sr(1)
141 .m(1)
142 .n(8)
143 .k(k)
144 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700145 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700146 }
147 }
148
149 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_lt_2_subtile) {
150 TEST_REQUIRES_ARM_NEON_FMA;
151 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800152 for (uint32_t n = 1; n <= 8; n++) {
153 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3cb54f92020-04-10 10:46:08 -0700154 GemmMicrokernelTester()
155 .mr(1)
156 .nr(8)
157 .kr(1)
158 .sr(1)
159 .m(m)
160 .n(n)
161 .k(k)
162 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700163 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700164 }
165 }
166 }
167 }
168
169 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_gt_2) {
170 TEST_REQUIRES_ARM_NEON_FMA;
171 for (size_t k = 3; k < 4; k++) {
172 GemmMicrokernelTester()
173 .mr(1)
174 .nr(8)
175 .kr(1)
176 .sr(1)
177 .m(1)
178 .n(8)
179 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700180 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700181 }
182 }
183
184 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_gt_2_strided_a) {
185 TEST_REQUIRES_ARM_NEON_FMA;
186 for (size_t k = 3; k < 4; k++) {
187 GemmMicrokernelTester()
188 .mr(1)
189 .nr(8)
190 .kr(1)
191 .sr(1)
192 .m(1)
193 .n(8)
194 .k(k)
195 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700196 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700197 }
198 }
199
200 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_gt_2_subtile) {
201 TEST_REQUIRES_ARM_NEON_FMA;
202 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800203 for (uint32_t n = 1; n <= 8; n++) {
204 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3cb54f92020-04-10 10:46:08 -0700205 GemmMicrokernelTester()
206 .mr(1)
207 .nr(8)
208 .kr(1)
209 .sr(1)
210 .m(m)
211 .n(n)
212 .k(k)
213 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700214 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700215 }
216 }
217 }
218 }
219
220 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_div_2) {
221 TEST_REQUIRES_ARM_NEON_FMA;
222 for (size_t k = 4; k <= 20; k += 2) {
223 GemmMicrokernelTester()
224 .mr(1)
225 .nr(8)
226 .kr(1)
227 .sr(1)
228 .m(1)
229 .n(8)
230 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700231 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700232 }
233 }
234
235 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_div_2_strided_a) {
236 TEST_REQUIRES_ARM_NEON_FMA;
237 for (size_t k = 4; k <= 20; k += 2) {
238 GemmMicrokernelTester()
239 .mr(1)
240 .nr(8)
241 .kr(1)
242 .sr(1)
243 .m(1)
244 .n(8)
245 .k(k)
246 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700247 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700248 }
249 }
250
251 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, k_div_2_subtile) {
252 TEST_REQUIRES_ARM_NEON_FMA;
253 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800254 for (uint32_t n = 1; n <= 8; n++) {
255 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3cb54f92020-04-10 10:46:08 -0700256 GemmMicrokernelTester()
257 .mr(1)
258 .nr(8)
259 .kr(1)
260 .sr(1)
261 .m(m)
262 .n(n)
263 .k(k)
264 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700265 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700266 }
267 }
268 }
269 }
270
271 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_gt_8) {
272 TEST_REQUIRES_ARM_NEON_FMA;
273 for (uint32_t n = 9; n < 16; n++) {
274 for (size_t k = 1; k <= 10; k += 3) {
275 GemmMicrokernelTester()
276 .mr(1)
277 .nr(8)
278 .kr(1)
279 .sr(1)
280 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800281 .n(n)
Frank Barchard3cb54f92020-04-10 10:46:08 -0700282 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700283 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700284 }
285 }
286 }
287
288 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_cn) {
289 TEST_REQUIRES_ARM_NEON_FMA;
290 for (uint32_t n = 9; n < 16; n++) {
291 for (size_t k = 1; k <= 10; k += 3) {
292 GemmMicrokernelTester()
293 .mr(1)
294 .nr(8)
295 .kr(1)
296 .sr(1)
297 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800298 .n(n)
Frank Barchard3cb54f92020-04-10 10:46:08 -0700299 .k(k)
300 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700301 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700302 }
303 }
304 }
305
306 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_a) {
307 TEST_REQUIRES_ARM_NEON_FMA;
308 for (uint32_t n = 9; n < 16; n++) {
309 for (size_t k = 1; k <= 10; k += 3) {
310 GemmMicrokernelTester()
311 .mr(1)
312 .nr(8)
313 .kr(1)
314 .sr(1)
315 .m(1)
316 .n(n)
317 .k(k)
318 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700319 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700320 }
321 }
322 }
323
324 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_gt_8_subtile) {
325 TEST_REQUIRES_ARM_NEON_FMA;
326 for (uint32_t n = 9; n < 16; n++) {
327 for (size_t k = 1; k <= 10; k += 3) {
328 for (uint32_t m = 1; m <= 1; m++) {
329 GemmMicrokernelTester()
330 .mr(1)
331 .nr(8)
332 .kr(1)
333 .sr(1)
334 .m(m)
335 .n(n)
336 .k(k)
337 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700338 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700339 }
340 }
341 }
342 }
343
344 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_div_8) {
345 TEST_REQUIRES_ARM_NEON_FMA;
346 for (uint32_t n = 16; n <= 24; n += 8) {
347 for (size_t k = 1; k <= 10; k += 3) {
348 GemmMicrokernelTester()
349 .mr(1)
350 .nr(8)
351 .kr(1)
352 .sr(1)
353 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800354 .n(n)
Frank Barchard3cb54f92020-04-10 10:46:08 -0700355 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700356 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700357 }
358 }
359 }
360
361 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_div_8_strided_cn) {
362 TEST_REQUIRES_ARM_NEON_FMA;
363 for (uint32_t n = 16; n <= 24; n += 8) {
364 for (size_t k = 1; k <= 10; k += 3) {
365 GemmMicrokernelTester()
366 .mr(1)
367 .nr(8)
368 .kr(1)
369 .sr(1)
370 .m(1)
371 .n(n)
372 .k(k)
373 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700374 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700375 }
376 }
377 }
378
379 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_div_8_strided_a) {
380 TEST_REQUIRES_ARM_NEON_FMA;
381 for (uint32_t n = 16; n <= 24; n += 8) {
382 for (size_t k = 1; k <= 10; k += 3) {
383 GemmMicrokernelTester()
384 .mr(1)
385 .nr(8)
386 .kr(1)
387 .sr(1)
388 .m(1)
389 .n(n)
390 .k(k)
391 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700392 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700393 }
394 }
395 }
396
397 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, n_div_8_subtile) {
398 TEST_REQUIRES_ARM_NEON_FMA;
399 for (uint32_t n = 16; n <= 24; n += 8) {
400 for (size_t k = 1; k <= 10; k += 3) {
401 for (uint32_t m = 1; m <= 1; m++) {
402 GemmMicrokernelTester()
403 .mr(1)
404 .nr(8)
405 .kr(1)
406 .sr(1)
407 .m(m)
408 .n(n)
409 .k(k)
410 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700411 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700412 }
413 }
414 }
415 }
416
417 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, strided_cm_subtile) {
418 TEST_REQUIRES_ARM_NEON_FMA;
419 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800420 for (uint32_t n = 1; n <= 8; n++) {
421 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3cb54f92020-04-10 10:46:08 -0700422 GemmMicrokernelTester()
423 .mr(1)
424 .nr(8)
425 .kr(1)
426 .sr(1)
427 .m(m)
428 .n(n)
429 .k(k)
430 .cm_stride(11)
431 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700432 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700433 }
434 }
435 }
436 }
437
438 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, qmin) {
439 TEST_REQUIRES_ARM_NEON_FMA;
440 GemmMicrokernelTester()
441 .mr(1)
442 .nr(8)
443 .kr(1)
444 .sr(1)
445 .m(1)
446 .n(8)
447 .k(2)
448 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700449 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700450 }
451
452 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, qmax) {
453 TEST_REQUIRES_ARM_NEON_FMA;
454 GemmMicrokernelTester()
455 .mr(1)
456 .nr(8)
457 .kr(1)
458 .sr(1)
459 .m(1)
460 .n(8)
461 .k(2)
462 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700463 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700464 }
465
466 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_LD64, strided_cm) {
467 TEST_REQUIRES_ARM_NEON_FMA;
468 GemmMicrokernelTester()
469 .mr(1)
470 .nr(8)
471 .kr(1)
472 .sr(1)
473 .m(1)
474 .n(8)
475 .k(2)
476 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700477 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Frank Barchard3cb54f92020-04-10 10:46:08 -0700478 }
479#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
480
481
482#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -0700483 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700484 TEST_REQUIRES_ARM_NEON_FMA;
485 GemmMicrokernelTester()
486 .mr(1)
487 .nr(8)
488 .kr(1)
489 .sr(1)
490 .m(1)
491 .n(8)
492 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700493 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700494 }
495
Marat Dukhande06f492020-04-09 00:19:31 -0700496 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700497 TEST_REQUIRES_ARM_NEON_FMA;
498 GemmMicrokernelTester()
499 .mr(1)
500 .nr(8)
501 .kr(1)
502 .sr(1)
503 .m(1)
504 .n(8)
505 .k(8)
506 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700507 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700508 }
509
Marat Dukhande06f492020-04-09 00:19:31 -0700510 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700511 TEST_REQUIRES_ARM_NEON_FMA;
512 GemmMicrokernelTester()
513 .mr(1)
514 .nr(8)
515 .kr(1)
516 .sr(1)
517 .m(1)
518 .n(8)
519 .k(8)
520 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700521 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700522 }
523
Marat Dukhande06f492020-04-09 00:19:31 -0700524 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700525 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800526 for (uint32_t n = 1; n <= 8; n++) {
527 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700528 GemmMicrokernelTester()
529 .mr(1)
530 .nr(8)
531 .kr(1)
532 .sr(1)
533 .m(m)
534 .n(n)
535 .k(8)
536 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700537 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700538 }
539 }
540 }
541
Marat Dukhande06f492020-04-09 00:19:31 -0700542 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700543 TEST_REQUIRES_ARM_NEON_FMA;
544 for (uint32_t m = 1; m <= 1; m++) {
545 GemmMicrokernelTester()
546 .mr(1)
547 .nr(8)
548 .kr(1)
549 .sr(1)
550 .m(m)
551 .n(8)
552 .k(8)
553 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700554 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700555 }
556 }
557
Marat Dukhande06f492020-04-09 00:19:31 -0700558 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700559 TEST_REQUIRES_ARM_NEON_FMA;
560 for (uint32_t n = 1; n <= 8; n++) {
561 GemmMicrokernelTester()
562 .mr(1)
563 .nr(8)
564 .kr(1)
565 .sr(1)
566 .m(1)
567 .n(n)
568 .k(8)
569 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700570 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700571 }
572 }
573
Marat Dukhande06f492020-04-09 00:19:31 -0700574 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700575 TEST_REQUIRES_ARM_NEON_FMA;
576 GemmMicrokernelTester()
577 .mr(1)
578 .nr(8)
579 .kr(1)
580 .sr(1)
581 .m(1)
582 .n(8)
583 .k(16)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700584 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700585 }
586
Marat Dukhande06f492020-04-09 00:19:31 -0700587 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700588 TEST_REQUIRES_ARM_NEON_FMA;
589 GemmMicrokernelTester()
590 .mr(1)
591 .nr(8)
592 .kr(1)
593 .sr(1)
594 .m(1)
595 .n(8)
596 .k(16)
597 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700598 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700599 }
600
Marat Dukhande06f492020-04-09 00:19:31 -0700601 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700602 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800603 for (uint32_t n = 1; n <= 8; n++) {
604 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700605 GemmMicrokernelTester()
606 .mr(1)
607 .nr(8)
608 .kr(1)
609 .sr(1)
610 .m(m)
611 .n(n)
612 .k(16)
613 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700614 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700615 }
616 }
617 }
618
Marat Dukhande06f492020-04-09 00:19:31 -0700619 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700620 TEST_REQUIRES_ARM_NEON_FMA;
621 for (size_t k = 1; k < 16; k++) {
622 GemmMicrokernelTester()
623 .mr(1)
624 .nr(8)
625 .kr(1)
626 .sr(1)
627 .m(1)
628 .n(8)
629 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700630 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700631 }
632 }
633
Marat Dukhande06f492020-04-09 00:19:31 -0700634 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700635 TEST_REQUIRES_ARM_NEON_FMA;
636 for (size_t k = 1; k < 16; k++) {
637 GemmMicrokernelTester()
638 .mr(1)
639 .nr(8)
640 .kr(1)
641 .sr(1)
642 .m(1)
643 .n(8)
644 .k(k)
645 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700646 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700647 }
648 }
649
Marat Dukhande06f492020-04-09 00:19:31 -0700650 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700651 TEST_REQUIRES_ARM_NEON_FMA;
652 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800653 for (uint32_t n = 1; n <= 8; n++) {
654 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700655 GemmMicrokernelTester()
656 .mr(1)
657 .nr(8)
658 .kr(1)
659 .sr(1)
660 .m(m)
661 .n(n)
662 .k(k)
663 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700664 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700665 }
666 }
667 }
668 }
669
Marat Dukhande06f492020-04-09 00:19:31 -0700670 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700671 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800672 for (size_t k = 17; k < 32; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700673 GemmMicrokernelTester()
674 .mr(1)
675 .nr(8)
676 .kr(1)
677 .sr(1)
678 .m(1)
679 .n(8)
680 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700681 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700682 }
683 }
684
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800685 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700686 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800687 for (size_t k = 17; k < 32; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700688 GemmMicrokernelTester()
689 .mr(1)
690 .nr(8)
691 .kr(1)
692 .sr(1)
693 .m(1)
694 .n(8)
695 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800696 .a_stride(37)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700697 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700698 }
699 }
700
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800701 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700702 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800703 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800704 for (uint32_t n = 1; n <= 8; n++) {
705 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700706 GemmMicrokernelTester()
707 .mr(1)
708 .nr(8)
709 .kr(1)
710 .sr(1)
711 .m(m)
712 .n(n)
713 .k(k)
714 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700715 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700716 }
717 }
718 }
719 }
720
Marat Dukhande06f492020-04-09 00:19:31 -0700721 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700722 TEST_REQUIRES_ARM_NEON_FMA;
723 for (size_t k = 24; k <= 80; k += 8) {
724 GemmMicrokernelTester()
725 .mr(1)
726 .nr(8)
727 .kr(1)
728 .sr(1)
729 .m(1)
730 .n(8)
731 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700732 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700733 }
734 }
735
Marat Dukhande06f492020-04-09 00:19:31 -0700736 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700737 TEST_REQUIRES_ARM_NEON_FMA;
738 for (size_t k = 24; k <= 80; k += 8) {
739 GemmMicrokernelTester()
740 .mr(1)
741 .nr(8)
742 .kr(1)
743 .sr(1)
744 .m(1)
745 .n(8)
746 .k(k)
747 .a_stride(83)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700748 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700749 }
750 }
751
Marat Dukhande06f492020-04-09 00:19:31 -0700752 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700753 TEST_REQUIRES_ARM_NEON_FMA;
754 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800755 for (uint32_t n = 1; n <= 8; n++) {
756 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700757 GemmMicrokernelTester()
758 .mr(1)
759 .nr(8)
760 .kr(1)
761 .sr(1)
762 .m(m)
763 .n(n)
764 .k(k)
765 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700766 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700767 }
768 }
769 }
770 }
771
Marat Dukhande06f492020-04-09 00:19:31 -0700772 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700773 TEST_REQUIRES_ARM_NEON_FMA;
774 for (uint32_t n = 9; n < 16; n++) {
775 for (size_t k = 1; k <= 40; k += 9) {
776 GemmMicrokernelTester()
777 .mr(1)
778 .nr(8)
779 .kr(1)
780 .sr(1)
781 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800782 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -0700783 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700784 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700785 }
786 }
787 }
788
Marat Dukhande06f492020-04-09 00:19:31 -0700789 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700790 TEST_REQUIRES_ARM_NEON_FMA;
791 for (uint32_t n = 9; n < 16; n++) {
792 for (size_t k = 1; k <= 40; k += 9) {
793 GemmMicrokernelTester()
794 .mr(1)
795 .nr(8)
796 .kr(1)
797 .sr(1)
798 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800799 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -0700800 .k(k)
801 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700802 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700803 }
804 }
805 }
806
Marat Dukhande06f492020-04-09 00:19:31 -0700807 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700808 TEST_REQUIRES_ARM_NEON_FMA;
809 for (uint32_t n = 9; n < 16; n++) {
810 for (size_t k = 1; k <= 40; k += 9) {
811 GemmMicrokernelTester()
812 .mr(1)
813 .nr(8)
814 .kr(1)
815 .sr(1)
816 .m(1)
817 .n(n)
818 .k(k)
819 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700820 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700821 }
822 }
823 }
824
Marat Dukhande06f492020-04-09 00:19:31 -0700825 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700826 TEST_REQUIRES_ARM_NEON_FMA;
827 for (uint32_t n = 9; n < 16; n++) {
828 for (size_t k = 1; k <= 40; k += 9) {
829 for (uint32_t m = 1; m <= 1; m++) {
830 GemmMicrokernelTester()
831 .mr(1)
832 .nr(8)
833 .kr(1)
834 .sr(1)
835 .m(m)
836 .n(n)
837 .k(k)
838 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700839 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700840 }
841 }
842 }
843 }
844
Marat Dukhande06f492020-04-09 00:19:31 -0700845 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700846 TEST_REQUIRES_ARM_NEON_FMA;
847 for (uint32_t n = 16; n <= 24; n += 8) {
848 for (size_t k = 1; k <= 40; k += 9) {
849 GemmMicrokernelTester()
850 .mr(1)
851 .nr(8)
852 .kr(1)
853 .sr(1)
854 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800855 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -0700856 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700857 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700858 }
859 }
860 }
861
Marat Dukhande06f492020-04-09 00:19:31 -0700862 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700863 TEST_REQUIRES_ARM_NEON_FMA;
864 for (uint32_t n = 16; n <= 24; n += 8) {
865 for (size_t k = 1; k <= 40; k += 9) {
866 GemmMicrokernelTester()
867 .mr(1)
868 .nr(8)
869 .kr(1)
870 .sr(1)
871 .m(1)
872 .n(n)
873 .k(k)
874 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700875 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700876 }
877 }
878 }
879
Marat Dukhande06f492020-04-09 00:19:31 -0700880 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700881 TEST_REQUIRES_ARM_NEON_FMA;
882 for (uint32_t n = 16; n <= 24; n += 8) {
883 for (size_t k = 1; k <= 40; k += 9) {
884 GemmMicrokernelTester()
885 .mr(1)
886 .nr(8)
887 .kr(1)
888 .sr(1)
889 .m(1)
890 .n(n)
891 .k(k)
892 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700893 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700894 }
895 }
896 }
897
Marat Dukhande06f492020-04-09 00:19:31 -0700898 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700899 TEST_REQUIRES_ARM_NEON_FMA;
900 for (uint32_t n = 16; n <= 24; n += 8) {
901 for (size_t k = 1; k <= 40; k += 9) {
902 for (uint32_t m = 1; m <= 1; m++) {
903 GemmMicrokernelTester()
904 .mr(1)
905 .nr(8)
906 .kr(1)
907 .sr(1)
908 .m(m)
909 .n(n)
910 .k(k)
911 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700912 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700913 }
914 }
915 }
916 }
917
Marat Dukhande06f492020-04-09 00:19:31 -0700918 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700919 TEST_REQUIRES_ARM_NEON_FMA;
920 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800921 for (uint32_t n = 1; n <= 8; n++) {
922 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700923 GemmMicrokernelTester()
924 .mr(1)
925 .nr(8)
926 .kr(1)
927 .sr(1)
928 .m(m)
929 .n(n)
930 .k(k)
931 .cm_stride(11)
932 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700933 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700934 }
935 }
936 }
937 }
938
Marat Dukhande06f492020-04-09 00:19:31 -0700939 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700940 TEST_REQUIRES_ARM_NEON_FMA;
941 GemmMicrokernelTester()
942 .mr(1)
943 .nr(8)
944 .kr(1)
945 .sr(1)
946 .m(1)
947 .n(8)
948 .k(8)
949 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700950 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700951 }
952
Marat Dukhande06f492020-04-09 00:19:31 -0700953 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700954 TEST_REQUIRES_ARM_NEON_FMA;
955 GemmMicrokernelTester()
956 .mr(1)
957 .nr(8)
958 .kr(1)
959 .sr(1)
960 .m(1)
961 .n(8)
962 .k(8)
963 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700964 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700965 }
966
Marat Dukhande06f492020-04-09 00:19:31 -0700967 TEST(F32_GEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700968 TEST_REQUIRES_ARM_NEON_FMA;
969 GemmMicrokernelTester()
970 .mr(1)
971 .nr(8)
972 .kr(1)
973 .sr(1)
974 .m(1)
975 .n(8)
976 .k(8)
977 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700978 .Test(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700979 }
980#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
981
982
983#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -0700984 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700985 TEST_REQUIRES_ARM_NEON_FMA;
986 GemmMicrokernelTester()
987 .mr(4)
988 .nr(8)
989 .kr(1)
990 .sr(1)
991 .m(4)
992 .n(8)
993 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700994 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700995 }
996
Marat Dukhande06f492020-04-09 00:19:31 -0700997 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700998 TEST_REQUIRES_ARM_NEON_FMA;
999 GemmMicrokernelTester()
1000 .mr(4)
1001 .nr(8)
1002 .kr(1)
1003 .sr(1)
1004 .m(4)
1005 .n(8)
1006 .k(4)
1007 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001008 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001009 }
1010
Marat Dukhande06f492020-04-09 00:19:31 -07001011 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001012 TEST_REQUIRES_ARM_NEON_FMA;
1013 GemmMicrokernelTester()
1014 .mr(4)
1015 .nr(8)
1016 .kr(1)
1017 .sr(1)
1018 .m(4)
1019 .n(8)
1020 .k(4)
1021 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001022 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001023 }
1024
Marat Dukhande06f492020-04-09 00:19:31 -07001025 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001026 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001027 for (uint32_t n = 1; n <= 8; n++) {
1028 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001029 GemmMicrokernelTester()
1030 .mr(4)
1031 .nr(8)
1032 .kr(1)
1033 .sr(1)
1034 .m(m)
1035 .n(n)
1036 .k(4)
1037 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001038 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001039 }
1040 }
1041 }
1042
Marat Dukhande06f492020-04-09 00:19:31 -07001043 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001044 TEST_REQUIRES_ARM_NEON_FMA;
1045 for (uint32_t m = 1; m <= 4; m++) {
1046 GemmMicrokernelTester()
1047 .mr(4)
1048 .nr(8)
1049 .kr(1)
1050 .sr(1)
1051 .m(m)
1052 .n(8)
1053 .k(4)
1054 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001055 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001056 }
1057 }
1058
Marat Dukhande06f492020-04-09 00:19:31 -07001059 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001060 TEST_REQUIRES_ARM_NEON_FMA;
1061 for (uint32_t n = 1; n <= 8; n++) {
1062 GemmMicrokernelTester()
1063 .mr(4)
1064 .nr(8)
1065 .kr(1)
1066 .sr(1)
1067 .m(4)
1068 .n(n)
1069 .k(4)
1070 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001071 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001072 }
1073 }
1074
Marat Dukhande06f492020-04-09 00:19:31 -07001075 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001076 TEST_REQUIRES_ARM_NEON_FMA;
1077 GemmMicrokernelTester()
1078 .mr(4)
1079 .nr(8)
1080 .kr(1)
1081 .sr(1)
1082 .m(4)
1083 .n(8)
1084 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001085 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001086 }
1087
Marat Dukhande06f492020-04-09 00:19:31 -07001088 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001089 TEST_REQUIRES_ARM_NEON_FMA;
1090 GemmMicrokernelTester()
1091 .mr(4)
1092 .nr(8)
1093 .kr(1)
1094 .sr(1)
1095 .m(4)
1096 .n(8)
1097 .k(8)
1098 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001099 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001100 }
1101
Marat Dukhande06f492020-04-09 00:19:31 -07001102 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001103 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001104 for (uint32_t n = 1; n <= 8; n++) {
1105 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001106 GemmMicrokernelTester()
1107 .mr(4)
1108 .nr(8)
1109 .kr(1)
1110 .sr(1)
1111 .m(m)
1112 .n(n)
1113 .k(8)
1114 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001115 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001116 }
1117 }
1118 }
1119
Marat Dukhande06f492020-04-09 00:19:31 -07001120 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001121 TEST_REQUIRES_ARM_NEON_FMA;
1122 for (size_t k = 1; k < 8; k++) {
1123 GemmMicrokernelTester()
1124 .mr(4)
1125 .nr(8)
1126 .kr(1)
1127 .sr(1)
1128 .m(4)
1129 .n(8)
1130 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001131 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001132 }
1133 }
1134
Marat Dukhande06f492020-04-09 00:19:31 -07001135 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001136 TEST_REQUIRES_ARM_NEON_FMA;
1137 for (size_t k = 1; k < 8; k++) {
1138 GemmMicrokernelTester()
1139 .mr(4)
1140 .nr(8)
1141 .kr(1)
1142 .sr(1)
1143 .m(4)
1144 .n(8)
1145 .k(k)
1146 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001147 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001148 }
1149 }
1150
Marat Dukhande06f492020-04-09 00:19:31 -07001151 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001152 TEST_REQUIRES_ARM_NEON_FMA;
1153 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001154 for (uint32_t n = 1; n <= 8; n++) {
1155 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001156 GemmMicrokernelTester()
1157 .mr(4)
1158 .nr(8)
1159 .kr(1)
1160 .sr(1)
1161 .m(m)
1162 .n(n)
1163 .k(k)
1164 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001165 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001166 }
1167 }
1168 }
1169 }
1170
Marat Dukhande06f492020-04-09 00:19:31 -07001171 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001172 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001173 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001174 GemmMicrokernelTester()
1175 .mr(4)
1176 .nr(8)
1177 .kr(1)
1178 .sr(1)
1179 .m(4)
1180 .n(8)
1181 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001182 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001183 }
1184 }
1185
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001186 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001187 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001188 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001189 GemmMicrokernelTester()
1190 .mr(4)
1191 .nr(8)
1192 .kr(1)
1193 .sr(1)
1194 .m(4)
1195 .n(8)
1196 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001197 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001198 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001199 }
1200 }
1201
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001202 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001203 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001204 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001205 for (uint32_t n = 1; n <= 8; n++) {
1206 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001207 GemmMicrokernelTester()
1208 .mr(4)
1209 .nr(8)
1210 .kr(1)
1211 .sr(1)
1212 .m(m)
1213 .n(n)
1214 .k(k)
1215 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001216 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001217 }
1218 }
1219 }
1220 }
1221
Marat Dukhande06f492020-04-09 00:19:31 -07001222 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001223 TEST_REQUIRES_ARM_NEON_FMA;
1224 for (size_t k = 12; k <= 40; k += 4) {
1225 GemmMicrokernelTester()
1226 .mr(4)
1227 .nr(8)
1228 .kr(1)
1229 .sr(1)
1230 .m(4)
1231 .n(8)
1232 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001233 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001234 }
1235 }
1236
Marat Dukhande06f492020-04-09 00:19:31 -07001237 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001238 TEST_REQUIRES_ARM_NEON_FMA;
1239 for (size_t k = 12; k <= 40; k += 4) {
1240 GemmMicrokernelTester()
1241 .mr(4)
1242 .nr(8)
1243 .kr(1)
1244 .sr(1)
1245 .m(4)
1246 .n(8)
1247 .k(k)
1248 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001249 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001250 }
1251 }
1252
Marat Dukhande06f492020-04-09 00:19:31 -07001253 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001254 TEST_REQUIRES_ARM_NEON_FMA;
1255 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001256 for (uint32_t n = 1; n <= 8; n++) {
1257 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001258 GemmMicrokernelTester()
1259 .mr(4)
1260 .nr(8)
1261 .kr(1)
1262 .sr(1)
1263 .m(m)
1264 .n(n)
1265 .k(k)
1266 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001267 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001268 }
1269 }
1270 }
1271 }
1272
Marat Dukhande06f492020-04-09 00:19:31 -07001273 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001274 TEST_REQUIRES_ARM_NEON_FMA;
1275 for (uint32_t n = 9; n < 16; n++) {
1276 for (size_t k = 1; k <= 20; k += 5) {
1277 GemmMicrokernelTester()
1278 .mr(4)
1279 .nr(8)
1280 .kr(1)
1281 .sr(1)
1282 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001283 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001284 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001285 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001286 }
1287 }
1288 }
1289
Marat Dukhande06f492020-04-09 00:19:31 -07001290 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001291 TEST_REQUIRES_ARM_NEON_FMA;
1292 for (uint32_t n = 9; n < 16; n++) {
1293 for (size_t k = 1; k <= 20; k += 5) {
1294 GemmMicrokernelTester()
1295 .mr(4)
1296 .nr(8)
1297 .kr(1)
1298 .sr(1)
1299 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001300 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001301 .k(k)
1302 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001303 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001304 }
1305 }
1306 }
1307
Marat Dukhande06f492020-04-09 00:19:31 -07001308 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001309 TEST_REQUIRES_ARM_NEON_FMA;
1310 for (uint32_t n = 9; n < 16; n++) {
1311 for (size_t k = 1; k <= 20; k += 5) {
1312 GemmMicrokernelTester()
1313 .mr(4)
1314 .nr(8)
1315 .kr(1)
1316 .sr(1)
1317 .m(4)
1318 .n(n)
1319 .k(k)
1320 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001321 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001322 }
1323 }
1324 }
1325
Marat Dukhande06f492020-04-09 00:19:31 -07001326 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001327 TEST_REQUIRES_ARM_NEON_FMA;
1328 for (uint32_t n = 9; n < 16; n++) {
1329 for (size_t k = 1; k <= 20; k += 5) {
1330 for (uint32_t m = 1; m <= 4; m++) {
1331 GemmMicrokernelTester()
1332 .mr(4)
1333 .nr(8)
1334 .kr(1)
1335 .sr(1)
1336 .m(m)
1337 .n(n)
1338 .k(k)
1339 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001340 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001341 }
1342 }
1343 }
1344 }
1345
Marat Dukhande06f492020-04-09 00:19:31 -07001346 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001347 TEST_REQUIRES_ARM_NEON_FMA;
1348 for (uint32_t n = 16; n <= 24; n += 8) {
1349 for (size_t k = 1; k <= 20; k += 5) {
1350 GemmMicrokernelTester()
1351 .mr(4)
1352 .nr(8)
1353 .kr(1)
1354 .sr(1)
1355 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001356 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001357 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001358 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001359 }
1360 }
1361 }
1362
Marat Dukhande06f492020-04-09 00:19:31 -07001363 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001364 TEST_REQUIRES_ARM_NEON_FMA;
1365 for (uint32_t n = 16; n <= 24; n += 8) {
1366 for (size_t k = 1; k <= 20; k += 5) {
1367 GemmMicrokernelTester()
1368 .mr(4)
1369 .nr(8)
1370 .kr(1)
1371 .sr(1)
1372 .m(4)
1373 .n(n)
1374 .k(k)
1375 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001376 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001377 }
1378 }
1379 }
1380
Marat Dukhande06f492020-04-09 00:19:31 -07001381 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001382 TEST_REQUIRES_ARM_NEON_FMA;
1383 for (uint32_t n = 16; n <= 24; n += 8) {
1384 for (size_t k = 1; k <= 20; k += 5) {
1385 GemmMicrokernelTester()
1386 .mr(4)
1387 .nr(8)
1388 .kr(1)
1389 .sr(1)
1390 .m(4)
1391 .n(n)
1392 .k(k)
1393 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001394 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001395 }
1396 }
1397 }
1398
Marat Dukhande06f492020-04-09 00:19:31 -07001399 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001400 TEST_REQUIRES_ARM_NEON_FMA;
1401 for (uint32_t n = 16; n <= 24; n += 8) {
1402 for (size_t k = 1; k <= 20; k += 5) {
1403 for (uint32_t m = 1; m <= 4; m++) {
1404 GemmMicrokernelTester()
1405 .mr(4)
1406 .nr(8)
1407 .kr(1)
1408 .sr(1)
1409 .m(m)
1410 .n(n)
1411 .k(k)
1412 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001413 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001414 }
1415 }
1416 }
1417 }
1418
Marat Dukhande06f492020-04-09 00:19:31 -07001419 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001420 TEST_REQUIRES_ARM_NEON_FMA;
1421 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001422 for (uint32_t n = 1; n <= 8; n++) {
1423 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001424 GemmMicrokernelTester()
1425 .mr(4)
1426 .nr(8)
1427 .kr(1)
1428 .sr(1)
1429 .m(m)
1430 .n(n)
1431 .k(k)
1432 .cm_stride(11)
1433 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001434 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001435 }
1436 }
1437 }
1438 }
1439
Marat Dukhande06f492020-04-09 00:19:31 -07001440 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001441 TEST_REQUIRES_ARM_NEON_FMA;
1442 GemmMicrokernelTester()
1443 .mr(4)
1444 .nr(8)
1445 .kr(1)
1446 .sr(1)
1447 .m(4)
1448 .n(8)
1449 .k(4)
1450 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001451 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001452 }
1453
Marat Dukhande06f492020-04-09 00:19:31 -07001454 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001455 TEST_REQUIRES_ARM_NEON_FMA;
1456 GemmMicrokernelTester()
1457 .mr(4)
1458 .nr(8)
1459 .kr(1)
1460 .sr(1)
1461 .m(4)
1462 .n(8)
1463 .k(4)
1464 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001465 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001466 }
1467
Marat Dukhande06f492020-04-09 00:19:31 -07001468 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001469 TEST_REQUIRES_ARM_NEON_FMA;
1470 GemmMicrokernelTester()
1471 .mr(4)
1472 .nr(8)
1473 .kr(1)
1474 .sr(1)
1475 .m(4)
1476 .n(8)
1477 .k(4)
1478 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001479 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001480 }
1481#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1482
1483
1484#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07001485 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001486 TEST_REQUIRES_ARM_NEON_FMA;
1487 GemmMicrokernelTester()
1488 .mr(4)
1489 .nr(8)
1490 .kr(1)
1491 .sr(1)
1492 .m(4)
1493 .n(8)
1494 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001495 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001496 }
1497
Marat Dukhande06f492020-04-09 00:19:31 -07001498 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001499 TEST_REQUIRES_ARM_NEON_FMA;
1500 GemmMicrokernelTester()
1501 .mr(4)
1502 .nr(8)
1503 .kr(1)
1504 .sr(1)
1505 .m(4)
1506 .n(8)
1507 .k(8)
1508 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001509 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001510 }
1511
Marat Dukhande06f492020-04-09 00:19:31 -07001512 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001513 TEST_REQUIRES_ARM_NEON_FMA;
1514 GemmMicrokernelTester()
1515 .mr(4)
1516 .nr(8)
1517 .kr(1)
1518 .sr(1)
1519 .m(4)
1520 .n(8)
1521 .k(8)
1522 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001523 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001524 }
1525
Marat Dukhande06f492020-04-09 00:19:31 -07001526 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001527 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001528 for (uint32_t n = 1; n <= 8; n++) {
1529 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001530 GemmMicrokernelTester()
1531 .mr(4)
1532 .nr(8)
1533 .kr(1)
1534 .sr(1)
1535 .m(m)
1536 .n(n)
1537 .k(8)
1538 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001539 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001540 }
1541 }
1542 }
1543
Marat Dukhande06f492020-04-09 00:19:31 -07001544 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001545 TEST_REQUIRES_ARM_NEON_FMA;
1546 for (uint32_t m = 1; m <= 4; m++) {
1547 GemmMicrokernelTester()
1548 .mr(4)
1549 .nr(8)
1550 .kr(1)
1551 .sr(1)
1552 .m(m)
1553 .n(8)
1554 .k(8)
1555 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001556 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001557 }
1558 }
1559
Marat Dukhande06f492020-04-09 00:19:31 -07001560 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001561 TEST_REQUIRES_ARM_NEON_FMA;
1562 for (uint32_t n = 1; n <= 8; n++) {
1563 GemmMicrokernelTester()
1564 .mr(4)
1565 .nr(8)
1566 .kr(1)
1567 .sr(1)
1568 .m(4)
1569 .n(n)
1570 .k(8)
1571 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001572 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001573 }
1574 }
1575
Marat Dukhande06f492020-04-09 00:19:31 -07001576 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001577 TEST_REQUIRES_ARM_NEON_FMA;
1578 GemmMicrokernelTester()
1579 .mr(4)
1580 .nr(8)
1581 .kr(1)
1582 .sr(1)
1583 .m(4)
1584 .n(8)
1585 .k(16)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001586 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001587 }
1588
Marat Dukhande06f492020-04-09 00:19:31 -07001589 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001590 TEST_REQUIRES_ARM_NEON_FMA;
1591 GemmMicrokernelTester()
1592 .mr(4)
1593 .nr(8)
1594 .kr(1)
1595 .sr(1)
1596 .m(4)
1597 .n(8)
1598 .k(16)
1599 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001600 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001601 }
1602
Marat Dukhande06f492020-04-09 00:19:31 -07001603 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001604 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001605 for (uint32_t n = 1; n <= 8; n++) {
1606 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001607 GemmMicrokernelTester()
1608 .mr(4)
1609 .nr(8)
1610 .kr(1)
1611 .sr(1)
1612 .m(m)
1613 .n(n)
1614 .k(16)
1615 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001616 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001617 }
1618 }
1619 }
1620
Marat Dukhande06f492020-04-09 00:19:31 -07001621 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001622 TEST_REQUIRES_ARM_NEON_FMA;
1623 for (size_t k = 1; k < 16; k++) {
1624 GemmMicrokernelTester()
1625 .mr(4)
1626 .nr(8)
1627 .kr(1)
1628 .sr(1)
1629 .m(4)
1630 .n(8)
1631 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001632 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001633 }
1634 }
1635
Marat Dukhande06f492020-04-09 00:19:31 -07001636 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001637 TEST_REQUIRES_ARM_NEON_FMA;
1638 for (size_t k = 1; k < 16; k++) {
1639 GemmMicrokernelTester()
1640 .mr(4)
1641 .nr(8)
1642 .kr(1)
1643 .sr(1)
1644 .m(4)
1645 .n(8)
1646 .k(k)
1647 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001648 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001649 }
1650 }
1651
Marat Dukhande06f492020-04-09 00:19:31 -07001652 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001653 TEST_REQUIRES_ARM_NEON_FMA;
1654 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001655 for (uint32_t n = 1; n <= 8; n++) {
1656 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001657 GemmMicrokernelTester()
1658 .mr(4)
1659 .nr(8)
1660 .kr(1)
1661 .sr(1)
1662 .m(m)
1663 .n(n)
1664 .k(k)
1665 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001666 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001667 }
1668 }
1669 }
1670 }
1671
Marat Dukhande06f492020-04-09 00:19:31 -07001672 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001673 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001674 for (size_t k = 17; k < 32; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001675 GemmMicrokernelTester()
1676 .mr(4)
1677 .nr(8)
1678 .kr(1)
1679 .sr(1)
1680 .m(4)
1681 .n(8)
1682 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001683 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001684 }
1685 }
1686
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001687 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001688 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001689 for (size_t k = 17; k < 32; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001690 GemmMicrokernelTester()
1691 .mr(4)
1692 .nr(8)
1693 .kr(1)
1694 .sr(1)
1695 .m(4)
1696 .n(8)
1697 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001698 .a_stride(37)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001699 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001700 }
1701 }
1702
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001703 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001704 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001705 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001706 for (uint32_t n = 1; n <= 8; n++) {
1707 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001708 GemmMicrokernelTester()
1709 .mr(4)
1710 .nr(8)
1711 .kr(1)
1712 .sr(1)
1713 .m(m)
1714 .n(n)
1715 .k(k)
1716 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001717 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001718 }
1719 }
1720 }
1721 }
1722
Marat Dukhande06f492020-04-09 00:19:31 -07001723 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001724 TEST_REQUIRES_ARM_NEON_FMA;
1725 for (size_t k = 24; k <= 80; k += 8) {
1726 GemmMicrokernelTester()
1727 .mr(4)
1728 .nr(8)
1729 .kr(1)
1730 .sr(1)
1731 .m(4)
1732 .n(8)
1733 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001734 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001735 }
1736 }
1737
Marat Dukhande06f492020-04-09 00:19:31 -07001738 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001739 TEST_REQUIRES_ARM_NEON_FMA;
1740 for (size_t k = 24; k <= 80; k += 8) {
1741 GemmMicrokernelTester()
1742 .mr(4)
1743 .nr(8)
1744 .kr(1)
1745 .sr(1)
1746 .m(4)
1747 .n(8)
1748 .k(k)
1749 .a_stride(83)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001750 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001751 }
1752 }
1753
Marat Dukhande06f492020-04-09 00:19:31 -07001754 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001755 TEST_REQUIRES_ARM_NEON_FMA;
1756 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001757 for (uint32_t n = 1; n <= 8; n++) {
1758 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001759 GemmMicrokernelTester()
1760 .mr(4)
1761 .nr(8)
1762 .kr(1)
1763 .sr(1)
1764 .m(m)
1765 .n(n)
1766 .k(k)
1767 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001768 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001769 }
1770 }
1771 }
1772 }
1773
Marat Dukhande06f492020-04-09 00:19:31 -07001774 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001775 TEST_REQUIRES_ARM_NEON_FMA;
1776 for (uint32_t n = 9; n < 16; n++) {
1777 for (size_t k = 1; k <= 40; k += 9) {
1778 GemmMicrokernelTester()
1779 .mr(4)
1780 .nr(8)
1781 .kr(1)
1782 .sr(1)
1783 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001784 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001785 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001786 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001787 }
1788 }
1789 }
1790
Marat Dukhande06f492020-04-09 00:19:31 -07001791 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001792 TEST_REQUIRES_ARM_NEON_FMA;
1793 for (uint32_t n = 9; n < 16; n++) {
1794 for (size_t k = 1; k <= 40; k += 9) {
1795 GemmMicrokernelTester()
1796 .mr(4)
1797 .nr(8)
1798 .kr(1)
1799 .sr(1)
1800 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001801 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001802 .k(k)
1803 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001804 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001805 }
1806 }
1807 }
1808
Marat Dukhande06f492020-04-09 00:19:31 -07001809 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001810 TEST_REQUIRES_ARM_NEON_FMA;
1811 for (uint32_t n = 9; n < 16; n++) {
1812 for (size_t k = 1; k <= 40; k += 9) {
1813 GemmMicrokernelTester()
1814 .mr(4)
1815 .nr(8)
1816 .kr(1)
1817 .sr(1)
1818 .m(4)
1819 .n(n)
1820 .k(k)
1821 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001822 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001823 }
1824 }
1825 }
1826
Marat Dukhande06f492020-04-09 00:19:31 -07001827 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001828 TEST_REQUIRES_ARM_NEON_FMA;
1829 for (uint32_t n = 9; n < 16; n++) {
1830 for (size_t k = 1; k <= 40; k += 9) {
1831 for (uint32_t m = 1; m <= 4; m++) {
1832 GemmMicrokernelTester()
1833 .mr(4)
1834 .nr(8)
1835 .kr(1)
1836 .sr(1)
1837 .m(m)
1838 .n(n)
1839 .k(k)
1840 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001841 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001842 }
1843 }
1844 }
1845 }
1846
Marat Dukhande06f492020-04-09 00:19:31 -07001847 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001848 TEST_REQUIRES_ARM_NEON_FMA;
1849 for (uint32_t n = 16; n <= 24; n += 8) {
1850 for (size_t k = 1; k <= 40; k += 9) {
1851 GemmMicrokernelTester()
1852 .mr(4)
1853 .nr(8)
1854 .kr(1)
1855 .sr(1)
1856 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001857 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001858 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001859 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001860 }
1861 }
1862 }
1863
Marat Dukhande06f492020-04-09 00:19:31 -07001864 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001865 TEST_REQUIRES_ARM_NEON_FMA;
1866 for (uint32_t n = 16; n <= 24; n += 8) {
1867 for (size_t k = 1; k <= 40; k += 9) {
1868 GemmMicrokernelTester()
1869 .mr(4)
1870 .nr(8)
1871 .kr(1)
1872 .sr(1)
1873 .m(4)
1874 .n(n)
1875 .k(k)
1876 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001877 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001878 }
1879 }
1880 }
1881
Marat Dukhande06f492020-04-09 00:19:31 -07001882 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001883 TEST_REQUIRES_ARM_NEON_FMA;
1884 for (uint32_t n = 16; n <= 24; n += 8) {
1885 for (size_t k = 1; k <= 40; k += 9) {
1886 GemmMicrokernelTester()
1887 .mr(4)
1888 .nr(8)
1889 .kr(1)
1890 .sr(1)
1891 .m(4)
1892 .n(n)
1893 .k(k)
1894 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001895 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001896 }
1897 }
1898 }
1899
Marat Dukhande06f492020-04-09 00:19:31 -07001900 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001901 TEST_REQUIRES_ARM_NEON_FMA;
1902 for (uint32_t n = 16; n <= 24; n += 8) {
1903 for (size_t k = 1; k <= 40; k += 9) {
1904 for (uint32_t m = 1; m <= 4; m++) {
1905 GemmMicrokernelTester()
1906 .mr(4)
1907 .nr(8)
1908 .kr(1)
1909 .sr(1)
1910 .m(m)
1911 .n(n)
1912 .k(k)
1913 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001914 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001915 }
1916 }
1917 }
1918 }
1919
Marat Dukhande06f492020-04-09 00:19:31 -07001920 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001921 TEST_REQUIRES_ARM_NEON_FMA;
1922 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001923 for (uint32_t n = 1; n <= 8; n++) {
1924 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001925 GemmMicrokernelTester()
1926 .mr(4)
1927 .nr(8)
1928 .kr(1)
1929 .sr(1)
1930 .m(m)
1931 .n(n)
1932 .k(k)
1933 .cm_stride(11)
1934 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001935 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001936 }
1937 }
1938 }
1939 }
1940
Marat Dukhande06f492020-04-09 00:19:31 -07001941 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001942 TEST_REQUIRES_ARM_NEON_FMA;
1943 GemmMicrokernelTester()
1944 .mr(4)
1945 .nr(8)
1946 .kr(1)
1947 .sr(1)
1948 .m(4)
1949 .n(8)
1950 .k(8)
1951 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001952 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001953 }
1954
Marat Dukhande06f492020-04-09 00:19:31 -07001955 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001956 TEST_REQUIRES_ARM_NEON_FMA;
1957 GemmMicrokernelTester()
1958 .mr(4)
1959 .nr(8)
1960 .kr(1)
1961 .sr(1)
1962 .m(4)
1963 .n(8)
1964 .k(8)
1965 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001966 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001967 }
1968
Marat Dukhande06f492020-04-09 00:19:31 -07001969 TEST(F32_GEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001970 TEST_REQUIRES_ARM_NEON_FMA;
1971 GemmMicrokernelTester()
1972 .mr(4)
1973 .nr(8)
1974 .kr(1)
1975 .sr(1)
1976 .m(4)
1977 .n(8)
1978 .k(8)
1979 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001980 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001981 }
1982#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1983
1984
1985#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard143a1102021-06-15 09:15:34 -07001986 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
1987 TEST_REQUIRES_ARM_NEON_FMA;
1988 GemmMicrokernelTester()
1989 .mr(5)
1990 .nr(8)
1991 .kr(1)
1992 .sr(1)
1993 .m(5)
1994 .n(8)
1995 .k(8)
1996 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1997 }
1998
1999 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
2000 TEST_REQUIRES_ARM_NEON_FMA;
2001 GemmMicrokernelTester()
2002 .mr(5)
2003 .nr(8)
2004 .kr(1)
2005 .sr(1)
2006 .m(5)
2007 .n(8)
2008 .k(8)
2009 .cn_stride(11)
2010 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2011 }
2012
2013 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_strided_a) {
2014 TEST_REQUIRES_ARM_NEON_FMA;
2015 GemmMicrokernelTester()
2016 .mr(5)
2017 .nr(8)
2018 .kr(1)
2019 .sr(1)
2020 .m(5)
2021 .n(8)
2022 .k(8)
2023 .a_stride(11)
2024 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2025 }
2026
2027 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
2028 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002029 for (uint32_t n = 1; n <= 8; n++) {
2030 for (uint32_t m = 1; m <= 5; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002031 GemmMicrokernelTester()
2032 .mr(5)
2033 .nr(8)
2034 .kr(1)
2035 .sr(1)
2036 .m(m)
2037 .n(n)
2038 .k(8)
2039 .iterations(1)
2040 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2041 }
2042 }
2043 }
2044
2045 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
2046 TEST_REQUIRES_ARM_NEON_FMA;
2047 for (uint32_t m = 1; m <= 5; m++) {
2048 GemmMicrokernelTester()
2049 .mr(5)
2050 .nr(8)
2051 .kr(1)
2052 .sr(1)
2053 .m(m)
2054 .n(8)
2055 .k(8)
2056 .iterations(1)
2057 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2058 }
2059 }
2060
2061 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
2062 TEST_REQUIRES_ARM_NEON_FMA;
2063 for (uint32_t n = 1; n <= 8; n++) {
2064 GemmMicrokernelTester()
2065 .mr(5)
2066 .nr(8)
2067 .kr(1)
2068 .sr(1)
2069 .m(5)
2070 .n(n)
2071 .k(8)
2072 .iterations(1)
2073 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2074 }
2075 }
2076
2077 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
2078 TEST_REQUIRES_ARM_NEON_FMA;
2079 GemmMicrokernelTester()
2080 .mr(5)
2081 .nr(8)
2082 .kr(1)
2083 .sr(1)
2084 .m(5)
2085 .n(8)
2086 .k(16)
2087 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2088 }
2089
2090 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_strided_a) {
2091 TEST_REQUIRES_ARM_NEON_FMA;
2092 GemmMicrokernelTester()
2093 .mr(5)
2094 .nr(8)
2095 .kr(1)
2096 .sr(1)
2097 .m(5)
2098 .n(8)
2099 .k(16)
2100 .a_stride(19)
2101 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2102 }
2103
2104 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
2105 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002106 for (uint32_t n = 1; n <= 8; n++) {
2107 for (uint32_t m = 1; m <= 5; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002108 GemmMicrokernelTester()
2109 .mr(5)
2110 .nr(8)
2111 .kr(1)
2112 .sr(1)
2113 .m(m)
2114 .n(n)
2115 .k(16)
2116 .iterations(1)
2117 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2118 }
2119 }
2120 }
2121
2122 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
2123 TEST_REQUIRES_ARM_NEON_FMA;
2124 for (size_t k = 1; k < 16; k++) {
2125 GemmMicrokernelTester()
2126 .mr(5)
2127 .nr(8)
2128 .kr(1)
2129 .sr(1)
2130 .m(5)
2131 .n(8)
2132 .k(k)
2133 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2134 }
2135 }
2136
2137 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_strided_a) {
2138 TEST_REQUIRES_ARM_NEON_FMA;
2139 for (size_t k = 1; k < 16; k++) {
2140 GemmMicrokernelTester()
2141 .mr(5)
2142 .nr(8)
2143 .kr(1)
2144 .sr(1)
2145 .m(5)
2146 .n(8)
2147 .k(k)
2148 .a_stride(19)
2149 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2150 }
2151 }
2152
2153 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
2154 TEST_REQUIRES_ARM_NEON_FMA;
2155 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002156 for (uint32_t n = 1; n <= 8; n++) {
2157 for (uint32_t m = 1; m <= 5; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002158 GemmMicrokernelTester()
2159 .mr(5)
2160 .nr(8)
2161 .kr(1)
2162 .sr(1)
2163 .m(m)
2164 .n(n)
2165 .k(k)
2166 .iterations(1)
2167 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2168 }
2169 }
2170 }
2171 }
2172
2173 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
2174 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002175 for (size_t k = 17; k < 32; k++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002176 GemmMicrokernelTester()
2177 .mr(5)
2178 .nr(8)
2179 .kr(1)
2180 .sr(1)
2181 .m(5)
2182 .n(8)
2183 .k(k)
2184 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2185 }
2186 }
2187
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002188 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_strided_a) {
Frank Barchard143a1102021-06-15 09:15:34 -07002189 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002190 for (size_t k = 17; k < 32; k++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002191 GemmMicrokernelTester()
2192 .mr(5)
2193 .nr(8)
2194 .kr(1)
2195 .sr(1)
2196 .m(5)
2197 .n(8)
2198 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002199 .a_stride(37)
Frank Barchard143a1102021-06-15 09:15:34 -07002200 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2201 }
2202 }
2203
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002204 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
Frank Barchard143a1102021-06-15 09:15:34 -07002205 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002206 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002207 for (uint32_t n = 1; n <= 8; n++) {
2208 for (uint32_t m = 1; m <= 5; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002209 GemmMicrokernelTester()
2210 .mr(5)
2211 .nr(8)
2212 .kr(1)
2213 .sr(1)
2214 .m(m)
2215 .n(n)
2216 .k(k)
2217 .iterations(1)
2218 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2219 }
2220 }
2221 }
2222 }
2223
2224 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
2225 TEST_REQUIRES_ARM_NEON_FMA;
2226 for (size_t k = 24; k <= 80; k += 8) {
2227 GemmMicrokernelTester()
2228 .mr(5)
2229 .nr(8)
2230 .kr(1)
2231 .sr(1)
2232 .m(5)
2233 .n(8)
2234 .k(k)
2235 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2236 }
2237 }
2238
2239 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_strided_a) {
2240 TEST_REQUIRES_ARM_NEON_FMA;
2241 for (size_t k = 24; k <= 80; k += 8) {
2242 GemmMicrokernelTester()
2243 .mr(5)
2244 .nr(8)
2245 .kr(1)
2246 .sr(1)
2247 .m(5)
2248 .n(8)
2249 .k(k)
2250 .a_stride(83)
2251 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2252 }
2253 }
2254
2255 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
2256 TEST_REQUIRES_ARM_NEON_FMA;
2257 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002258 for (uint32_t n = 1; n <= 8; n++) {
2259 for (uint32_t m = 1; m <= 5; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002260 GemmMicrokernelTester()
2261 .mr(5)
2262 .nr(8)
2263 .kr(1)
2264 .sr(1)
2265 .m(m)
2266 .n(n)
2267 .k(k)
2268 .iterations(1)
2269 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2270 }
2271 }
2272 }
2273 }
2274
2275 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
2276 TEST_REQUIRES_ARM_NEON_FMA;
2277 for (uint32_t n = 9; n < 16; n++) {
2278 for (size_t k = 1; k <= 40; k += 9) {
2279 GemmMicrokernelTester()
2280 .mr(5)
2281 .nr(8)
2282 .kr(1)
2283 .sr(1)
2284 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002285 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07002286 .k(k)
2287 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2288 }
2289 }
2290 }
2291
2292 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
2293 TEST_REQUIRES_ARM_NEON_FMA;
2294 for (uint32_t n = 9; n < 16; n++) {
2295 for (size_t k = 1; k <= 40; k += 9) {
2296 GemmMicrokernelTester()
2297 .mr(5)
2298 .nr(8)
2299 .kr(1)
2300 .sr(1)
2301 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002302 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07002303 .k(k)
2304 .cn_stride(11)
2305 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2306 }
2307 }
2308 }
2309
2310 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_a) {
2311 TEST_REQUIRES_ARM_NEON_FMA;
2312 for (uint32_t n = 9; n < 16; n++) {
2313 for (size_t k = 1; k <= 40; k += 9) {
2314 GemmMicrokernelTester()
2315 .mr(5)
2316 .nr(8)
2317 .kr(1)
2318 .sr(1)
2319 .m(5)
2320 .n(n)
2321 .k(k)
2322 .a_stride(43)
2323 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2324 }
2325 }
2326 }
2327
2328 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
2329 TEST_REQUIRES_ARM_NEON_FMA;
2330 for (uint32_t n = 9; n < 16; n++) {
2331 for (size_t k = 1; k <= 40; k += 9) {
2332 for (uint32_t m = 1; m <= 5; m++) {
2333 GemmMicrokernelTester()
2334 .mr(5)
2335 .nr(8)
2336 .kr(1)
2337 .sr(1)
2338 .m(m)
2339 .n(n)
2340 .k(k)
2341 .iterations(1)
2342 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2343 }
2344 }
2345 }
2346 }
2347
2348 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
2349 TEST_REQUIRES_ARM_NEON_FMA;
2350 for (uint32_t n = 16; n <= 24; n += 8) {
2351 for (size_t k = 1; k <= 40; k += 9) {
2352 GemmMicrokernelTester()
2353 .mr(5)
2354 .nr(8)
2355 .kr(1)
2356 .sr(1)
2357 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002358 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07002359 .k(k)
2360 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2361 }
2362 }
2363 }
2364
2365 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
2366 TEST_REQUIRES_ARM_NEON_FMA;
2367 for (uint32_t n = 16; n <= 24; n += 8) {
2368 for (size_t k = 1; k <= 40; k += 9) {
2369 GemmMicrokernelTester()
2370 .mr(5)
2371 .nr(8)
2372 .kr(1)
2373 .sr(1)
2374 .m(5)
2375 .n(n)
2376 .k(k)
2377 .cn_stride(11)
2378 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2379 }
2380 }
2381 }
2382
2383 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_a) {
2384 TEST_REQUIRES_ARM_NEON_FMA;
2385 for (uint32_t n = 16; n <= 24; n += 8) {
2386 for (size_t k = 1; k <= 40; k += 9) {
2387 GemmMicrokernelTester()
2388 .mr(5)
2389 .nr(8)
2390 .kr(1)
2391 .sr(1)
2392 .m(5)
2393 .n(n)
2394 .k(k)
2395 .a_stride(43)
2396 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2397 }
2398 }
2399 }
2400
2401 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
2402 TEST_REQUIRES_ARM_NEON_FMA;
2403 for (uint32_t n = 16; n <= 24; n += 8) {
2404 for (size_t k = 1; k <= 40; k += 9) {
2405 for (uint32_t m = 1; m <= 5; m++) {
2406 GemmMicrokernelTester()
2407 .mr(5)
2408 .nr(8)
2409 .kr(1)
2410 .sr(1)
2411 .m(m)
2412 .n(n)
2413 .k(k)
2414 .iterations(1)
2415 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2416 }
2417 }
2418 }
2419 }
2420
2421 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
2422 TEST_REQUIRES_ARM_NEON_FMA;
2423 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002424 for (uint32_t n = 1; n <= 8; n++) {
2425 for (uint32_t m = 1; m <= 5; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002426 GemmMicrokernelTester()
2427 .mr(5)
2428 .nr(8)
2429 .kr(1)
2430 .sr(1)
2431 .m(m)
2432 .n(n)
2433 .k(k)
2434 .cm_stride(11)
2435 .iterations(1)
2436 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2437 }
2438 }
2439 }
2440 }
2441
2442 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
2443 TEST_REQUIRES_ARM_NEON_FMA;
2444 GemmMicrokernelTester()
2445 .mr(5)
2446 .nr(8)
2447 .kr(1)
2448 .sr(1)
2449 .m(5)
2450 .n(8)
2451 .k(8)
2452 .qmin(128)
2453 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2454 }
2455
2456 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
2457 TEST_REQUIRES_ARM_NEON_FMA;
2458 GemmMicrokernelTester()
2459 .mr(5)
2460 .nr(8)
2461 .kr(1)
2462 .sr(1)
2463 .m(5)
2464 .n(8)
2465 .k(8)
2466 .qmax(128)
2467 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2468 }
2469
2470 TEST(F32_GEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
2471 TEST_REQUIRES_ARM_NEON_FMA;
2472 GemmMicrokernelTester()
2473 .mr(5)
2474 .nr(8)
2475 .kr(1)
2476 .sr(1)
2477 .m(5)
2478 .n(8)
2479 .k(8)
2480 .cm_stride(11)
2481 .Test(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2482 }
2483#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2484
2485
2486#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07002487 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002488 TEST_REQUIRES_ARM_NEON_FMA;
2489 GemmMicrokernelTester()
2490 .mr(6)
2491 .nr(8)
2492 .kr(1)
2493 .sr(1)
2494 .m(6)
2495 .n(8)
2496 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002497 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002498 }
2499
Marat Dukhande06f492020-04-09 00:19:31 -07002500 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002501 TEST_REQUIRES_ARM_NEON_FMA;
2502 GemmMicrokernelTester()
2503 .mr(6)
2504 .nr(8)
2505 .kr(1)
2506 .sr(1)
2507 .m(6)
2508 .n(8)
2509 .k(4)
2510 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002511 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002512 }
2513
Marat Dukhande06f492020-04-09 00:19:31 -07002514 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002515 TEST_REQUIRES_ARM_NEON_FMA;
2516 GemmMicrokernelTester()
2517 .mr(6)
2518 .nr(8)
2519 .kr(1)
2520 .sr(1)
2521 .m(6)
2522 .n(8)
2523 .k(4)
2524 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002525 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002526 }
2527
Marat Dukhande06f492020-04-09 00:19:31 -07002528 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002529 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002530 for (uint32_t n = 1; n <= 8; n++) {
2531 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002532 GemmMicrokernelTester()
2533 .mr(6)
2534 .nr(8)
2535 .kr(1)
2536 .sr(1)
2537 .m(m)
2538 .n(n)
2539 .k(4)
2540 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002541 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002542 }
2543 }
2544 }
2545
Marat Dukhande06f492020-04-09 00:19:31 -07002546 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002547 TEST_REQUIRES_ARM_NEON_FMA;
2548 for (uint32_t m = 1; m <= 6; m++) {
2549 GemmMicrokernelTester()
2550 .mr(6)
2551 .nr(8)
2552 .kr(1)
2553 .sr(1)
2554 .m(m)
2555 .n(8)
2556 .k(4)
2557 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002558 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002559 }
2560 }
2561
Marat Dukhande06f492020-04-09 00:19:31 -07002562 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002563 TEST_REQUIRES_ARM_NEON_FMA;
2564 for (uint32_t n = 1; n <= 8; n++) {
2565 GemmMicrokernelTester()
2566 .mr(6)
2567 .nr(8)
2568 .kr(1)
2569 .sr(1)
2570 .m(6)
2571 .n(n)
2572 .k(4)
2573 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002574 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002575 }
2576 }
2577
Marat Dukhande06f492020-04-09 00:19:31 -07002578 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002579 TEST_REQUIRES_ARM_NEON_FMA;
2580 GemmMicrokernelTester()
2581 .mr(6)
2582 .nr(8)
2583 .kr(1)
2584 .sr(1)
2585 .m(6)
2586 .n(8)
2587 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002588 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002589 }
2590
Marat Dukhande06f492020-04-09 00:19:31 -07002591 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002592 TEST_REQUIRES_ARM_NEON_FMA;
2593 GemmMicrokernelTester()
2594 .mr(6)
2595 .nr(8)
2596 .kr(1)
2597 .sr(1)
2598 .m(6)
2599 .n(8)
2600 .k(8)
2601 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002602 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002603 }
2604
Marat Dukhande06f492020-04-09 00:19:31 -07002605 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002606 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002607 for (uint32_t n = 1; n <= 8; n++) {
2608 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002609 GemmMicrokernelTester()
2610 .mr(6)
2611 .nr(8)
2612 .kr(1)
2613 .sr(1)
2614 .m(m)
2615 .n(n)
2616 .k(8)
2617 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002618 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002619 }
2620 }
2621 }
2622
Marat Dukhande06f492020-04-09 00:19:31 -07002623 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002624 TEST_REQUIRES_ARM_NEON_FMA;
2625 for (size_t k = 1; k < 8; k++) {
2626 GemmMicrokernelTester()
2627 .mr(6)
2628 .nr(8)
2629 .kr(1)
2630 .sr(1)
2631 .m(6)
2632 .n(8)
2633 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002634 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002635 }
2636 }
2637
Marat Dukhande06f492020-04-09 00:19:31 -07002638 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002639 TEST_REQUIRES_ARM_NEON_FMA;
2640 for (size_t k = 1; k < 8; k++) {
2641 GemmMicrokernelTester()
2642 .mr(6)
2643 .nr(8)
2644 .kr(1)
2645 .sr(1)
2646 .m(6)
2647 .n(8)
2648 .k(k)
2649 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002650 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002651 }
2652 }
2653
Marat Dukhande06f492020-04-09 00:19:31 -07002654 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002655 TEST_REQUIRES_ARM_NEON_FMA;
2656 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002657 for (uint32_t n = 1; n <= 8; n++) {
2658 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002659 GemmMicrokernelTester()
2660 .mr(6)
2661 .nr(8)
2662 .kr(1)
2663 .sr(1)
2664 .m(m)
2665 .n(n)
2666 .k(k)
2667 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002668 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002669 }
2670 }
2671 }
2672 }
2673
Marat Dukhande06f492020-04-09 00:19:31 -07002674 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002675 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002676 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002677 GemmMicrokernelTester()
2678 .mr(6)
2679 .nr(8)
2680 .kr(1)
2681 .sr(1)
2682 .m(6)
2683 .n(8)
2684 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002685 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002686 }
2687 }
2688
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002689 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002690 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002691 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002692 GemmMicrokernelTester()
2693 .mr(6)
2694 .nr(8)
2695 .kr(1)
2696 .sr(1)
2697 .m(6)
2698 .n(8)
2699 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002700 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002701 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002702 }
2703 }
2704
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002705 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002706 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002707 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002708 for (uint32_t n = 1; n <= 8; n++) {
2709 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002710 GemmMicrokernelTester()
2711 .mr(6)
2712 .nr(8)
2713 .kr(1)
2714 .sr(1)
2715 .m(m)
2716 .n(n)
2717 .k(k)
2718 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002719 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002720 }
2721 }
2722 }
2723 }
2724
Marat Dukhande06f492020-04-09 00:19:31 -07002725 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002726 TEST_REQUIRES_ARM_NEON_FMA;
2727 for (size_t k = 12; k <= 40; k += 4) {
2728 GemmMicrokernelTester()
2729 .mr(6)
2730 .nr(8)
2731 .kr(1)
2732 .sr(1)
2733 .m(6)
2734 .n(8)
2735 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002736 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002737 }
2738 }
2739
Marat Dukhande06f492020-04-09 00:19:31 -07002740 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002741 TEST_REQUIRES_ARM_NEON_FMA;
2742 for (size_t k = 12; k <= 40; k += 4) {
2743 GemmMicrokernelTester()
2744 .mr(6)
2745 .nr(8)
2746 .kr(1)
2747 .sr(1)
2748 .m(6)
2749 .n(8)
2750 .k(k)
2751 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002752 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002753 }
2754 }
2755
Marat Dukhande06f492020-04-09 00:19:31 -07002756 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002757 TEST_REQUIRES_ARM_NEON_FMA;
2758 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002759 for (uint32_t n = 1; n <= 8; n++) {
2760 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002761 GemmMicrokernelTester()
2762 .mr(6)
2763 .nr(8)
2764 .kr(1)
2765 .sr(1)
2766 .m(m)
2767 .n(n)
2768 .k(k)
2769 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002770 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002771 }
2772 }
2773 }
2774 }
2775
Marat Dukhande06f492020-04-09 00:19:31 -07002776 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002777 TEST_REQUIRES_ARM_NEON_FMA;
2778 for (uint32_t n = 9; n < 16; n++) {
2779 for (size_t k = 1; k <= 20; k += 5) {
2780 GemmMicrokernelTester()
2781 .mr(6)
2782 .nr(8)
2783 .kr(1)
2784 .sr(1)
2785 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002786 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002787 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002788 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002789 }
2790 }
2791 }
2792
Marat Dukhande06f492020-04-09 00:19:31 -07002793 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002794 TEST_REQUIRES_ARM_NEON_FMA;
2795 for (uint32_t n = 9; n < 16; n++) {
2796 for (size_t k = 1; k <= 20; k += 5) {
2797 GemmMicrokernelTester()
2798 .mr(6)
2799 .nr(8)
2800 .kr(1)
2801 .sr(1)
2802 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002803 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002804 .k(k)
2805 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002806 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002807 }
2808 }
2809 }
2810
Marat Dukhande06f492020-04-09 00:19:31 -07002811 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002812 TEST_REQUIRES_ARM_NEON_FMA;
2813 for (uint32_t n = 9; n < 16; n++) {
2814 for (size_t k = 1; k <= 20; k += 5) {
2815 GemmMicrokernelTester()
2816 .mr(6)
2817 .nr(8)
2818 .kr(1)
2819 .sr(1)
2820 .m(6)
2821 .n(n)
2822 .k(k)
2823 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002824 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002825 }
2826 }
2827 }
2828
Marat Dukhande06f492020-04-09 00:19:31 -07002829 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002830 TEST_REQUIRES_ARM_NEON_FMA;
2831 for (uint32_t n = 9; n < 16; n++) {
2832 for (size_t k = 1; k <= 20; k += 5) {
2833 for (uint32_t m = 1; m <= 6; m++) {
2834 GemmMicrokernelTester()
2835 .mr(6)
2836 .nr(8)
2837 .kr(1)
2838 .sr(1)
2839 .m(m)
2840 .n(n)
2841 .k(k)
2842 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002843 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002844 }
2845 }
2846 }
2847 }
2848
Marat Dukhande06f492020-04-09 00:19:31 -07002849 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002850 TEST_REQUIRES_ARM_NEON_FMA;
2851 for (uint32_t n = 16; n <= 24; n += 8) {
2852 for (size_t k = 1; k <= 20; k += 5) {
2853 GemmMicrokernelTester()
2854 .mr(6)
2855 .nr(8)
2856 .kr(1)
2857 .sr(1)
2858 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002859 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002860 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002861 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002862 }
2863 }
2864 }
2865
Marat Dukhande06f492020-04-09 00:19:31 -07002866 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002867 TEST_REQUIRES_ARM_NEON_FMA;
2868 for (uint32_t n = 16; n <= 24; n += 8) {
2869 for (size_t k = 1; k <= 20; k += 5) {
2870 GemmMicrokernelTester()
2871 .mr(6)
2872 .nr(8)
2873 .kr(1)
2874 .sr(1)
2875 .m(6)
2876 .n(n)
2877 .k(k)
2878 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002879 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002880 }
2881 }
2882 }
2883
Marat Dukhande06f492020-04-09 00:19:31 -07002884 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002885 TEST_REQUIRES_ARM_NEON_FMA;
2886 for (uint32_t n = 16; n <= 24; n += 8) {
2887 for (size_t k = 1; k <= 20; k += 5) {
2888 GemmMicrokernelTester()
2889 .mr(6)
2890 .nr(8)
2891 .kr(1)
2892 .sr(1)
2893 .m(6)
2894 .n(n)
2895 .k(k)
2896 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002897 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002898 }
2899 }
2900 }
2901
Marat Dukhande06f492020-04-09 00:19:31 -07002902 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002903 TEST_REQUIRES_ARM_NEON_FMA;
2904 for (uint32_t n = 16; n <= 24; n += 8) {
2905 for (size_t k = 1; k <= 20; k += 5) {
2906 for (uint32_t m = 1; m <= 6; m++) {
2907 GemmMicrokernelTester()
2908 .mr(6)
2909 .nr(8)
2910 .kr(1)
2911 .sr(1)
2912 .m(m)
2913 .n(n)
2914 .k(k)
2915 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002916 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002917 }
2918 }
2919 }
2920 }
2921
Marat Dukhande06f492020-04-09 00:19:31 -07002922 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002923 TEST_REQUIRES_ARM_NEON_FMA;
2924 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002925 for (uint32_t n = 1; n <= 8; n++) {
2926 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002927 GemmMicrokernelTester()
2928 .mr(6)
2929 .nr(8)
2930 .kr(1)
2931 .sr(1)
2932 .m(m)
2933 .n(n)
2934 .k(k)
2935 .cm_stride(11)
2936 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002937 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002938 }
2939 }
2940 }
2941 }
2942
Marat Dukhande06f492020-04-09 00:19:31 -07002943 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002944 TEST_REQUIRES_ARM_NEON_FMA;
2945 GemmMicrokernelTester()
2946 .mr(6)
2947 .nr(8)
2948 .kr(1)
2949 .sr(1)
2950 .m(6)
2951 .n(8)
2952 .k(4)
2953 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002954 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002955 }
2956
Marat Dukhande06f492020-04-09 00:19:31 -07002957 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002958 TEST_REQUIRES_ARM_NEON_FMA;
2959 GemmMicrokernelTester()
2960 .mr(6)
2961 .nr(8)
2962 .kr(1)
2963 .sr(1)
2964 .m(6)
2965 .n(8)
2966 .k(4)
2967 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002968 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002969 }
2970
Marat Dukhande06f492020-04-09 00:19:31 -07002971 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002972 TEST_REQUIRES_ARM_NEON_FMA;
2973 GemmMicrokernelTester()
2974 .mr(6)
2975 .nr(8)
2976 .kr(1)
2977 .sr(1)
2978 .m(6)
2979 .n(8)
2980 .k(4)
2981 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002982 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002983 }
2984#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2985
2986
2987#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07002988 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002989 TEST_REQUIRES_ARM_NEON_FMA;
2990 GemmMicrokernelTester()
2991 .mr(6)
2992 .nr(8)
2993 .kr(1)
2994 .sr(1)
2995 .m(6)
2996 .n(8)
2997 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002998 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002999 }
3000
Marat Dukhande06f492020-04-09 00:19:31 -07003001 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003002 TEST_REQUIRES_ARM_NEON_FMA;
3003 GemmMicrokernelTester()
3004 .mr(6)
3005 .nr(8)
3006 .kr(1)
3007 .sr(1)
3008 .m(6)
3009 .n(8)
3010 .k(8)
3011 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003012 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003013 }
3014
Marat Dukhande06f492020-04-09 00:19:31 -07003015 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003016 TEST_REQUIRES_ARM_NEON_FMA;
3017 GemmMicrokernelTester()
3018 .mr(6)
3019 .nr(8)
3020 .kr(1)
3021 .sr(1)
3022 .m(6)
3023 .n(8)
3024 .k(8)
3025 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003026 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003027 }
3028
Marat Dukhande06f492020-04-09 00:19:31 -07003029 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003030 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003031 for (uint32_t n = 1; n <= 8; n++) {
3032 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003033 GemmMicrokernelTester()
3034 .mr(6)
3035 .nr(8)
3036 .kr(1)
3037 .sr(1)
3038 .m(m)
3039 .n(n)
3040 .k(8)
3041 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003042 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003043 }
3044 }
3045 }
3046
Marat Dukhande06f492020-04-09 00:19:31 -07003047 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003048 TEST_REQUIRES_ARM_NEON_FMA;
3049 for (uint32_t m = 1; m <= 6; m++) {
3050 GemmMicrokernelTester()
3051 .mr(6)
3052 .nr(8)
3053 .kr(1)
3054 .sr(1)
3055 .m(m)
3056 .n(8)
3057 .k(8)
3058 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003059 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003060 }
3061 }
3062
Marat Dukhande06f492020-04-09 00:19:31 -07003063 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003064 TEST_REQUIRES_ARM_NEON_FMA;
3065 for (uint32_t n = 1; n <= 8; n++) {
3066 GemmMicrokernelTester()
3067 .mr(6)
3068 .nr(8)
3069 .kr(1)
3070 .sr(1)
3071 .m(6)
3072 .n(n)
3073 .k(8)
3074 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003075 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003076 }
3077 }
3078
Marat Dukhande06f492020-04-09 00:19:31 -07003079 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003080 TEST_REQUIRES_ARM_NEON_FMA;
3081 GemmMicrokernelTester()
3082 .mr(6)
3083 .nr(8)
3084 .kr(1)
3085 .sr(1)
3086 .m(6)
3087 .n(8)
3088 .k(16)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003089 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003090 }
3091
Marat Dukhande06f492020-04-09 00:19:31 -07003092 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003093 TEST_REQUIRES_ARM_NEON_FMA;
3094 GemmMicrokernelTester()
3095 .mr(6)
3096 .nr(8)
3097 .kr(1)
3098 .sr(1)
3099 .m(6)
3100 .n(8)
3101 .k(16)
3102 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003103 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003104 }
3105
Marat Dukhande06f492020-04-09 00:19:31 -07003106 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003107 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003108 for (uint32_t n = 1; n <= 8; n++) {
3109 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003110 GemmMicrokernelTester()
3111 .mr(6)
3112 .nr(8)
3113 .kr(1)
3114 .sr(1)
3115 .m(m)
3116 .n(n)
3117 .k(16)
3118 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003119 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003120 }
3121 }
3122 }
3123
Marat Dukhande06f492020-04-09 00:19:31 -07003124 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003125 TEST_REQUIRES_ARM_NEON_FMA;
3126 for (size_t k = 1; k < 16; k++) {
3127 GemmMicrokernelTester()
3128 .mr(6)
3129 .nr(8)
3130 .kr(1)
3131 .sr(1)
3132 .m(6)
3133 .n(8)
3134 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003135 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003136 }
3137 }
3138
Marat Dukhande06f492020-04-09 00:19:31 -07003139 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003140 TEST_REQUIRES_ARM_NEON_FMA;
3141 for (size_t k = 1; k < 16; k++) {
3142 GemmMicrokernelTester()
3143 .mr(6)
3144 .nr(8)
3145 .kr(1)
3146 .sr(1)
3147 .m(6)
3148 .n(8)
3149 .k(k)
3150 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003151 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003152 }
3153 }
3154
Marat Dukhande06f492020-04-09 00:19:31 -07003155 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003156 TEST_REQUIRES_ARM_NEON_FMA;
3157 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003158 for (uint32_t n = 1; n <= 8; n++) {
3159 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003160 GemmMicrokernelTester()
3161 .mr(6)
3162 .nr(8)
3163 .kr(1)
3164 .sr(1)
3165 .m(m)
3166 .n(n)
3167 .k(k)
3168 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003169 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003170 }
3171 }
3172 }
3173 }
3174
Marat Dukhande06f492020-04-09 00:19:31 -07003175 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003176 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003177 for (size_t k = 17; k < 32; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003178 GemmMicrokernelTester()
3179 .mr(6)
3180 .nr(8)
3181 .kr(1)
3182 .sr(1)
3183 .m(6)
3184 .n(8)
3185 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003186 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003187 }
3188 }
3189
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003190 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003191 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003192 for (size_t k = 17; k < 32; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003193 GemmMicrokernelTester()
3194 .mr(6)
3195 .nr(8)
3196 .kr(1)
3197 .sr(1)
3198 .m(6)
3199 .n(8)
3200 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003201 .a_stride(37)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003202 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003203 }
3204 }
3205
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003206 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003207 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003208 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003209 for (uint32_t n = 1; n <= 8; n++) {
3210 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003211 GemmMicrokernelTester()
3212 .mr(6)
3213 .nr(8)
3214 .kr(1)
3215 .sr(1)
3216 .m(m)
3217 .n(n)
3218 .k(k)
3219 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003220 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003221 }
3222 }
3223 }
3224 }
3225
Marat Dukhande06f492020-04-09 00:19:31 -07003226 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003227 TEST_REQUIRES_ARM_NEON_FMA;
3228 for (size_t k = 24; k <= 80; k += 8) {
3229 GemmMicrokernelTester()
3230 .mr(6)
3231 .nr(8)
3232 .kr(1)
3233 .sr(1)
3234 .m(6)
3235 .n(8)
3236 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003237 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003238 }
3239 }
3240
Marat Dukhande06f492020-04-09 00:19:31 -07003241 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003242 TEST_REQUIRES_ARM_NEON_FMA;
3243 for (size_t k = 24; k <= 80; k += 8) {
3244 GemmMicrokernelTester()
3245 .mr(6)
3246 .nr(8)
3247 .kr(1)
3248 .sr(1)
3249 .m(6)
3250 .n(8)
3251 .k(k)
3252 .a_stride(83)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003253 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003254 }
3255 }
3256
Marat Dukhande06f492020-04-09 00:19:31 -07003257 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003258 TEST_REQUIRES_ARM_NEON_FMA;
3259 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003260 for (uint32_t n = 1; n <= 8; n++) {
3261 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003262 GemmMicrokernelTester()
3263 .mr(6)
3264 .nr(8)
3265 .kr(1)
3266 .sr(1)
3267 .m(m)
3268 .n(n)
3269 .k(k)
3270 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003271 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003272 }
3273 }
3274 }
3275 }
3276
Marat Dukhande06f492020-04-09 00:19:31 -07003277 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003278 TEST_REQUIRES_ARM_NEON_FMA;
3279 for (uint32_t n = 9; n < 16; n++) {
3280 for (size_t k = 1; k <= 40; k += 9) {
3281 GemmMicrokernelTester()
3282 .mr(6)
3283 .nr(8)
3284 .kr(1)
3285 .sr(1)
3286 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003287 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003288 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003289 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003290 }
3291 }
3292 }
3293
Marat Dukhande06f492020-04-09 00:19:31 -07003294 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003295 TEST_REQUIRES_ARM_NEON_FMA;
3296 for (uint32_t n = 9; n < 16; n++) {
3297 for (size_t k = 1; k <= 40; k += 9) {
3298 GemmMicrokernelTester()
3299 .mr(6)
3300 .nr(8)
3301 .kr(1)
3302 .sr(1)
3303 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003304 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003305 .k(k)
3306 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003307 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003308 }
3309 }
3310 }
3311
Marat Dukhande06f492020-04-09 00:19:31 -07003312 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003313 TEST_REQUIRES_ARM_NEON_FMA;
3314 for (uint32_t n = 9; n < 16; n++) {
3315 for (size_t k = 1; k <= 40; k += 9) {
3316 GemmMicrokernelTester()
3317 .mr(6)
3318 .nr(8)
3319 .kr(1)
3320 .sr(1)
3321 .m(6)
3322 .n(n)
3323 .k(k)
3324 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003325 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003326 }
3327 }
3328 }
3329
Marat Dukhande06f492020-04-09 00:19:31 -07003330 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003331 TEST_REQUIRES_ARM_NEON_FMA;
3332 for (uint32_t n = 9; n < 16; n++) {
3333 for (size_t k = 1; k <= 40; k += 9) {
3334 for (uint32_t m = 1; m <= 6; m++) {
3335 GemmMicrokernelTester()
3336 .mr(6)
3337 .nr(8)
3338 .kr(1)
3339 .sr(1)
3340 .m(m)
3341 .n(n)
3342 .k(k)
3343 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003344 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003345 }
3346 }
3347 }
3348 }
3349
Marat Dukhande06f492020-04-09 00:19:31 -07003350 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003351 TEST_REQUIRES_ARM_NEON_FMA;
3352 for (uint32_t n = 16; n <= 24; n += 8) {
3353 for (size_t k = 1; k <= 40; k += 9) {
3354 GemmMicrokernelTester()
3355 .mr(6)
3356 .nr(8)
3357 .kr(1)
3358 .sr(1)
3359 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003360 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003361 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003362 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003363 }
3364 }
3365 }
3366
Marat Dukhande06f492020-04-09 00:19:31 -07003367 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003368 TEST_REQUIRES_ARM_NEON_FMA;
3369 for (uint32_t n = 16; n <= 24; n += 8) {
3370 for (size_t k = 1; k <= 40; k += 9) {
3371 GemmMicrokernelTester()
3372 .mr(6)
3373 .nr(8)
3374 .kr(1)
3375 .sr(1)
3376 .m(6)
3377 .n(n)
3378 .k(k)
3379 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003380 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003381 }
3382 }
3383 }
3384
Marat Dukhande06f492020-04-09 00:19:31 -07003385 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003386 TEST_REQUIRES_ARM_NEON_FMA;
3387 for (uint32_t n = 16; n <= 24; n += 8) {
3388 for (size_t k = 1; k <= 40; k += 9) {
3389 GemmMicrokernelTester()
3390 .mr(6)
3391 .nr(8)
3392 .kr(1)
3393 .sr(1)
3394 .m(6)
3395 .n(n)
3396 .k(k)
3397 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003398 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003399 }
3400 }
3401 }
3402
Marat Dukhande06f492020-04-09 00:19:31 -07003403 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003404 TEST_REQUIRES_ARM_NEON_FMA;
3405 for (uint32_t n = 16; n <= 24; n += 8) {
3406 for (size_t k = 1; k <= 40; k += 9) {
3407 for (uint32_t m = 1; m <= 6; m++) {
3408 GemmMicrokernelTester()
3409 .mr(6)
3410 .nr(8)
3411 .kr(1)
3412 .sr(1)
3413 .m(m)
3414 .n(n)
3415 .k(k)
3416 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003417 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003418 }
3419 }
3420 }
3421 }
3422
Marat Dukhande06f492020-04-09 00:19:31 -07003423 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003424 TEST_REQUIRES_ARM_NEON_FMA;
3425 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003426 for (uint32_t n = 1; n <= 8; n++) {
3427 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003428 GemmMicrokernelTester()
3429 .mr(6)
3430 .nr(8)
3431 .kr(1)
3432 .sr(1)
3433 .m(m)
3434 .n(n)
3435 .k(k)
3436 .cm_stride(11)
3437 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003438 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003439 }
3440 }
3441 }
3442 }
3443
Marat Dukhande06f492020-04-09 00:19:31 -07003444 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003445 TEST_REQUIRES_ARM_NEON_FMA;
3446 GemmMicrokernelTester()
3447 .mr(6)
3448 .nr(8)
3449 .kr(1)
3450 .sr(1)
3451 .m(6)
3452 .n(8)
3453 .k(8)
3454 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003455 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003456 }
3457
Marat Dukhande06f492020-04-09 00:19:31 -07003458 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003459 TEST_REQUIRES_ARM_NEON_FMA;
3460 GemmMicrokernelTester()
3461 .mr(6)
3462 .nr(8)
3463 .kr(1)
3464 .sr(1)
3465 .m(6)
3466 .n(8)
3467 .k(8)
3468 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003469 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003470 }
3471
Marat Dukhande06f492020-04-09 00:19:31 -07003472 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003473 TEST_REQUIRES_ARM_NEON_FMA;
3474 GemmMicrokernelTester()
3475 .mr(6)
3476 .nr(8)
3477 .kr(1)
3478 .sr(1)
3479 .m(6)
3480 .n(8)
3481 .k(8)
3482 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003483 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003484 }
3485#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3486
3487
3488#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard143a1102021-06-15 09:15:34 -07003489 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
3490 TEST_REQUIRES_ARM_NEON_FMA;
3491 GemmMicrokernelTester()
3492 .mr(6)
3493 .nr(8)
3494 .kr(1)
3495 .sr(1)
3496 .m(6)
3497 .n(8)
3498 .k(8)
3499 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3500 }
3501
3502 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
3503 TEST_REQUIRES_ARM_NEON_FMA;
3504 GemmMicrokernelTester()
3505 .mr(6)
3506 .nr(8)
3507 .kr(1)
3508 .sr(1)
3509 .m(6)
3510 .n(8)
3511 .k(8)
3512 .cn_stride(11)
3513 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3514 }
3515
3516 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_strided_a) {
3517 TEST_REQUIRES_ARM_NEON_FMA;
3518 GemmMicrokernelTester()
3519 .mr(6)
3520 .nr(8)
3521 .kr(1)
3522 .sr(1)
3523 .m(6)
3524 .n(8)
3525 .k(8)
3526 .a_stride(11)
3527 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3528 }
3529
3530 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
3531 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003532 for (uint32_t n = 1; n <= 8; n++) {
3533 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07003534 GemmMicrokernelTester()
3535 .mr(6)
3536 .nr(8)
3537 .kr(1)
3538 .sr(1)
3539 .m(m)
3540 .n(n)
3541 .k(8)
3542 .iterations(1)
3543 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3544 }
3545 }
3546 }
3547
3548 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
3549 TEST_REQUIRES_ARM_NEON_FMA;
3550 for (uint32_t m = 1; m <= 6; m++) {
3551 GemmMicrokernelTester()
3552 .mr(6)
3553 .nr(8)
3554 .kr(1)
3555 .sr(1)
3556 .m(m)
3557 .n(8)
3558 .k(8)
3559 .iterations(1)
3560 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3561 }
3562 }
3563
3564 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
3565 TEST_REQUIRES_ARM_NEON_FMA;
3566 for (uint32_t n = 1; n <= 8; n++) {
3567 GemmMicrokernelTester()
3568 .mr(6)
3569 .nr(8)
3570 .kr(1)
3571 .sr(1)
3572 .m(6)
3573 .n(n)
3574 .k(8)
3575 .iterations(1)
3576 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3577 }
3578 }
3579
3580 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
3581 TEST_REQUIRES_ARM_NEON_FMA;
3582 GemmMicrokernelTester()
3583 .mr(6)
3584 .nr(8)
3585 .kr(1)
3586 .sr(1)
3587 .m(6)
3588 .n(8)
3589 .k(16)
3590 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3591 }
3592
3593 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_strided_a) {
3594 TEST_REQUIRES_ARM_NEON_FMA;
3595 GemmMicrokernelTester()
3596 .mr(6)
3597 .nr(8)
3598 .kr(1)
3599 .sr(1)
3600 .m(6)
3601 .n(8)
3602 .k(16)
3603 .a_stride(19)
3604 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3605 }
3606
3607 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
3608 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003609 for (uint32_t n = 1; n <= 8; n++) {
3610 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07003611 GemmMicrokernelTester()
3612 .mr(6)
3613 .nr(8)
3614 .kr(1)
3615 .sr(1)
3616 .m(m)
3617 .n(n)
3618 .k(16)
3619 .iterations(1)
3620 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3621 }
3622 }
3623 }
3624
3625 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
3626 TEST_REQUIRES_ARM_NEON_FMA;
3627 for (size_t k = 1; k < 16; k++) {
3628 GemmMicrokernelTester()
3629 .mr(6)
3630 .nr(8)
3631 .kr(1)
3632 .sr(1)
3633 .m(6)
3634 .n(8)
3635 .k(k)
3636 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3637 }
3638 }
3639
3640 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_strided_a) {
3641 TEST_REQUIRES_ARM_NEON_FMA;
3642 for (size_t k = 1; k < 16; k++) {
3643 GemmMicrokernelTester()
3644 .mr(6)
3645 .nr(8)
3646 .kr(1)
3647 .sr(1)
3648 .m(6)
3649 .n(8)
3650 .k(k)
3651 .a_stride(19)
3652 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3653 }
3654 }
3655
3656 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
3657 TEST_REQUIRES_ARM_NEON_FMA;
3658 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003659 for (uint32_t n = 1; n <= 8; n++) {
3660 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07003661 GemmMicrokernelTester()
3662 .mr(6)
3663 .nr(8)
3664 .kr(1)
3665 .sr(1)
3666 .m(m)
3667 .n(n)
3668 .k(k)
3669 .iterations(1)
3670 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3671 }
3672 }
3673 }
3674 }
3675
3676 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
3677 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003678 for (size_t k = 17; k < 32; k++) {
Frank Barchard143a1102021-06-15 09:15:34 -07003679 GemmMicrokernelTester()
3680 .mr(6)
3681 .nr(8)
3682 .kr(1)
3683 .sr(1)
3684 .m(6)
3685 .n(8)
3686 .k(k)
3687 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3688 }
3689 }
3690
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003691 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_strided_a) {
Frank Barchard143a1102021-06-15 09:15:34 -07003692 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003693 for (size_t k = 17; k < 32; k++) {
Frank Barchard143a1102021-06-15 09:15:34 -07003694 GemmMicrokernelTester()
3695 .mr(6)
3696 .nr(8)
3697 .kr(1)
3698 .sr(1)
3699 .m(6)
3700 .n(8)
3701 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003702 .a_stride(37)
Frank Barchard143a1102021-06-15 09:15:34 -07003703 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3704 }
3705 }
3706
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003707 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
Frank Barchard143a1102021-06-15 09:15:34 -07003708 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003709 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003710 for (uint32_t n = 1; n <= 8; n++) {
3711 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07003712 GemmMicrokernelTester()
3713 .mr(6)
3714 .nr(8)
3715 .kr(1)
3716 .sr(1)
3717 .m(m)
3718 .n(n)
3719 .k(k)
3720 .iterations(1)
3721 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3722 }
3723 }
3724 }
3725 }
3726
3727 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
3728 TEST_REQUIRES_ARM_NEON_FMA;
3729 for (size_t k = 24; k <= 80; k += 8) {
3730 GemmMicrokernelTester()
3731 .mr(6)
3732 .nr(8)
3733 .kr(1)
3734 .sr(1)
3735 .m(6)
3736 .n(8)
3737 .k(k)
3738 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3739 }
3740 }
3741
3742 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_strided_a) {
3743 TEST_REQUIRES_ARM_NEON_FMA;
3744 for (size_t k = 24; k <= 80; k += 8) {
3745 GemmMicrokernelTester()
3746 .mr(6)
3747 .nr(8)
3748 .kr(1)
3749 .sr(1)
3750 .m(6)
3751 .n(8)
3752 .k(k)
3753 .a_stride(83)
3754 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3755 }
3756 }
3757
3758 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
3759 TEST_REQUIRES_ARM_NEON_FMA;
3760 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003761 for (uint32_t n = 1; n <= 8; n++) {
3762 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07003763 GemmMicrokernelTester()
3764 .mr(6)
3765 .nr(8)
3766 .kr(1)
3767 .sr(1)
3768 .m(m)
3769 .n(n)
3770 .k(k)
3771 .iterations(1)
3772 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3773 }
3774 }
3775 }
3776 }
3777
3778 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
3779 TEST_REQUIRES_ARM_NEON_FMA;
3780 for (uint32_t n = 9; n < 16; n++) {
3781 for (size_t k = 1; k <= 40; k += 9) {
3782 GemmMicrokernelTester()
3783 .mr(6)
3784 .nr(8)
3785 .kr(1)
3786 .sr(1)
3787 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003788 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07003789 .k(k)
3790 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3791 }
3792 }
3793 }
3794
3795 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
3796 TEST_REQUIRES_ARM_NEON_FMA;
3797 for (uint32_t n = 9; n < 16; n++) {
3798 for (size_t k = 1; k <= 40; k += 9) {
3799 GemmMicrokernelTester()
3800 .mr(6)
3801 .nr(8)
3802 .kr(1)
3803 .sr(1)
3804 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003805 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07003806 .k(k)
3807 .cn_stride(11)
3808 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3809 }
3810 }
3811 }
3812
3813 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_a) {
3814 TEST_REQUIRES_ARM_NEON_FMA;
3815 for (uint32_t n = 9; n < 16; n++) {
3816 for (size_t k = 1; k <= 40; k += 9) {
3817 GemmMicrokernelTester()
3818 .mr(6)
3819 .nr(8)
3820 .kr(1)
3821 .sr(1)
3822 .m(6)
3823 .n(n)
3824 .k(k)
3825 .a_stride(43)
3826 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3827 }
3828 }
3829 }
3830
3831 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
3832 TEST_REQUIRES_ARM_NEON_FMA;
3833 for (uint32_t n = 9; n < 16; n++) {
3834 for (size_t k = 1; k <= 40; k += 9) {
3835 for (uint32_t m = 1; m <= 6; m++) {
3836 GemmMicrokernelTester()
3837 .mr(6)
3838 .nr(8)
3839 .kr(1)
3840 .sr(1)
3841 .m(m)
3842 .n(n)
3843 .k(k)
3844 .iterations(1)
3845 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3846 }
3847 }
3848 }
3849 }
3850
3851 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
3852 TEST_REQUIRES_ARM_NEON_FMA;
3853 for (uint32_t n = 16; n <= 24; n += 8) {
3854 for (size_t k = 1; k <= 40; k += 9) {
3855 GemmMicrokernelTester()
3856 .mr(6)
3857 .nr(8)
3858 .kr(1)
3859 .sr(1)
3860 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003861 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07003862 .k(k)
3863 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3864 }
3865 }
3866 }
3867
3868 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
3869 TEST_REQUIRES_ARM_NEON_FMA;
3870 for (uint32_t n = 16; n <= 24; n += 8) {
3871 for (size_t k = 1; k <= 40; k += 9) {
3872 GemmMicrokernelTester()
3873 .mr(6)
3874 .nr(8)
3875 .kr(1)
3876 .sr(1)
3877 .m(6)
3878 .n(n)
3879 .k(k)
3880 .cn_stride(11)
3881 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3882 }
3883 }
3884 }
3885
3886 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_a) {
3887 TEST_REQUIRES_ARM_NEON_FMA;
3888 for (uint32_t n = 16; n <= 24; n += 8) {
3889 for (size_t k = 1; k <= 40; k += 9) {
3890 GemmMicrokernelTester()
3891 .mr(6)
3892 .nr(8)
3893 .kr(1)
3894 .sr(1)
3895 .m(6)
3896 .n(n)
3897 .k(k)
3898 .a_stride(43)
3899 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3900 }
3901 }
3902 }
3903
3904 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
3905 TEST_REQUIRES_ARM_NEON_FMA;
3906 for (uint32_t n = 16; n <= 24; n += 8) {
3907 for (size_t k = 1; k <= 40; k += 9) {
3908 for (uint32_t m = 1; m <= 6; m++) {
3909 GemmMicrokernelTester()
3910 .mr(6)
3911 .nr(8)
3912 .kr(1)
3913 .sr(1)
3914 .m(m)
3915 .n(n)
3916 .k(k)
3917 .iterations(1)
3918 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3919 }
3920 }
3921 }
3922 }
3923
3924 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
3925 TEST_REQUIRES_ARM_NEON_FMA;
3926 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003927 for (uint32_t n = 1; n <= 8; n++) {
3928 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07003929 GemmMicrokernelTester()
3930 .mr(6)
3931 .nr(8)
3932 .kr(1)
3933 .sr(1)
3934 .m(m)
3935 .n(n)
3936 .k(k)
3937 .cm_stride(11)
3938 .iterations(1)
3939 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3940 }
3941 }
3942 }
3943 }
3944
3945 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
3946 TEST_REQUIRES_ARM_NEON_FMA;
3947 GemmMicrokernelTester()
3948 .mr(6)
3949 .nr(8)
3950 .kr(1)
3951 .sr(1)
3952 .m(6)
3953 .n(8)
3954 .k(8)
3955 .qmin(128)
3956 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3957 }
3958
3959 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
3960 TEST_REQUIRES_ARM_NEON_FMA;
3961 GemmMicrokernelTester()
3962 .mr(6)
3963 .nr(8)
3964 .kr(1)
3965 .sr(1)
3966 .m(6)
3967 .n(8)
3968 .k(8)
3969 .qmax(128)
3970 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3971 }
3972
3973 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
3974 TEST_REQUIRES_ARM_NEON_FMA;
3975 GemmMicrokernelTester()
3976 .mr(6)
3977 .nr(8)
3978 .kr(1)
3979 .sr(1)
3980 .m(6)
3981 .n(8)
3982 .k(8)
3983 .cm_stride(11)
3984 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3985 }
3986#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3987
3988
Marat Dukhanf6068062020-05-17 04:42:19 -07003989#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07003990 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003991 TEST_REQUIRES_ARM_NEON;
3992 GemmMicrokernelTester()
3993 .mr(4)
3994 .nr(8)
3995 .kr(1)
3996 .sr(1)
3997 .m(4)
3998 .n(8)
3999 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004000 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004001 }
4002
Marat Dukhande06f492020-04-09 00:19:31 -07004003 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004004 TEST_REQUIRES_ARM_NEON;
4005 GemmMicrokernelTester()
4006 .mr(4)
4007 .nr(8)
4008 .kr(1)
4009 .sr(1)
4010 .m(4)
4011 .n(8)
4012 .k(4)
4013 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004014 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004015 }
4016
Marat Dukhande06f492020-04-09 00:19:31 -07004017 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004018 TEST_REQUIRES_ARM_NEON;
4019 GemmMicrokernelTester()
4020 .mr(4)
4021 .nr(8)
4022 .kr(1)
4023 .sr(1)
4024 .m(4)
4025 .n(8)
4026 .k(4)
4027 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004028 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004029 }
4030
Marat Dukhande06f492020-04-09 00:19:31 -07004031 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004032 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004033 for (uint32_t n = 1; n <= 8; n++) {
4034 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004035 GemmMicrokernelTester()
4036 .mr(4)
4037 .nr(8)
4038 .kr(1)
4039 .sr(1)
4040 .m(m)
4041 .n(n)
4042 .k(4)
4043 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004044 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004045 }
4046 }
4047 }
4048
Marat Dukhande06f492020-04-09 00:19:31 -07004049 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004050 TEST_REQUIRES_ARM_NEON;
4051 for (uint32_t m = 1; m <= 4; m++) {
4052 GemmMicrokernelTester()
4053 .mr(4)
4054 .nr(8)
4055 .kr(1)
4056 .sr(1)
4057 .m(m)
4058 .n(8)
4059 .k(4)
4060 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004061 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004062 }
4063 }
4064
Marat Dukhande06f492020-04-09 00:19:31 -07004065 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004066 TEST_REQUIRES_ARM_NEON;
4067 for (uint32_t n = 1; n <= 8; n++) {
4068 GemmMicrokernelTester()
4069 .mr(4)
4070 .nr(8)
4071 .kr(1)
4072 .sr(1)
4073 .m(4)
4074 .n(n)
4075 .k(4)
4076 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004077 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004078 }
4079 }
4080
Marat Dukhande06f492020-04-09 00:19:31 -07004081 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004082 TEST_REQUIRES_ARM_NEON;
4083 GemmMicrokernelTester()
4084 .mr(4)
4085 .nr(8)
4086 .kr(1)
4087 .sr(1)
4088 .m(4)
4089 .n(8)
4090 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004091 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004092 }
4093
Marat Dukhande06f492020-04-09 00:19:31 -07004094 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004095 TEST_REQUIRES_ARM_NEON;
4096 GemmMicrokernelTester()
4097 .mr(4)
4098 .nr(8)
4099 .kr(1)
4100 .sr(1)
4101 .m(4)
4102 .n(8)
4103 .k(8)
4104 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004105 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004106 }
4107
Marat Dukhande06f492020-04-09 00:19:31 -07004108 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004109 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004110 for (uint32_t n = 1; n <= 8; n++) {
4111 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004112 GemmMicrokernelTester()
4113 .mr(4)
4114 .nr(8)
4115 .kr(1)
4116 .sr(1)
4117 .m(m)
4118 .n(n)
4119 .k(8)
4120 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004121 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004122 }
4123 }
4124 }
4125
Marat Dukhande06f492020-04-09 00:19:31 -07004126 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004127 TEST_REQUIRES_ARM_NEON;
4128 for (size_t k = 1; k < 8; k++) {
4129 GemmMicrokernelTester()
4130 .mr(4)
4131 .nr(8)
4132 .kr(1)
4133 .sr(1)
4134 .m(4)
4135 .n(8)
4136 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004137 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004138 }
4139 }
4140
Marat Dukhande06f492020-04-09 00:19:31 -07004141 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004142 TEST_REQUIRES_ARM_NEON;
4143 for (size_t k = 1; k < 8; k++) {
4144 GemmMicrokernelTester()
4145 .mr(4)
4146 .nr(8)
4147 .kr(1)
4148 .sr(1)
4149 .m(4)
4150 .n(8)
4151 .k(k)
4152 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004153 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004154 }
4155 }
4156
Marat Dukhande06f492020-04-09 00:19:31 -07004157 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004158 TEST_REQUIRES_ARM_NEON;
4159 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004160 for (uint32_t n = 1; n <= 8; n++) {
4161 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004162 GemmMicrokernelTester()
4163 .mr(4)
4164 .nr(8)
4165 .kr(1)
4166 .sr(1)
4167 .m(m)
4168 .n(n)
4169 .k(k)
4170 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004171 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004172 }
4173 }
4174 }
4175 }
4176
Marat Dukhande06f492020-04-09 00:19:31 -07004177 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004178 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004179 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004180 GemmMicrokernelTester()
4181 .mr(4)
4182 .nr(8)
4183 .kr(1)
4184 .sr(1)
4185 .m(4)
4186 .n(8)
4187 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004188 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004189 }
4190 }
4191
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004192 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004193 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004194 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004195 GemmMicrokernelTester()
4196 .mr(4)
4197 .nr(8)
4198 .kr(1)
4199 .sr(1)
4200 .m(4)
4201 .n(8)
4202 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004203 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004204 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004205 }
4206 }
4207
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004208 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004209 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004210 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004211 for (uint32_t n = 1; n <= 8; n++) {
4212 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004213 GemmMicrokernelTester()
4214 .mr(4)
4215 .nr(8)
4216 .kr(1)
4217 .sr(1)
4218 .m(m)
4219 .n(n)
4220 .k(k)
4221 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004222 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004223 }
4224 }
4225 }
4226 }
4227
Marat Dukhande06f492020-04-09 00:19:31 -07004228 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004229 TEST_REQUIRES_ARM_NEON;
4230 for (size_t k = 12; k <= 40; k += 4) {
4231 GemmMicrokernelTester()
4232 .mr(4)
4233 .nr(8)
4234 .kr(1)
4235 .sr(1)
4236 .m(4)
4237 .n(8)
4238 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004239 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004240 }
4241 }
4242
Marat Dukhande06f492020-04-09 00:19:31 -07004243 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004244 TEST_REQUIRES_ARM_NEON;
4245 for (size_t k = 12; k <= 40; k += 4) {
4246 GemmMicrokernelTester()
4247 .mr(4)
4248 .nr(8)
4249 .kr(1)
4250 .sr(1)
4251 .m(4)
4252 .n(8)
4253 .k(k)
4254 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004255 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004256 }
4257 }
4258
Marat Dukhande06f492020-04-09 00:19:31 -07004259 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004260 TEST_REQUIRES_ARM_NEON;
4261 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004262 for (uint32_t n = 1; n <= 8; n++) {
4263 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004264 GemmMicrokernelTester()
4265 .mr(4)
4266 .nr(8)
4267 .kr(1)
4268 .sr(1)
4269 .m(m)
4270 .n(n)
4271 .k(k)
4272 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004273 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004274 }
4275 }
4276 }
4277 }
4278
Marat Dukhande06f492020-04-09 00:19:31 -07004279 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004280 TEST_REQUIRES_ARM_NEON;
4281 for (uint32_t n = 9; n < 16; n++) {
4282 for (size_t k = 1; k <= 20; k += 5) {
4283 GemmMicrokernelTester()
4284 .mr(4)
4285 .nr(8)
4286 .kr(1)
4287 .sr(1)
4288 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004289 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004290 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004291 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004292 }
4293 }
4294 }
4295
Marat Dukhande06f492020-04-09 00:19:31 -07004296 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004297 TEST_REQUIRES_ARM_NEON;
4298 for (uint32_t n = 9; n < 16; n++) {
4299 for (size_t k = 1; k <= 20; k += 5) {
4300 GemmMicrokernelTester()
4301 .mr(4)
4302 .nr(8)
4303 .kr(1)
4304 .sr(1)
4305 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004306 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004307 .k(k)
4308 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004309 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004310 }
4311 }
4312 }
4313
Marat Dukhande06f492020-04-09 00:19:31 -07004314 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004315 TEST_REQUIRES_ARM_NEON;
4316 for (uint32_t n = 9; n < 16; n++) {
4317 for (size_t k = 1; k <= 20; k += 5) {
4318 GemmMicrokernelTester()
4319 .mr(4)
4320 .nr(8)
4321 .kr(1)
4322 .sr(1)
4323 .m(4)
4324 .n(n)
4325 .k(k)
4326 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004327 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004328 }
4329 }
4330 }
4331
Marat Dukhande06f492020-04-09 00:19:31 -07004332 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004333 TEST_REQUIRES_ARM_NEON;
4334 for (uint32_t n = 9; n < 16; n++) {
4335 for (size_t k = 1; k <= 20; k += 5) {
4336 for (uint32_t m = 1; m <= 4; m++) {
4337 GemmMicrokernelTester()
4338 .mr(4)
4339 .nr(8)
4340 .kr(1)
4341 .sr(1)
4342 .m(m)
4343 .n(n)
4344 .k(k)
4345 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004346 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004347 }
4348 }
4349 }
4350 }
4351
Marat Dukhande06f492020-04-09 00:19:31 -07004352 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004353 TEST_REQUIRES_ARM_NEON;
4354 for (uint32_t n = 16; n <= 24; n += 8) {
4355 for (size_t k = 1; k <= 20; k += 5) {
4356 GemmMicrokernelTester()
4357 .mr(4)
4358 .nr(8)
4359 .kr(1)
4360 .sr(1)
4361 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004362 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004363 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004364 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004365 }
4366 }
4367 }
4368
Marat Dukhande06f492020-04-09 00:19:31 -07004369 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004370 TEST_REQUIRES_ARM_NEON;
4371 for (uint32_t n = 16; n <= 24; n += 8) {
4372 for (size_t k = 1; k <= 20; k += 5) {
4373 GemmMicrokernelTester()
4374 .mr(4)
4375 .nr(8)
4376 .kr(1)
4377 .sr(1)
4378 .m(4)
4379 .n(n)
4380 .k(k)
4381 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004382 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004383 }
4384 }
4385 }
4386
Marat Dukhande06f492020-04-09 00:19:31 -07004387 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004388 TEST_REQUIRES_ARM_NEON;
4389 for (uint32_t n = 16; n <= 24; n += 8) {
4390 for (size_t k = 1; k <= 20; k += 5) {
4391 GemmMicrokernelTester()
4392 .mr(4)
4393 .nr(8)
4394 .kr(1)
4395 .sr(1)
4396 .m(4)
4397 .n(n)
4398 .k(k)
4399 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004400 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004401 }
4402 }
4403 }
4404
Marat Dukhande06f492020-04-09 00:19:31 -07004405 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004406 TEST_REQUIRES_ARM_NEON;
4407 for (uint32_t n = 16; n <= 24; n += 8) {
4408 for (size_t k = 1; k <= 20; k += 5) {
4409 for (uint32_t m = 1; m <= 4; m++) {
4410 GemmMicrokernelTester()
4411 .mr(4)
4412 .nr(8)
4413 .kr(1)
4414 .sr(1)
4415 .m(m)
4416 .n(n)
4417 .k(k)
4418 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004419 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004420 }
4421 }
4422 }
4423 }
4424
Marat Dukhande06f492020-04-09 00:19:31 -07004425 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004426 TEST_REQUIRES_ARM_NEON;
4427 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004428 for (uint32_t n = 1; n <= 8; n++) {
4429 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004430 GemmMicrokernelTester()
4431 .mr(4)
4432 .nr(8)
4433 .kr(1)
4434 .sr(1)
4435 .m(m)
4436 .n(n)
4437 .k(k)
4438 .cm_stride(11)
4439 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004440 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004441 }
4442 }
4443 }
4444 }
4445
Marat Dukhande06f492020-04-09 00:19:31 -07004446 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004447 TEST_REQUIRES_ARM_NEON;
4448 GemmMicrokernelTester()
4449 .mr(4)
4450 .nr(8)
4451 .kr(1)
4452 .sr(1)
4453 .m(4)
4454 .n(8)
4455 .k(4)
4456 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004457 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004458 }
4459
Marat Dukhande06f492020-04-09 00:19:31 -07004460 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004461 TEST_REQUIRES_ARM_NEON;
4462 GemmMicrokernelTester()
4463 .mr(4)
4464 .nr(8)
4465 .kr(1)
4466 .sr(1)
4467 .m(4)
4468 .n(8)
4469 .k(4)
4470 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004471 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004472 }
4473
Marat Dukhande06f492020-04-09 00:19:31 -07004474 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004475 TEST_REQUIRES_ARM_NEON;
4476 GemmMicrokernelTester()
4477 .mr(4)
4478 .nr(8)
4479 .kr(1)
4480 .sr(1)
4481 .m(4)
4482 .n(8)
4483 .k(4)
4484 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004485 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004486 }
Marat Dukhanf6068062020-05-17 04:42:19 -07004487#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Marat Dukhan1c587112020-04-08 20:04:28 -07004488
4489
Marat Dukhanf6068062020-05-17 04:42:19 -07004490#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07004491 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004492 TEST_REQUIRES_ARM_NEON;
4493 GemmMicrokernelTester()
4494 .mr(4)
4495 .nr(8)
4496 .kr(1)
4497 .sr(1)
4498 .m(4)
4499 .n(8)
4500 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004501 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004502 }
4503
Marat Dukhande06f492020-04-09 00:19:31 -07004504 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004505 TEST_REQUIRES_ARM_NEON;
4506 GemmMicrokernelTester()
4507 .mr(4)
4508 .nr(8)
4509 .kr(1)
4510 .sr(1)
4511 .m(4)
4512 .n(8)
4513 .k(4)
4514 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004515 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004516 }
4517
Marat Dukhande06f492020-04-09 00:19:31 -07004518 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004519 TEST_REQUIRES_ARM_NEON;
4520 GemmMicrokernelTester()
4521 .mr(4)
4522 .nr(8)
4523 .kr(1)
4524 .sr(1)
4525 .m(4)
4526 .n(8)
4527 .k(4)
4528 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004529 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004530 }
4531
Marat Dukhande06f492020-04-09 00:19:31 -07004532 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004533 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004534 for (uint32_t n = 1; n <= 8; n++) {
4535 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004536 GemmMicrokernelTester()
4537 .mr(4)
4538 .nr(8)
4539 .kr(1)
4540 .sr(1)
4541 .m(m)
4542 .n(n)
4543 .k(4)
4544 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004545 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004546 }
4547 }
4548 }
4549
Marat Dukhande06f492020-04-09 00:19:31 -07004550 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004551 TEST_REQUIRES_ARM_NEON;
4552 for (uint32_t m = 1; m <= 4; m++) {
4553 GemmMicrokernelTester()
4554 .mr(4)
4555 .nr(8)
4556 .kr(1)
4557 .sr(1)
4558 .m(m)
4559 .n(8)
4560 .k(4)
4561 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004562 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004563 }
4564 }
4565
Marat Dukhande06f492020-04-09 00:19:31 -07004566 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004567 TEST_REQUIRES_ARM_NEON;
4568 for (uint32_t n = 1; n <= 8; n++) {
4569 GemmMicrokernelTester()
4570 .mr(4)
4571 .nr(8)
4572 .kr(1)
4573 .sr(1)
4574 .m(4)
4575 .n(n)
4576 .k(4)
4577 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004578 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004579 }
4580 }
4581
Marat Dukhande06f492020-04-09 00:19:31 -07004582 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004583 TEST_REQUIRES_ARM_NEON;
4584 GemmMicrokernelTester()
4585 .mr(4)
4586 .nr(8)
4587 .kr(1)
4588 .sr(1)
4589 .m(4)
4590 .n(8)
4591 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004592 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004593 }
4594
Marat Dukhande06f492020-04-09 00:19:31 -07004595 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004596 TEST_REQUIRES_ARM_NEON;
4597 GemmMicrokernelTester()
4598 .mr(4)
4599 .nr(8)
4600 .kr(1)
4601 .sr(1)
4602 .m(4)
4603 .n(8)
4604 .k(8)
4605 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004606 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004607 }
4608
Marat Dukhande06f492020-04-09 00:19:31 -07004609 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004610 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004611 for (uint32_t n = 1; n <= 8; n++) {
4612 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004613 GemmMicrokernelTester()
4614 .mr(4)
4615 .nr(8)
4616 .kr(1)
4617 .sr(1)
4618 .m(m)
4619 .n(n)
4620 .k(8)
4621 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004622 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004623 }
4624 }
4625 }
4626
Marat Dukhande06f492020-04-09 00:19:31 -07004627 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004628 TEST_REQUIRES_ARM_NEON;
4629 for (size_t k = 1; k < 8; k++) {
4630 GemmMicrokernelTester()
4631 .mr(4)
4632 .nr(8)
4633 .kr(1)
4634 .sr(1)
4635 .m(4)
4636 .n(8)
4637 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004638 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004639 }
4640 }
4641
Marat Dukhande06f492020-04-09 00:19:31 -07004642 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004643 TEST_REQUIRES_ARM_NEON;
4644 for (size_t k = 1; k < 8; k++) {
4645 GemmMicrokernelTester()
4646 .mr(4)
4647 .nr(8)
4648 .kr(1)
4649 .sr(1)
4650 .m(4)
4651 .n(8)
4652 .k(k)
4653 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004654 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004655 }
4656 }
4657
Marat Dukhande06f492020-04-09 00:19:31 -07004658 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004659 TEST_REQUIRES_ARM_NEON;
4660 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004661 for (uint32_t n = 1; n <= 8; n++) {
4662 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004663 GemmMicrokernelTester()
4664 .mr(4)
4665 .nr(8)
4666 .kr(1)
4667 .sr(1)
4668 .m(m)
4669 .n(n)
4670 .k(k)
4671 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004672 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004673 }
4674 }
4675 }
4676 }
4677
Marat Dukhande06f492020-04-09 00:19:31 -07004678 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004679 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004680 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004681 GemmMicrokernelTester()
4682 .mr(4)
4683 .nr(8)
4684 .kr(1)
4685 .sr(1)
4686 .m(4)
4687 .n(8)
4688 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004689 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004690 }
4691 }
4692
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004693 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004694 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004695 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004696 GemmMicrokernelTester()
4697 .mr(4)
4698 .nr(8)
4699 .kr(1)
4700 .sr(1)
4701 .m(4)
4702 .n(8)
4703 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004704 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004705 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004706 }
4707 }
4708
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004709 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004710 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004711 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004712 for (uint32_t n = 1; n <= 8; n++) {
4713 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004714 GemmMicrokernelTester()
4715 .mr(4)
4716 .nr(8)
4717 .kr(1)
4718 .sr(1)
4719 .m(m)
4720 .n(n)
4721 .k(k)
4722 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004723 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004724 }
4725 }
4726 }
4727 }
4728
Marat Dukhande06f492020-04-09 00:19:31 -07004729 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004730 TEST_REQUIRES_ARM_NEON;
4731 for (size_t k = 12; k <= 40; k += 4) {
4732 GemmMicrokernelTester()
4733 .mr(4)
4734 .nr(8)
4735 .kr(1)
4736 .sr(1)
4737 .m(4)
4738 .n(8)
4739 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004740 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004741 }
4742 }
4743
Marat Dukhande06f492020-04-09 00:19:31 -07004744 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004745 TEST_REQUIRES_ARM_NEON;
4746 for (size_t k = 12; k <= 40; k += 4) {
4747 GemmMicrokernelTester()
4748 .mr(4)
4749 .nr(8)
4750 .kr(1)
4751 .sr(1)
4752 .m(4)
4753 .n(8)
4754 .k(k)
4755 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004756 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004757 }
4758 }
4759
Marat Dukhande06f492020-04-09 00:19:31 -07004760 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004761 TEST_REQUIRES_ARM_NEON;
4762 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004763 for (uint32_t n = 1; n <= 8; n++) {
4764 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004765 GemmMicrokernelTester()
4766 .mr(4)
4767 .nr(8)
4768 .kr(1)
4769 .sr(1)
4770 .m(m)
4771 .n(n)
4772 .k(k)
4773 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004774 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004775 }
4776 }
4777 }
4778 }
4779
Marat Dukhande06f492020-04-09 00:19:31 -07004780 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004781 TEST_REQUIRES_ARM_NEON;
4782 for (uint32_t n = 9; n < 16; n++) {
4783 for (size_t k = 1; k <= 20; k += 5) {
4784 GemmMicrokernelTester()
4785 .mr(4)
4786 .nr(8)
4787 .kr(1)
4788 .sr(1)
4789 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004790 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004791 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004792 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004793 }
4794 }
4795 }
4796
Marat Dukhande06f492020-04-09 00:19:31 -07004797 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004798 TEST_REQUIRES_ARM_NEON;
4799 for (uint32_t n = 9; n < 16; n++) {
4800 for (size_t k = 1; k <= 20; k += 5) {
4801 GemmMicrokernelTester()
4802 .mr(4)
4803 .nr(8)
4804 .kr(1)
4805 .sr(1)
4806 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004807 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004808 .k(k)
4809 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004810 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004811 }
4812 }
4813 }
4814
Marat Dukhande06f492020-04-09 00:19:31 -07004815 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004816 TEST_REQUIRES_ARM_NEON;
4817 for (uint32_t n = 9; n < 16; n++) {
4818 for (size_t k = 1; k <= 20; k += 5) {
4819 GemmMicrokernelTester()
4820 .mr(4)
4821 .nr(8)
4822 .kr(1)
4823 .sr(1)
4824 .m(4)
4825 .n(n)
4826 .k(k)
4827 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004828 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004829 }
4830 }
4831 }
4832
Marat Dukhande06f492020-04-09 00:19:31 -07004833 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004834 TEST_REQUIRES_ARM_NEON;
4835 for (uint32_t n = 9; n < 16; n++) {
4836 for (size_t k = 1; k <= 20; k += 5) {
4837 for (uint32_t m = 1; m <= 4; m++) {
4838 GemmMicrokernelTester()
4839 .mr(4)
4840 .nr(8)
4841 .kr(1)
4842 .sr(1)
4843 .m(m)
4844 .n(n)
4845 .k(k)
4846 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004847 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004848 }
4849 }
4850 }
4851 }
4852
Marat Dukhande06f492020-04-09 00:19:31 -07004853 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004854 TEST_REQUIRES_ARM_NEON;
4855 for (uint32_t n = 16; n <= 24; n += 8) {
4856 for (size_t k = 1; k <= 20; k += 5) {
4857 GemmMicrokernelTester()
4858 .mr(4)
4859 .nr(8)
4860 .kr(1)
4861 .sr(1)
4862 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004863 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004864 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004865 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004866 }
4867 }
4868 }
4869
Marat Dukhande06f492020-04-09 00:19:31 -07004870 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004871 TEST_REQUIRES_ARM_NEON;
4872 for (uint32_t n = 16; n <= 24; n += 8) {
4873 for (size_t k = 1; k <= 20; k += 5) {
4874 GemmMicrokernelTester()
4875 .mr(4)
4876 .nr(8)
4877 .kr(1)
4878 .sr(1)
4879 .m(4)
4880 .n(n)
4881 .k(k)
4882 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004883 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004884 }
4885 }
4886 }
4887
Marat Dukhande06f492020-04-09 00:19:31 -07004888 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004889 TEST_REQUIRES_ARM_NEON;
4890 for (uint32_t n = 16; n <= 24; n += 8) {
4891 for (size_t k = 1; k <= 20; k += 5) {
4892 GemmMicrokernelTester()
4893 .mr(4)
4894 .nr(8)
4895 .kr(1)
4896 .sr(1)
4897 .m(4)
4898 .n(n)
4899 .k(k)
4900 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004901 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004902 }
4903 }
4904 }
4905
Marat Dukhande06f492020-04-09 00:19:31 -07004906 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004907 TEST_REQUIRES_ARM_NEON;
4908 for (uint32_t n = 16; n <= 24; n += 8) {
4909 for (size_t k = 1; k <= 20; k += 5) {
4910 for (uint32_t m = 1; m <= 4; m++) {
4911 GemmMicrokernelTester()
4912 .mr(4)
4913 .nr(8)
4914 .kr(1)
4915 .sr(1)
4916 .m(m)
4917 .n(n)
4918 .k(k)
4919 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004920 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004921 }
4922 }
4923 }
4924 }
4925
Marat Dukhande06f492020-04-09 00:19:31 -07004926 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004927 TEST_REQUIRES_ARM_NEON;
4928 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004929 for (uint32_t n = 1; n <= 8; n++) {
4930 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004931 GemmMicrokernelTester()
4932 .mr(4)
4933 .nr(8)
4934 .kr(1)
4935 .sr(1)
4936 .m(m)
4937 .n(n)
4938 .k(k)
4939 .cm_stride(11)
4940 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004941 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004942 }
4943 }
4944 }
4945 }
4946
Marat Dukhande06f492020-04-09 00:19:31 -07004947 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004948 TEST_REQUIRES_ARM_NEON;
4949 GemmMicrokernelTester()
4950 .mr(4)
4951 .nr(8)
4952 .kr(1)
4953 .sr(1)
4954 .m(4)
4955 .n(8)
4956 .k(4)
4957 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004958 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004959 }
4960
Marat Dukhande06f492020-04-09 00:19:31 -07004961 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004962 TEST_REQUIRES_ARM_NEON;
4963 GemmMicrokernelTester()
4964 .mr(4)
4965 .nr(8)
4966 .kr(1)
4967 .sr(1)
4968 .m(4)
4969 .n(8)
4970 .k(4)
4971 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004972 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004973 }
4974
Marat Dukhande06f492020-04-09 00:19:31 -07004975 TEST(F32_GEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004976 TEST_REQUIRES_ARM_NEON;
4977 GemmMicrokernelTester()
4978 .mr(4)
4979 .nr(8)
4980 .kr(1)
4981 .sr(1)
4982 .m(4)
4983 .n(8)
4984 .k(4)
4985 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004986 .Test(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004987 }
Marat Dukhanf6068062020-05-17 04:42:19 -07004988#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Marat Dukhan1c587112020-04-08 20:04:28 -07004989
4990
Marat Dukhan1c587112020-04-08 20:04:28 -07004991#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07004992 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004993 TEST_REQUIRES_ARM_NEON_FMA;
4994 GemmMicrokernelTester()
4995 .mr(6)
4996 .nr(8)
4997 .kr(1)
4998 .sr(1)
4999 .m(6)
5000 .n(8)
5001 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005002 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005003 }
5004
Marat Dukhande06f492020-04-09 00:19:31 -07005005 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005006 TEST_REQUIRES_ARM_NEON_FMA;
5007 GemmMicrokernelTester()
5008 .mr(6)
5009 .nr(8)
5010 .kr(1)
5011 .sr(1)
5012 .m(6)
5013 .n(8)
5014 .k(2)
5015 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005016 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005017 }
5018
Marat Dukhande06f492020-04-09 00:19:31 -07005019 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005020 TEST_REQUIRES_ARM_NEON_FMA;
5021 GemmMicrokernelTester()
5022 .mr(6)
5023 .nr(8)
5024 .kr(1)
5025 .sr(1)
5026 .m(6)
5027 .n(8)
5028 .k(2)
5029 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005030 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005031 }
5032
Marat Dukhande06f492020-04-09 00:19:31 -07005033 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005034 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005035 for (uint32_t n = 1; n <= 8; n++) {
5036 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005037 GemmMicrokernelTester()
5038 .mr(6)
5039 .nr(8)
5040 .kr(1)
5041 .sr(1)
5042 .m(m)
5043 .n(n)
5044 .k(2)
5045 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005046 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005047 }
5048 }
5049 }
5050
Marat Dukhande06f492020-04-09 00:19:31 -07005051 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005052 TEST_REQUIRES_ARM_NEON_FMA;
5053 for (uint32_t m = 1; m <= 6; m++) {
5054 GemmMicrokernelTester()
5055 .mr(6)
5056 .nr(8)
5057 .kr(1)
5058 .sr(1)
5059 .m(m)
5060 .n(8)
5061 .k(2)
5062 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005063 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005064 }
5065 }
5066
Marat Dukhande06f492020-04-09 00:19:31 -07005067 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005068 TEST_REQUIRES_ARM_NEON_FMA;
5069 for (uint32_t n = 1; n <= 8; n++) {
5070 GemmMicrokernelTester()
5071 .mr(6)
5072 .nr(8)
5073 .kr(1)
5074 .sr(1)
5075 .m(6)
5076 .n(n)
5077 .k(2)
5078 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005079 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005080 }
5081 }
5082
Marat Dukhande06f492020-04-09 00:19:31 -07005083 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005084 TEST_REQUIRES_ARM_NEON_FMA;
5085 for (size_t k = 1; k < 2; k++) {
5086 GemmMicrokernelTester()
5087 .mr(6)
5088 .nr(8)
5089 .kr(1)
5090 .sr(1)
5091 .m(6)
5092 .n(8)
5093 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005094 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005095 }
5096 }
5097
Marat Dukhande06f492020-04-09 00:19:31 -07005098 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005099 TEST_REQUIRES_ARM_NEON_FMA;
5100 for (size_t k = 1; k < 2; k++) {
5101 GemmMicrokernelTester()
5102 .mr(6)
5103 .nr(8)
5104 .kr(1)
5105 .sr(1)
5106 .m(6)
5107 .n(8)
5108 .k(k)
5109 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005110 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005111 }
5112 }
5113
Marat Dukhande06f492020-04-09 00:19:31 -07005114 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005115 TEST_REQUIRES_ARM_NEON_FMA;
5116 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005117 for (uint32_t n = 1; n <= 8; n++) {
5118 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005119 GemmMicrokernelTester()
5120 .mr(6)
5121 .nr(8)
5122 .kr(1)
5123 .sr(1)
5124 .m(m)
5125 .n(n)
5126 .k(k)
5127 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005128 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005129 }
5130 }
5131 }
5132 }
5133
Marat Dukhande06f492020-04-09 00:19:31 -07005134 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005135 TEST_REQUIRES_ARM_NEON_FMA;
5136 for (size_t k = 3; k < 4; k++) {
5137 GemmMicrokernelTester()
5138 .mr(6)
5139 .nr(8)
5140 .kr(1)
5141 .sr(1)
5142 .m(6)
5143 .n(8)
5144 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005145 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005146 }
5147 }
5148
Marat Dukhande06f492020-04-09 00:19:31 -07005149 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005150 TEST_REQUIRES_ARM_NEON_FMA;
5151 for (size_t k = 3; k < 4; k++) {
5152 GemmMicrokernelTester()
5153 .mr(6)
5154 .nr(8)
5155 .kr(1)
5156 .sr(1)
5157 .m(6)
5158 .n(8)
5159 .k(k)
5160 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005161 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005162 }
5163 }
5164
Marat Dukhande06f492020-04-09 00:19:31 -07005165 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005166 TEST_REQUIRES_ARM_NEON_FMA;
5167 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005168 for (uint32_t n = 1; n <= 8; n++) {
5169 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005170 GemmMicrokernelTester()
5171 .mr(6)
5172 .nr(8)
5173 .kr(1)
5174 .sr(1)
5175 .m(m)
5176 .n(n)
5177 .k(k)
5178 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005179 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005180 }
5181 }
5182 }
5183 }
5184
Marat Dukhande06f492020-04-09 00:19:31 -07005185 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005186 TEST_REQUIRES_ARM_NEON_FMA;
5187 for (size_t k = 4; k <= 20; k += 2) {
5188 GemmMicrokernelTester()
5189 .mr(6)
5190 .nr(8)
5191 .kr(1)
5192 .sr(1)
5193 .m(6)
5194 .n(8)
5195 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005196 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005197 }
5198 }
5199
Marat Dukhande06f492020-04-09 00:19:31 -07005200 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005201 TEST_REQUIRES_ARM_NEON_FMA;
5202 for (size_t k = 4; k <= 20; k += 2) {
5203 GemmMicrokernelTester()
5204 .mr(6)
5205 .nr(8)
5206 .kr(1)
5207 .sr(1)
5208 .m(6)
5209 .n(8)
5210 .k(k)
5211 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005212 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005213 }
5214 }
5215
Marat Dukhande06f492020-04-09 00:19:31 -07005216 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005217 TEST_REQUIRES_ARM_NEON_FMA;
5218 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005219 for (uint32_t n = 1; n <= 8; n++) {
5220 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005221 GemmMicrokernelTester()
5222 .mr(6)
5223 .nr(8)
5224 .kr(1)
5225 .sr(1)
5226 .m(m)
5227 .n(n)
5228 .k(k)
5229 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005230 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005231 }
5232 }
5233 }
5234 }
5235
Marat Dukhande06f492020-04-09 00:19:31 -07005236 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005237 TEST_REQUIRES_ARM_NEON_FMA;
5238 for (uint32_t n = 9; n < 16; n++) {
5239 for (size_t k = 1; k <= 10; k += 3) {
5240 GemmMicrokernelTester()
5241 .mr(6)
5242 .nr(8)
5243 .kr(1)
5244 .sr(1)
5245 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005246 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005247 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005248 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005249 }
5250 }
5251 }
5252
Marat Dukhande06f492020-04-09 00:19:31 -07005253 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005254 TEST_REQUIRES_ARM_NEON_FMA;
5255 for (uint32_t n = 9; n < 16; n++) {
5256 for (size_t k = 1; k <= 10; k += 3) {
5257 GemmMicrokernelTester()
5258 .mr(6)
5259 .nr(8)
5260 .kr(1)
5261 .sr(1)
5262 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005263 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005264 .k(k)
5265 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005266 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005267 }
5268 }
5269 }
5270
Marat Dukhande06f492020-04-09 00:19:31 -07005271 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005272 TEST_REQUIRES_ARM_NEON_FMA;
5273 for (uint32_t n = 9; n < 16; n++) {
5274 for (size_t k = 1; k <= 10; k += 3) {
5275 GemmMicrokernelTester()
5276 .mr(6)
5277 .nr(8)
5278 .kr(1)
5279 .sr(1)
5280 .m(6)
5281 .n(n)
5282 .k(k)
5283 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005284 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005285 }
5286 }
5287 }
5288
Marat Dukhande06f492020-04-09 00:19:31 -07005289 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005290 TEST_REQUIRES_ARM_NEON_FMA;
5291 for (uint32_t n = 9; n < 16; n++) {
5292 for (size_t k = 1; k <= 10; k += 3) {
5293 for (uint32_t m = 1; m <= 6; m++) {
5294 GemmMicrokernelTester()
5295 .mr(6)
5296 .nr(8)
5297 .kr(1)
5298 .sr(1)
5299 .m(m)
5300 .n(n)
5301 .k(k)
5302 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005303 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005304 }
5305 }
5306 }
5307 }
5308
Marat Dukhande06f492020-04-09 00:19:31 -07005309 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005310 TEST_REQUIRES_ARM_NEON_FMA;
5311 for (uint32_t n = 16; n <= 24; n += 8) {
5312 for (size_t k = 1; k <= 10; k += 3) {
5313 GemmMicrokernelTester()
5314 .mr(6)
5315 .nr(8)
5316 .kr(1)
5317 .sr(1)
5318 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005319 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005320 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005321 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005322 }
5323 }
5324 }
5325
Marat Dukhande06f492020-04-09 00:19:31 -07005326 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005327 TEST_REQUIRES_ARM_NEON_FMA;
5328 for (uint32_t n = 16; n <= 24; n += 8) {
5329 for (size_t k = 1; k <= 10; k += 3) {
5330 GemmMicrokernelTester()
5331 .mr(6)
5332 .nr(8)
5333 .kr(1)
5334 .sr(1)
5335 .m(6)
5336 .n(n)
5337 .k(k)
5338 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005339 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005340 }
5341 }
5342 }
5343
Marat Dukhande06f492020-04-09 00:19:31 -07005344 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005345 TEST_REQUIRES_ARM_NEON_FMA;
5346 for (uint32_t n = 16; n <= 24; n += 8) {
5347 for (size_t k = 1; k <= 10; k += 3) {
5348 GemmMicrokernelTester()
5349 .mr(6)
5350 .nr(8)
5351 .kr(1)
5352 .sr(1)
5353 .m(6)
5354 .n(n)
5355 .k(k)
5356 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005357 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005358 }
5359 }
5360 }
5361
Marat Dukhande06f492020-04-09 00:19:31 -07005362 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005363 TEST_REQUIRES_ARM_NEON_FMA;
5364 for (uint32_t n = 16; n <= 24; n += 8) {
5365 for (size_t k = 1; k <= 10; k += 3) {
5366 for (uint32_t m = 1; m <= 6; m++) {
5367 GemmMicrokernelTester()
5368 .mr(6)
5369 .nr(8)
5370 .kr(1)
5371 .sr(1)
5372 .m(m)
5373 .n(n)
5374 .k(k)
5375 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005376 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005377 }
5378 }
5379 }
5380 }
5381
Marat Dukhande06f492020-04-09 00:19:31 -07005382 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005383 TEST_REQUIRES_ARM_NEON_FMA;
5384 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005385 for (uint32_t n = 1; n <= 8; n++) {
5386 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005387 GemmMicrokernelTester()
5388 .mr(6)
5389 .nr(8)
5390 .kr(1)
5391 .sr(1)
5392 .m(m)
5393 .n(n)
5394 .k(k)
5395 .cm_stride(11)
5396 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005397 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005398 }
5399 }
5400 }
5401 }
5402
Marat Dukhande06f492020-04-09 00:19:31 -07005403 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005404 TEST_REQUIRES_ARM_NEON_FMA;
5405 GemmMicrokernelTester()
5406 .mr(6)
5407 .nr(8)
5408 .kr(1)
5409 .sr(1)
5410 .m(6)
5411 .n(8)
5412 .k(2)
5413 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005414 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005415 }
5416
Marat Dukhande06f492020-04-09 00:19:31 -07005417 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005418 TEST_REQUIRES_ARM_NEON_FMA;
5419 GemmMicrokernelTester()
5420 .mr(6)
5421 .nr(8)
5422 .kr(1)
5423 .sr(1)
5424 .m(6)
5425 .n(8)
5426 .k(2)
5427 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005428 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005429 }
5430
Marat Dukhande06f492020-04-09 00:19:31 -07005431 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005432 TEST_REQUIRES_ARM_NEON_FMA;
5433 GemmMicrokernelTester()
5434 .mr(6)
5435 .nr(8)
5436 .kr(1)
5437 .sr(1)
5438 .m(6)
5439 .n(8)
5440 .k(2)
5441 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005442 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005443 }
5444#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5445
5446
5447#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07005448 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005449 TEST_REQUIRES_ARM_NEON_FMA;
5450 GemmMicrokernelTester()
5451 .mr(6)
5452 .nr(8)
5453 .kr(1)
5454 .sr(1)
5455 .m(6)
5456 .n(8)
5457 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005458 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005459 }
5460
Marat Dukhande06f492020-04-09 00:19:31 -07005461 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005462 TEST_REQUIRES_ARM_NEON_FMA;
5463 GemmMicrokernelTester()
5464 .mr(6)
5465 .nr(8)
5466 .kr(1)
5467 .sr(1)
5468 .m(6)
5469 .n(8)
5470 .k(4)
5471 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005472 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005473 }
5474
Marat Dukhande06f492020-04-09 00:19:31 -07005475 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005476 TEST_REQUIRES_ARM_NEON_FMA;
5477 GemmMicrokernelTester()
5478 .mr(6)
5479 .nr(8)
5480 .kr(1)
5481 .sr(1)
5482 .m(6)
5483 .n(8)
5484 .k(4)
5485 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005486 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005487 }
5488
Marat Dukhande06f492020-04-09 00:19:31 -07005489 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005490 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005491 for (uint32_t n = 1; n <= 8; n++) {
5492 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005493 GemmMicrokernelTester()
5494 .mr(6)
5495 .nr(8)
5496 .kr(1)
5497 .sr(1)
5498 .m(m)
5499 .n(n)
5500 .k(4)
5501 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005502 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005503 }
5504 }
5505 }
5506
Marat Dukhande06f492020-04-09 00:19:31 -07005507 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005508 TEST_REQUIRES_ARM_NEON_FMA;
5509 for (uint32_t m = 1; m <= 6; m++) {
5510 GemmMicrokernelTester()
5511 .mr(6)
5512 .nr(8)
5513 .kr(1)
5514 .sr(1)
5515 .m(m)
5516 .n(8)
5517 .k(4)
5518 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005519 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005520 }
5521 }
5522
Marat Dukhande06f492020-04-09 00:19:31 -07005523 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005524 TEST_REQUIRES_ARM_NEON_FMA;
5525 for (uint32_t n = 1; n <= 8; n++) {
5526 GemmMicrokernelTester()
5527 .mr(6)
5528 .nr(8)
5529 .kr(1)
5530 .sr(1)
5531 .m(6)
5532 .n(n)
5533 .k(4)
5534 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005535 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005536 }
5537 }
5538
Marat Dukhande06f492020-04-09 00:19:31 -07005539 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005540 TEST_REQUIRES_ARM_NEON_FMA;
5541 for (size_t k = 1; k < 4; k++) {
5542 GemmMicrokernelTester()
5543 .mr(6)
5544 .nr(8)
5545 .kr(1)
5546 .sr(1)
5547 .m(6)
5548 .n(8)
5549 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005550 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005551 }
5552 }
5553
Marat Dukhande06f492020-04-09 00:19:31 -07005554 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005555 TEST_REQUIRES_ARM_NEON_FMA;
5556 for (size_t k = 1; k < 4; k++) {
5557 GemmMicrokernelTester()
5558 .mr(6)
5559 .nr(8)
5560 .kr(1)
5561 .sr(1)
5562 .m(6)
5563 .n(8)
5564 .k(k)
5565 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005566 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005567 }
5568 }
5569
Marat Dukhande06f492020-04-09 00:19:31 -07005570 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005571 TEST_REQUIRES_ARM_NEON_FMA;
5572 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005573 for (uint32_t n = 1; n <= 8; n++) {
5574 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005575 GemmMicrokernelTester()
5576 .mr(6)
5577 .nr(8)
5578 .kr(1)
5579 .sr(1)
5580 .m(m)
5581 .n(n)
5582 .k(k)
5583 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005584 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005585 }
5586 }
5587 }
5588 }
5589
Marat Dukhande06f492020-04-09 00:19:31 -07005590 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005591 TEST_REQUIRES_ARM_NEON_FMA;
5592 for (size_t k = 5; k < 8; k++) {
5593 GemmMicrokernelTester()
5594 .mr(6)
5595 .nr(8)
5596 .kr(1)
5597 .sr(1)
5598 .m(6)
5599 .n(8)
5600 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005601 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005602 }
5603 }
5604
Marat Dukhande06f492020-04-09 00:19:31 -07005605 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005606 TEST_REQUIRES_ARM_NEON_FMA;
5607 for (size_t k = 5; k < 8; k++) {
5608 GemmMicrokernelTester()
5609 .mr(6)
5610 .nr(8)
5611 .kr(1)
5612 .sr(1)
5613 .m(6)
5614 .n(8)
5615 .k(k)
5616 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005617 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005618 }
5619 }
5620
Marat Dukhande06f492020-04-09 00:19:31 -07005621 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005622 TEST_REQUIRES_ARM_NEON_FMA;
5623 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005624 for (uint32_t n = 1; n <= 8; n++) {
5625 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005626 GemmMicrokernelTester()
5627 .mr(6)
5628 .nr(8)
5629 .kr(1)
5630 .sr(1)
5631 .m(m)
5632 .n(n)
5633 .k(k)
5634 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005635 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005636 }
5637 }
5638 }
5639 }
5640
Marat Dukhande06f492020-04-09 00:19:31 -07005641 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005642 TEST_REQUIRES_ARM_NEON_FMA;
5643 for (size_t k = 8; k <= 40; k += 4) {
5644 GemmMicrokernelTester()
5645 .mr(6)
5646 .nr(8)
5647 .kr(1)
5648 .sr(1)
5649 .m(6)
5650 .n(8)
5651 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005652 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005653 }
5654 }
5655
Marat Dukhande06f492020-04-09 00:19:31 -07005656 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005657 TEST_REQUIRES_ARM_NEON_FMA;
5658 for (size_t k = 8; k <= 40; k += 4) {
5659 GemmMicrokernelTester()
5660 .mr(6)
5661 .nr(8)
5662 .kr(1)
5663 .sr(1)
5664 .m(6)
5665 .n(8)
5666 .k(k)
5667 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005668 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005669 }
5670 }
5671
Marat Dukhande06f492020-04-09 00:19:31 -07005672 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005673 TEST_REQUIRES_ARM_NEON_FMA;
5674 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005675 for (uint32_t n = 1; n <= 8; n++) {
5676 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005677 GemmMicrokernelTester()
5678 .mr(6)
5679 .nr(8)
5680 .kr(1)
5681 .sr(1)
5682 .m(m)
5683 .n(n)
5684 .k(k)
5685 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005686 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005687 }
5688 }
5689 }
5690 }
5691
Marat Dukhande06f492020-04-09 00:19:31 -07005692 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005693 TEST_REQUIRES_ARM_NEON_FMA;
5694 for (uint32_t n = 9; n < 16; n++) {
5695 for (size_t k = 1; k <= 20; k += 5) {
5696 GemmMicrokernelTester()
5697 .mr(6)
5698 .nr(8)
5699 .kr(1)
5700 .sr(1)
5701 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005702 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005703 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005704 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005705 }
5706 }
5707 }
5708
Marat Dukhande06f492020-04-09 00:19:31 -07005709 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005710 TEST_REQUIRES_ARM_NEON_FMA;
5711 for (uint32_t n = 9; n < 16; n++) {
5712 for (size_t k = 1; k <= 20; k += 5) {
5713 GemmMicrokernelTester()
5714 .mr(6)
5715 .nr(8)
5716 .kr(1)
5717 .sr(1)
5718 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005719 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005720 .k(k)
5721 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005722 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005723 }
5724 }
5725 }
5726
Marat Dukhande06f492020-04-09 00:19:31 -07005727 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005728 TEST_REQUIRES_ARM_NEON_FMA;
5729 for (uint32_t n = 9; n < 16; n++) {
5730 for (size_t k = 1; k <= 20; k += 5) {
5731 GemmMicrokernelTester()
5732 .mr(6)
5733 .nr(8)
5734 .kr(1)
5735 .sr(1)
5736 .m(6)
5737 .n(n)
5738 .k(k)
5739 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005740 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005741 }
5742 }
5743 }
5744
Marat Dukhande06f492020-04-09 00:19:31 -07005745 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005746 TEST_REQUIRES_ARM_NEON_FMA;
5747 for (uint32_t n = 9; n < 16; n++) {
5748 for (size_t k = 1; k <= 20; k += 5) {
5749 for (uint32_t m = 1; m <= 6; m++) {
5750 GemmMicrokernelTester()
5751 .mr(6)
5752 .nr(8)
5753 .kr(1)
5754 .sr(1)
5755 .m(m)
5756 .n(n)
5757 .k(k)
5758 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005759 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005760 }
5761 }
5762 }
5763 }
5764
Marat Dukhande06f492020-04-09 00:19:31 -07005765 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005766 TEST_REQUIRES_ARM_NEON_FMA;
5767 for (uint32_t n = 16; n <= 24; n += 8) {
5768 for (size_t k = 1; k <= 20; k += 5) {
5769 GemmMicrokernelTester()
5770 .mr(6)
5771 .nr(8)
5772 .kr(1)
5773 .sr(1)
5774 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005775 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005776 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005777 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005778 }
5779 }
5780 }
5781
Marat Dukhande06f492020-04-09 00:19:31 -07005782 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005783 TEST_REQUIRES_ARM_NEON_FMA;
5784 for (uint32_t n = 16; n <= 24; n += 8) {
5785 for (size_t k = 1; k <= 20; k += 5) {
5786 GemmMicrokernelTester()
5787 .mr(6)
5788 .nr(8)
5789 .kr(1)
5790 .sr(1)
5791 .m(6)
5792 .n(n)
5793 .k(k)
5794 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005795 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005796 }
5797 }
5798 }
5799
Marat Dukhande06f492020-04-09 00:19:31 -07005800 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005801 TEST_REQUIRES_ARM_NEON_FMA;
5802 for (uint32_t n = 16; n <= 24; n += 8) {
5803 for (size_t k = 1; k <= 20; k += 5) {
5804 GemmMicrokernelTester()
5805 .mr(6)
5806 .nr(8)
5807 .kr(1)
5808 .sr(1)
5809 .m(6)
5810 .n(n)
5811 .k(k)
5812 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005813 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005814 }
5815 }
5816 }
5817
Marat Dukhande06f492020-04-09 00:19:31 -07005818 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005819 TEST_REQUIRES_ARM_NEON_FMA;
5820 for (uint32_t n = 16; n <= 24; n += 8) {
5821 for (size_t k = 1; k <= 20; k += 5) {
5822 for (uint32_t m = 1; m <= 6; m++) {
5823 GemmMicrokernelTester()
5824 .mr(6)
5825 .nr(8)
5826 .kr(1)
5827 .sr(1)
5828 .m(m)
5829 .n(n)
5830 .k(k)
5831 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005832 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005833 }
5834 }
5835 }
5836 }
5837
Marat Dukhande06f492020-04-09 00:19:31 -07005838 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005839 TEST_REQUIRES_ARM_NEON_FMA;
5840 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005841 for (uint32_t n = 1; n <= 8; n++) {
5842 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005843 GemmMicrokernelTester()
5844 .mr(6)
5845 .nr(8)
5846 .kr(1)
5847 .sr(1)
5848 .m(m)
5849 .n(n)
5850 .k(k)
5851 .cm_stride(11)
5852 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005853 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005854 }
5855 }
5856 }
5857 }
5858
Marat Dukhande06f492020-04-09 00:19:31 -07005859 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005860 TEST_REQUIRES_ARM_NEON_FMA;
5861 GemmMicrokernelTester()
5862 .mr(6)
5863 .nr(8)
5864 .kr(1)
5865 .sr(1)
5866 .m(6)
5867 .n(8)
5868 .k(4)
5869 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005870 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005871 }
5872
Marat Dukhande06f492020-04-09 00:19:31 -07005873 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005874 TEST_REQUIRES_ARM_NEON_FMA;
5875 GemmMicrokernelTester()
5876 .mr(6)
5877 .nr(8)
5878 .kr(1)
5879 .sr(1)
5880 .m(6)
5881 .n(8)
5882 .k(4)
5883 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005884 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005885 }
5886
Marat Dukhande06f492020-04-09 00:19:31 -07005887 TEST(F32_GEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005888 TEST_REQUIRES_ARM_NEON_FMA;
5889 GemmMicrokernelTester()
5890 .mr(6)
5891 .nr(8)
5892 .kr(1)
5893 .sr(1)
5894 .m(6)
5895 .n(8)
5896 .k(4)
5897 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005898 .Test(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005899 }
5900#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5901
5902
5903#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07005904 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005905 TEST_REQUIRES_ARM_NEON;
5906 GemmMicrokernelTester()
5907 .mr(1)
5908 .nr(8)
5909 .kr(1)
5910 .sr(1)
5911 .m(1)
5912 .n(8)
5913 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005914 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005915 }
5916
Marat Dukhande06f492020-04-09 00:19:31 -07005917 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005918 TEST_REQUIRES_ARM_NEON;
5919 GemmMicrokernelTester()
5920 .mr(1)
5921 .nr(8)
5922 .kr(1)
5923 .sr(1)
5924 .m(1)
5925 .n(8)
5926 .k(2)
5927 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005928 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005929 }
5930
Marat Dukhande06f492020-04-09 00:19:31 -07005931 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005932 TEST_REQUIRES_ARM_NEON;
5933 GemmMicrokernelTester()
5934 .mr(1)
5935 .nr(8)
5936 .kr(1)
5937 .sr(1)
5938 .m(1)
5939 .n(8)
5940 .k(2)
5941 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005942 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005943 }
5944
Marat Dukhande06f492020-04-09 00:19:31 -07005945 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005946 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005947 for (uint32_t n = 1; n <= 8; n++) {
5948 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005949 GemmMicrokernelTester()
5950 .mr(1)
5951 .nr(8)
5952 .kr(1)
5953 .sr(1)
5954 .m(m)
5955 .n(n)
5956 .k(2)
5957 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005958 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005959 }
5960 }
5961 }
5962
Marat Dukhande06f492020-04-09 00:19:31 -07005963 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005964 TEST_REQUIRES_ARM_NEON;
5965 for (uint32_t m = 1; m <= 1; m++) {
5966 GemmMicrokernelTester()
5967 .mr(1)
5968 .nr(8)
5969 .kr(1)
5970 .sr(1)
5971 .m(m)
5972 .n(8)
5973 .k(2)
5974 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005975 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005976 }
5977 }
5978
Marat Dukhande06f492020-04-09 00:19:31 -07005979 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005980 TEST_REQUIRES_ARM_NEON;
5981 for (uint32_t n = 1; n <= 8; n++) {
5982 GemmMicrokernelTester()
5983 .mr(1)
5984 .nr(8)
5985 .kr(1)
5986 .sr(1)
5987 .m(1)
5988 .n(n)
5989 .k(2)
5990 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005991 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005992 }
5993 }
5994
Marat Dukhande06f492020-04-09 00:19:31 -07005995 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005996 TEST_REQUIRES_ARM_NEON;
5997 for (size_t k = 1; k < 2; k++) {
5998 GemmMicrokernelTester()
5999 .mr(1)
6000 .nr(8)
6001 .kr(1)
6002 .sr(1)
6003 .m(1)
6004 .n(8)
6005 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006006 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006007 }
6008 }
6009
Marat Dukhande06f492020-04-09 00:19:31 -07006010 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006011 TEST_REQUIRES_ARM_NEON;
6012 for (size_t k = 1; k < 2; k++) {
6013 GemmMicrokernelTester()
6014 .mr(1)
6015 .nr(8)
6016 .kr(1)
6017 .sr(1)
6018 .m(1)
6019 .n(8)
6020 .k(k)
6021 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006022 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006023 }
6024 }
6025
Marat Dukhande06f492020-04-09 00:19:31 -07006026 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006027 TEST_REQUIRES_ARM_NEON;
6028 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006029 for (uint32_t n = 1; n <= 8; n++) {
6030 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006031 GemmMicrokernelTester()
6032 .mr(1)
6033 .nr(8)
6034 .kr(1)
6035 .sr(1)
6036 .m(m)
6037 .n(n)
6038 .k(k)
6039 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006040 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006041 }
6042 }
6043 }
6044 }
6045
Marat Dukhande06f492020-04-09 00:19:31 -07006046 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006047 TEST_REQUIRES_ARM_NEON;
6048 for (size_t k = 3; k < 4; k++) {
6049 GemmMicrokernelTester()
6050 .mr(1)
6051 .nr(8)
6052 .kr(1)
6053 .sr(1)
6054 .m(1)
6055 .n(8)
6056 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006057 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006058 }
6059 }
6060
Marat Dukhande06f492020-04-09 00:19:31 -07006061 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006062 TEST_REQUIRES_ARM_NEON;
6063 for (size_t k = 3; k < 4; k++) {
6064 GemmMicrokernelTester()
6065 .mr(1)
6066 .nr(8)
6067 .kr(1)
6068 .sr(1)
6069 .m(1)
6070 .n(8)
6071 .k(k)
6072 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006073 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006074 }
6075 }
6076
Marat Dukhande06f492020-04-09 00:19:31 -07006077 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006078 TEST_REQUIRES_ARM_NEON;
6079 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006080 for (uint32_t n = 1; n <= 8; n++) {
6081 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006082 GemmMicrokernelTester()
6083 .mr(1)
6084 .nr(8)
6085 .kr(1)
6086 .sr(1)
6087 .m(m)
6088 .n(n)
6089 .k(k)
6090 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006091 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006092 }
6093 }
6094 }
6095 }
6096
Marat Dukhande06f492020-04-09 00:19:31 -07006097 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006098 TEST_REQUIRES_ARM_NEON;
6099 for (size_t k = 4; k <= 20; k += 2) {
6100 GemmMicrokernelTester()
6101 .mr(1)
6102 .nr(8)
6103 .kr(1)
6104 .sr(1)
6105 .m(1)
6106 .n(8)
6107 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006108 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006109 }
6110 }
6111
Marat Dukhande06f492020-04-09 00:19:31 -07006112 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006113 TEST_REQUIRES_ARM_NEON;
6114 for (size_t k = 4; k <= 20; k += 2) {
6115 GemmMicrokernelTester()
6116 .mr(1)
6117 .nr(8)
6118 .kr(1)
6119 .sr(1)
6120 .m(1)
6121 .n(8)
6122 .k(k)
6123 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006124 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006125 }
6126 }
6127
Marat Dukhande06f492020-04-09 00:19:31 -07006128 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006129 TEST_REQUIRES_ARM_NEON;
6130 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006131 for (uint32_t n = 1; n <= 8; n++) {
6132 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006133 GemmMicrokernelTester()
6134 .mr(1)
6135 .nr(8)
6136 .kr(1)
6137 .sr(1)
6138 .m(m)
6139 .n(n)
6140 .k(k)
6141 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006142 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006143 }
6144 }
6145 }
6146 }
6147
Marat Dukhande06f492020-04-09 00:19:31 -07006148 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006149 TEST_REQUIRES_ARM_NEON;
6150 for (uint32_t n = 9; n < 16; n++) {
6151 for (size_t k = 1; k <= 10; k += 3) {
6152 GemmMicrokernelTester()
6153 .mr(1)
6154 .nr(8)
6155 .kr(1)
6156 .sr(1)
6157 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006158 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006159 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006160 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006161 }
6162 }
6163 }
6164
Marat Dukhande06f492020-04-09 00:19:31 -07006165 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006166 TEST_REQUIRES_ARM_NEON;
6167 for (uint32_t n = 9; n < 16; n++) {
6168 for (size_t k = 1; k <= 10; k += 3) {
6169 GemmMicrokernelTester()
6170 .mr(1)
6171 .nr(8)
6172 .kr(1)
6173 .sr(1)
6174 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006175 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006176 .k(k)
6177 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006178 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006179 }
6180 }
6181 }
6182
Marat Dukhande06f492020-04-09 00:19:31 -07006183 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006184 TEST_REQUIRES_ARM_NEON;
6185 for (uint32_t n = 9; n < 16; n++) {
6186 for (size_t k = 1; k <= 10; k += 3) {
6187 GemmMicrokernelTester()
6188 .mr(1)
6189 .nr(8)
6190 .kr(1)
6191 .sr(1)
6192 .m(1)
6193 .n(n)
6194 .k(k)
6195 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006196 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006197 }
6198 }
6199 }
6200
Marat Dukhande06f492020-04-09 00:19:31 -07006201 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006202 TEST_REQUIRES_ARM_NEON;
6203 for (uint32_t n = 9; n < 16; n++) {
6204 for (size_t k = 1; k <= 10; k += 3) {
6205 for (uint32_t m = 1; m <= 1; m++) {
6206 GemmMicrokernelTester()
6207 .mr(1)
6208 .nr(8)
6209 .kr(1)
6210 .sr(1)
6211 .m(m)
6212 .n(n)
6213 .k(k)
6214 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006215 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006216 }
6217 }
6218 }
6219 }
6220
Marat Dukhande06f492020-04-09 00:19:31 -07006221 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006222 TEST_REQUIRES_ARM_NEON;
6223 for (uint32_t n = 16; n <= 24; n += 8) {
6224 for (size_t k = 1; k <= 10; k += 3) {
6225 GemmMicrokernelTester()
6226 .mr(1)
6227 .nr(8)
6228 .kr(1)
6229 .sr(1)
6230 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006231 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006232 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006233 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006234 }
6235 }
6236 }
6237
Marat Dukhande06f492020-04-09 00:19:31 -07006238 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006239 TEST_REQUIRES_ARM_NEON;
6240 for (uint32_t n = 16; n <= 24; n += 8) {
6241 for (size_t k = 1; k <= 10; k += 3) {
6242 GemmMicrokernelTester()
6243 .mr(1)
6244 .nr(8)
6245 .kr(1)
6246 .sr(1)
6247 .m(1)
6248 .n(n)
6249 .k(k)
6250 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006251 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006252 }
6253 }
6254 }
6255
Marat Dukhande06f492020-04-09 00:19:31 -07006256 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006257 TEST_REQUIRES_ARM_NEON;
6258 for (uint32_t n = 16; n <= 24; n += 8) {
6259 for (size_t k = 1; k <= 10; k += 3) {
6260 GemmMicrokernelTester()
6261 .mr(1)
6262 .nr(8)
6263 .kr(1)
6264 .sr(1)
6265 .m(1)
6266 .n(n)
6267 .k(k)
6268 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006269 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006270 }
6271 }
6272 }
6273
Marat Dukhande06f492020-04-09 00:19:31 -07006274 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006275 TEST_REQUIRES_ARM_NEON;
6276 for (uint32_t n = 16; n <= 24; n += 8) {
6277 for (size_t k = 1; k <= 10; k += 3) {
6278 for (uint32_t m = 1; m <= 1; m++) {
6279 GemmMicrokernelTester()
6280 .mr(1)
6281 .nr(8)
6282 .kr(1)
6283 .sr(1)
6284 .m(m)
6285 .n(n)
6286 .k(k)
6287 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006288 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006289 }
6290 }
6291 }
6292 }
6293
Marat Dukhande06f492020-04-09 00:19:31 -07006294 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006295 TEST_REQUIRES_ARM_NEON;
6296 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006297 for (uint32_t n = 1; n <= 8; n++) {
6298 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006299 GemmMicrokernelTester()
6300 .mr(1)
6301 .nr(8)
6302 .kr(1)
6303 .sr(1)
6304 .m(m)
6305 .n(n)
6306 .k(k)
6307 .cm_stride(11)
6308 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006309 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006310 }
6311 }
6312 }
6313 }
6314
Marat Dukhande06f492020-04-09 00:19:31 -07006315 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006316 TEST_REQUIRES_ARM_NEON;
6317 GemmMicrokernelTester()
6318 .mr(1)
6319 .nr(8)
6320 .kr(1)
6321 .sr(1)
6322 .m(1)
6323 .n(8)
6324 .k(2)
6325 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006326 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006327 }
6328
Marat Dukhande06f492020-04-09 00:19:31 -07006329 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006330 TEST_REQUIRES_ARM_NEON;
6331 GemmMicrokernelTester()
6332 .mr(1)
6333 .nr(8)
6334 .kr(1)
6335 .sr(1)
6336 .m(1)
6337 .n(8)
6338 .k(2)
6339 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006340 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006341 }
6342
Marat Dukhande06f492020-04-09 00:19:31 -07006343 TEST(F32_GEMM_MINMAX_1X8__NEON_LANE_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006344 TEST_REQUIRES_ARM_NEON;
6345 GemmMicrokernelTester()
6346 .mr(1)
6347 .nr(8)
6348 .kr(1)
6349 .sr(1)
6350 .m(1)
6351 .n(8)
6352 .k(2)
6353 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006354 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006355 }
6356#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6357
6358
6359#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07006360 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006361 TEST_REQUIRES_ARM_NEON;
6362 GemmMicrokernelTester()
6363 .mr(4)
6364 .nr(8)
6365 .kr(1)
6366 .sr(1)
6367 .m(4)
6368 .n(8)
6369 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006370 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006371 }
6372
Marat Dukhande06f492020-04-09 00:19:31 -07006373 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006374 TEST_REQUIRES_ARM_NEON;
6375 GemmMicrokernelTester()
6376 .mr(4)
6377 .nr(8)
6378 .kr(1)
6379 .sr(1)
6380 .m(4)
6381 .n(8)
6382 .k(2)
6383 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006384 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006385 }
6386
Marat Dukhande06f492020-04-09 00:19:31 -07006387 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006388 TEST_REQUIRES_ARM_NEON;
6389 GemmMicrokernelTester()
6390 .mr(4)
6391 .nr(8)
6392 .kr(1)
6393 .sr(1)
6394 .m(4)
6395 .n(8)
6396 .k(2)
6397 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006398 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006399 }
6400
Marat Dukhande06f492020-04-09 00:19:31 -07006401 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006402 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006403 for (uint32_t n = 1; n <= 8; n++) {
6404 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006405 GemmMicrokernelTester()
6406 .mr(4)
6407 .nr(8)
6408 .kr(1)
6409 .sr(1)
6410 .m(m)
6411 .n(n)
6412 .k(2)
6413 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006414 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006415 }
6416 }
6417 }
6418
Marat Dukhande06f492020-04-09 00:19:31 -07006419 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006420 TEST_REQUIRES_ARM_NEON;
6421 for (uint32_t m = 1; m <= 4; m++) {
6422 GemmMicrokernelTester()
6423 .mr(4)
6424 .nr(8)
6425 .kr(1)
6426 .sr(1)
6427 .m(m)
6428 .n(8)
6429 .k(2)
6430 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006431 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006432 }
6433 }
6434
Marat Dukhande06f492020-04-09 00:19:31 -07006435 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006436 TEST_REQUIRES_ARM_NEON;
6437 for (uint32_t n = 1; n <= 8; n++) {
6438 GemmMicrokernelTester()
6439 .mr(4)
6440 .nr(8)
6441 .kr(1)
6442 .sr(1)
6443 .m(4)
6444 .n(n)
6445 .k(2)
6446 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006447 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006448 }
6449 }
6450
Marat Dukhande06f492020-04-09 00:19:31 -07006451 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006452 TEST_REQUIRES_ARM_NEON;
6453 for (size_t k = 1; k < 2; k++) {
6454 GemmMicrokernelTester()
6455 .mr(4)
6456 .nr(8)
6457 .kr(1)
6458 .sr(1)
6459 .m(4)
6460 .n(8)
6461 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006462 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006463 }
6464 }
6465
Marat Dukhande06f492020-04-09 00:19:31 -07006466 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006467 TEST_REQUIRES_ARM_NEON;
6468 for (size_t k = 1; k < 2; k++) {
6469 GemmMicrokernelTester()
6470 .mr(4)
6471 .nr(8)
6472 .kr(1)
6473 .sr(1)
6474 .m(4)
6475 .n(8)
6476 .k(k)
6477 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006478 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006479 }
6480 }
6481
Marat Dukhande06f492020-04-09 00:19:31 -07006482 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006483 TEST_REQUIRES_ARM_NEON;
6484 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006485 for (uint32_t n = 1; n <= 8; n++) {
6486 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006487 GemmMicrokernelTester()
6488 .mr(4)
6489 .nr(8)
6490 .kr(1)
6491 .sr(1)
6492 .m(m)
6493 .n(n)
6494 .k(k)
6495 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006496 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006497 }
6498 }
6499 }
6500 }
6501
Marat Dukhande06f492020-04-09 00:19:31 -07006502 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006503 TEST_REQUIRES_ARM_NEON;
6504 for (size_t k = 3; k < 4; k++) {
6505 GemmMicrokernelTester()
6506 .mr(4)
6507 .nr(8)
6508 .kr(1)
6509 .sr(1)
6510 .m(4)
6511 .n(8)
6512 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006513 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006514 }
6515 }
6516
Marat Dukhande06f492020-04-09 00:19:31 -07006517 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006518 TEST_REQUIRES_ARM_NEON;
6519 for (size_t k = 3; k < 4; k++) {
6520 GemmMicrokernelTester()
6521 .mr(4)
6522 .nr(8)
6523 .kr(1)
6524 .sr(1)
6525 .m(4)
6526 .n(8)
6527 .k(k)
6528 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006529 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006530 }
6531 }
6532
Marat Dukhande06f492020-04-09 00:19:31 -07006533 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006534 TEST_REQUIRES_ARM_NEON;
6535 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006536 for (uint32_t n = 1; n <= 8; n++) {
6537 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006538 GemmMicrokernelTester()
6539 .mr(4)
6540 .nr(8)
6541 .kr(1)
6542 .sr(1)
6543 .m(m)
6544 .n(n)
6545 .k(k)
6546 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006547 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006548 }
6549 }
6550 }
6551 }
6552
Marat Dukhande06f492020-04-09 00:19:31 -07006553 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006554 TEST_REQUIRES_ARM_NEON;
6555 for (size_t k = 4; k <= 20; k += 2) {
6556 GemmMicrokernelTester()
6557 .mr(4)
6558 .nr(8)
6559 .kr(1)
6560 .sr(1)
6561 .m(4)
6562 .n(8)
6563 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006564 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006565 }
6566 }
6567
Marat Dukhande06f492020-04-09 00:19:31 -07006568 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006569 TEST_REQUIRES_ARM_NEON;
6570 for (size_t k = 4; k <= 20; k += 2) {
6571 GemmMicrokernelTester()
6572 .mr(4)
6573 .nr(8)
6574 .kr(1)
6575 .sr(1)
6576 .m(4)
6577 .n(8)
6578 .k(k)
6579 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006580 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006581 }
6582 }
6583
Marat Dukhande06f492020-04-09 00:19:31 -07006584 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006585 TEST_REQUIRES_ARM_NEON;
6586 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006587 for (uint32_t n = 1; n <= 8; n++) {
6588 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006589 GemmMicrokernelTester()
6590 .mr(4)
6591 .nr(8)
6592 .kr(1)
6593 .sr(1)
6594 .m(m)
6595 .n(n)
6596 .k(k)
6597 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006598 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006599 }
6600 }
6601 }
6602 }
6603
Marat Dukhande06f492020-04-09 00:19:31 -07006604 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006605 TEST_REQUIRES_ARM_NEON;
6606 for (uint32_t n = 9; n < 16; n++) {
6607 for (size_t k = 1; k <= 10; k += 3) {
6608 GemmMicrokernelTester()
6609 .mr(4)
6610 .nr(8)
6611 .kr(1)
6612 .sr(1)
6613 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006614 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006615 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006616 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006617 }
6618 }
6619 }
6620
Marat Dukhande06f492020-04-09 00:19:31 -07006621 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006622 TEST_REQUIRES_ARM_NEON;
6623 for (uint32_t n = 9; n < 16; n++) {
6624 for (size_t k = 1; k <= 10; k += 3) {
6625 GemmMicrokernelTester()
6626 .mr(4)
6627 .nr(8)
6628 .kr(1)
6629 .sr(1)
6630 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006631 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006632 .k(k)
6633 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006634 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006635 }
6636 }
6637 }
6638
Marat Dukhande06f492020-04-09 00:19:31 -07006639 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006640 TEST_REQUIRES_ARM_NEON;
6641 for (uint32_t n = 9; n < 16; n++) {
6642 for (size_t k = 1; k <= 10; k += 3) {
6643 GemmMicrokernelTester()
6644 .mr(4)
6645 .nr(8)
6646 .kr(1)
6647 .sr(1)
6648 .m(4)
6649 .n(n)
6650 .k(k)
6651 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006652 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006653 }
6654 }
6655 }
6656
Marat Dukhande06f492020-04-09 00:19:31 -07006657 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006658 TEST_REQUIRES_ARM_NEON;
6659 for (uint32_t n = 9; n < 16; n++) {
6660 for (size_t k = 1; k <= 10; k += 3) {
6661 for (uint32_t m = 1; m <= 4; m++) {
6662 GemmMicrokernelTester()
6663 .mr(4)
6664 .nr(8)
6665 .kr(1)
6666 .sr(1)
6667 .m(m)
6668 .n(n)
6669 .k(k)
6670 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006671 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006672 }
6673 }
6674 }
6675 }
6676
Marat Dukhande06f492020-04-09 00:19:31 -07006677 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006678 TEST_REQUIRES_ARM_NEON;
6679 for (uint32_t n = 16; n <= 24; n += 8) {
6680 for (size_t k = 1; k <= 10; k += 3) {
6681 GemmMicrokernelTester()
6682 .mr(4)
6683 .nr(8)
6684 .kr(1)
6685 .sr(1)
6686 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006687 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006688 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006689 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006690 }
6691 }
6692 }
6693
Marat Dukhande06f492020-04-09 00:19:31 -07006694 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006695 TEST_REQUIRES_ARM_NEON;
6696 for (uint32_t n = 16; n <= 24; n += 8) {
6697 for (size_t k = 1; k <= 10; k += 3) {
6698 GemmMicrokernelTester()
6699 .mr(4)
6700 .nr(8)
6701 .kr(1)
6702 .sr(1)
6703 .m(4)
6704 .n(n)
6705 .k(k)
6706 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006707 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006708 }
6709 }
6710 }
6711
Marat Dukhande06f492020-04-09 00:19:31 -07006712 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006713 TEST_REQUIRES_ARM_NEON;
6714 for (uint32_t n = 16; n <= 24; n += 8) {
6715 for (size_t k = 1; k <= 10; k += 3) {
6716 GemmMicrokernelTester()
6717 .mr(4)
6718 .nr(8)
6719 .kr(1)
6720 .sr(1)
6721 .m(4)
6722 .n(n)
6723 .k(k)
6724 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006725 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006726 }
6727 }
6728 }
6729
Marat Dukhande06f492020-04-09 00:19:31 -07006730 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006731 TEST_REQUIRES_ARM_NEON;
6732 for (uint32_t n = 16; n <= 24; n += 8) {
6733 for (size_t k = 1; k <= 10; k += 3) {
6734 for (uint32_t m = 1; m <= 4; m++) {
6735 GemmMicrokernelTester()
6736 .mr(4)
6737 .nr(8)
6738 .kr(1)
6739 .sr(1)
6740 .m(m)
6741 .n(n)
6742 .k(k)
6743 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006744 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006745 }
6746 }
6747 }
6748 }
6749
Marat Dukhande06f492020-04-09 00:19:31 -07006750 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006751 TEST_REQUIRES_ARM_NEON;
6752 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006753 for (uint32_t n = 1; n <= 8; n++) {
6754 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006755 GemmMicrokernelTester()
6756 .mr(4)
6757 .nr(8)
6758 .kr(1)
6759 .sr(1)
6760 .m(m)
6761 .n(n)
6762 .k(k)
6763 .cm_stride(11)
6764 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006765 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006766 }
6767 }
6768 }
6769 }
6770
Marat Dukhande06f492020-04-09 00:19:31 -07006771 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006772 TEST_REQUIRES_ARM_NEON;
6773 GemmMicrokernelTester()
6774 .mr(4)
6775 .nr(8)
6776 .kr(1)
6777 .sr(1)
6778 .m(4)
6779 .n(8)
6780 .k(2)
6781 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006782 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006783 }
6784
Marat Dukhande06f492020-04-09 00:19:31 -07006785 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006786 TEST_REQUIRES_ARM_NEON;
6787 GemmMicrokernelTester()
6788 .mr(4)
6789 .nr(8)
6790 .kr(1)
6791 .sr(1)
6792 .m(4)
6793 .n(8)
6794 .k(2)
6795 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006796 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006797 }
6798
Marat Dukhande06f492020-04-09 00:19:31 -07006799 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006800 TEST_REQUIRES_ARM_NEON;
6801 GemmMicrokernelTester()
6802 .mr(4)
6803 .nr(8)
6804 .kr(1)
6805 .sr(1)
6806 .m(4)
6807 .n(8)
6808 .k(2)
6809 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006810 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006811 }
6812#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6813
6814
6815#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07006816 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006817 TEST_REQUIRES_ARM_NEON;
6818 GemmMicrokernelTester()
6819 .mr(4)
6820 .nr(8)
6821 .kr(1)
6822 .sr(1)
6823 .m(4)
6824 .n(8)
6825 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006826 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006827 }
6828
Marat Dukhande06f492020-04-09 00:19:31 -07006829 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006830 TEST_REQUIRES_ARM_NEON;
6831 GemmMicrokernelTester()
6832 .mr(4)
6833 .nr(8)
6834 .kr(1)
6835 .sr(1)
6836 .m(4)
6837 .n(8)
6838 .k(4)
6839 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006840 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006841 }
6842
Marat Dukhande06f492020-04-09 00:19:31 -07006843 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006844 TEST_REQUIRES_ARM_NEON;
6845 GemmMicrokernelTester()
6846 .mr(4)
6847 .nr(8)
6848 .kr(1)
6849 .sr(1)
6850 .m(4)
6851 .n(8)
6852 .k(4)
6853 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006854 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006855 }
6856
Marat Dukhande06f492020-04-09 00:19:31 -07006857 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006858 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006859 for (uint32_t n = 1; n <= 8; n++) {
6860 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006861 GemmMicrokernelTester()
6862 .mr(4)
6863 .nr(8)
6864 .kr(1)
6865 .sr(1)
6866 .m(m)
6867 .n(n)
6868 .k(4)
6869 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006870 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006871 }
6872 }
6873 }
6874
Marat Dukhande06f492020-04-09 00:19:31 -07006875 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006876 TEST_REQUIRES_ARM_NEON;
6877 for (uint32_t m = 1; m <= 4; m++) {
6878 GemmMicrokernelTester()
6879 .mr(4)
6880 .nr(8)
6881 .kr(1)
6882 .sr(1)
6883 .m(m)
6884 .n(8)
6885 .k(4)
6886 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006887 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006888 }
6889 }
6890
Marat Dukhande06f492020-04-09 00:19:31 -07006891 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006892 TEST_REQUIRES_ARM_NEON;
6893 for (uint32_t n = 1; n <= 8; n++) {
6894 GemmMicrokernelTester()
6895 .mr(4)
6896 .nr(8)
6897 .kr(1)
6898 .sr(1)
6899 .m(4)
6900 .n(n)
6901 .k(4)
6902 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006903 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006904 }
6905 }
6906
Marat Dukhande06f492020-04-09 00:19:31 -07006907 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006908 TEST_REQUIRES_ARM_NEON;
6909 for (size_t k = 1; k < 4; k++) {
6910 GemmMicrokernelTester()
6911 .mr(4)
6912 .nr(8)
6913 .kr(1)
6914 .sr(1)
6915 .m(4)
6916 .n(8)
6917 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006918 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006919 }
6920 }
6921
Marat Dukhande06f492020-04-09 00:19:31 -07006922 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006923 TEST_REQUIRES_ARM_NEON;
6924 for (size_t k = 1; k < 4; k++) {
6925 GemmMicrokernelTester()
6926 .mr(4)
6927 .nr(8)
6928 .kr(1)
6929 .sr(1)
6930 .m(4)
6931 .n(8)
6932 .k(k)
6933 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006934 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006935 }
6936 }
6937
Marat Dukhande06f492020-04-09 00:19:31 -07006938 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006939 TEST_REQUIRES_ARM_NEON;
6940 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006941 for (uint32_t n = 1; n <= 8; n++) {
6942 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006943 GemmMicrokernelTester()
6944 .mr(4)
6945 .nr(8)
6946 .kr(1)
6947 .sr(1)
6948 .m(m)
6949 .n(n)
6950 .k(k)
6951 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006952 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006953 }
6954 }
6955 }
6956 }
6957
Marat Dukhande06f492020-04-09 00:19:31 -07006958 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006959 TEST_REQUIRES_ARM_NEON;
6960 for (size_t k = 5; k < 8; k++) {
6961 GemmMicrokernelTester()
6962 .mr(4)
6963 .nr(8)
6964 .kr(1)
6965 .sr(1)
6966 .m(4)
6967 .n(8)
6968 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006969 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006970 }
6971 }
6972
Marat Dukhande06f492020-04-09 00:19:31 -07006973 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006974 TEST_REQUIRES_ARM_NEON;
6975 for (size_t k = 5; k < 8; k++) {
6976 GemmMicrokernelTester()
6977 .mr(4)
6978 .nr(8)
6979 .kr(1)
6980 .sr(1)
6981 .m(4)
6982 .n(8)
6983 .k(k)
6984 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006985 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006986 }
6987 }
6988
Marat Dukhande06f492020-04-09 00:19:31 -07006989 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006990 TEST_REQUIRES_ARM_NEON;
6991 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006992 for (uint32_t n = 1; n <= 8; n++) {
6993 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006994 GemmMicrokernelTester()
6995 .mr(4)
6996 .nr(8)
6997 .kr(1)
6998 .sr(1)
6999 .m(m)
7000 .n(n)
7001 .k(k)
7002 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007003 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007004 }
7005 }
7006 }
7007 }
7008
Marat Dukhande06f492020-04-09 00:19:31 -07007009 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007010 TEST_REQUIRES_ARM_NEON;
7011 for (size_t k = 8; k <= 40; k += 4) {
7012 GemmMicrokernelTester()
7013 .mr(4)
7014 .nr(8)
7015 .kr(1)
7016 .sr(1)
7017 .m(4)
7018 .n(8)
7019 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007020 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007021 }
7022 }
7023
Marat Dukhande06f492020-04-09 00:19:31 -07007024 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007025 TEST_REQUIRES_ARM_NEON;
7026 for (size_t k = 8; k <= 40; k += 4) {
7027 GemmMicrokernelTester()
7028 .mr(4)
7029 .nr(8)
7030 .kr(1)
7031 .sr(1)
7032 .m(4)
7033 .n(8)
7034 .k(k)
7035 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007036 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007037 }
7038 }
7039
Marat Dukhande06f492020-04-09 00:19:31 -07007040 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007041 TEST_REQUIRES_ARM_NEON;
7042 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007043 for (uint32_t n = 1; n <= 8; n++) {
7044 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007045 GemmMicrokernelTester()
7046 .mr(4)
7047 .nr(8)
7048 .kr(1)
7049 .sr(1)
7050 .m(m)
7051 .n(n)
7052 .k(k)
7053 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007054 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007055 }
7056 }
7057 }
7058 }
7059
Marat Dukhande06f492020-04-09 00:19:31 -07007060 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007061 TEST_REQUIRES_ARM_NEON;
7062 for (uint32_t n = 9; n < 16; n++) {
7063 for (size_t k = 1; k <= 20; k += 5) {
7064 GemmMicrokernelTester()
7065 .mr(4)
7066 .nr(8)
7067 .kr(1)
7068 .sr(1)
7069 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007070 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007071 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007072 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007073 }
7074 }
7075 }
7076
Marat Dukhande06f492020-04-09 00:19:31 -07007077 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007078 TEST_REQUIRES_ARM_NEON;
7079 for (uint32_t n = 9; n < 16; n++) {
7080 for (size_t k = 1; k <= 20; k += 5) {
7081 GemmMicrokernelTester()
7082 .mr(4)
7083 .nr(8)
7084 .kr(1)
7085 .sr(1)
7086 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007087 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007088 .k(k)
7089 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007090 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007091 }
7092 }
7093 }
7094
Marat Dukhande06f492020-04-09 00:19:31 -07007095 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007096 TEST_REQUIRES_ARM_NEON;
7097 for (uint32_t n = 9; n < 16; n++) {
7098 for (size_t k = 1; k <= 20; k += 5) {
7099 GemmMicrokernelTester()
7100 .mr(4)
7101 .nr(8)
7102 .kr(1)
7103 .sr(1)
7104 .m(4)
7105 .n(n)
7106 .k(k)
7107 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007108 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007109 }
7110 }
7111 }
7112
Marat Dukhande06f492020-04-09 00:19:31 -07007113 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007114 TEST_REQUIRES_ARM_NEON;
7115 for (uint32_t n = 9; n < 16; n++) {
7116 for (size_t k = 1; k <= 20; k += 5) {
7117 for (uint32_t m = 1; m <= 4; m++) {
7118 GemmMicrokernelTester()
7119 .mr(4)
7120 .nr(8)
7121 .kr(1)
7122 .sr(1)
7123 .m(m)
7124 .n(n)
7125 .k(k)
7126 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007127 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007128 }
7129 }
7130 }
7131 }
7132
Marat Dukhande06f492020-04-09 00:19:31 -07007133 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007134 TEST_REQUIRES_ARM_NEON;
7135 for (uint32_t n = 16; n <= 24; n += 8) {
7136 for (size_t k = 1; k <= 20; k += 5) {
7137 GemmMicrokernelTester()
7138 .mr(4)
7139 .nr(8)
7140 .kr(1)
7141 .sr(1)
7142 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007143 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007144 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007145 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007146 }
7147 }
7148 }
7149
Marat Dukhande06f492020-04-09 00:19:31 -07007150 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007151 TEST_REQUIRES_ARM_NEON;
7152 for (uint32_t n = 16; n <= 24; n += 8) {
7153 for (size_t k = 1; k <= 20; k += 5) {
7154 GemmMicrokernelTester()
7155 .mr(4)
7156 .nr(8)
7157 .kr(1)
7158 .sr(1)
7159 .m(4)
7160 .n(n)
7161 .k(k)
7162 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007163 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007164 }
7165 }
7166 }
7167
Marat Dukhande06f492020-04-09 00:19:31 -07007168 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007169 TEST_REQUIRES_ARM_NEON;
7170 for (uint32_t n = 16; n <= 24; n += 8) {
7171 for (size_t k = 1; k <= 20; k += 5) {
7172 GemmMicrokernelTester()
7173 .mr(4)
7174 .nr(8)
7175 .kr(1)
7176 .sr(1)
7177 .m(4)
7178 .n(n)
7179 .k(k)
7180 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007181 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007182 }
7183 }
7184 }
7185
Marat Dukhande06f492020-04-09 00:19:31 -07007186 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007187 TEST_REQUIRES_ARM_NEON;
7188 for (uint32_t n = 16; n <= 24; n += 8) {
7189 for (size_t k = 1; k <= 20; k += 5) {
7190 for (uint32_t m = 1; m <= 4; m++) {
7191 GemmMicrokernelTester()
7192 .mr(4)
7193 .nr(8)
7194 .kr(1)
7195 .sr(1)
7196 .m(m)
7197 .n(n)
7198 .k(k)
7199 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007200 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007201 }
7202 }
7203 }
7204 }
7205
Marat Dukhande06f492020-04-09 00:19:31 -07007206 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007207 TEST_REQUIRES_ARM_NEON;
7208 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007209 for (uint32_t n = 1; n <= 8; n++) {
7210 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007211 GemmMicrokernelTester()
7212 .mr(4)
7213 .nr(8)
7214 .kr(1)
7215 .sr(1)
7216 .m(m)
7217 .n(n)
7218 .k(k)
7219 .cm_stride(11)
7220 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007221 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007222 }
7223 }
7224 }
7225 }
7226
Marat Dukhande06f492020-04-09 00:19:31 -07007227 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007228 TEST_REQUIRES_ARM_NEON;
7229 GemmMicrokernelTester()
7230 .mr(4)
7231 .nr(8)
7232 .kr(1)
7233 .sr(1)
7234 .m(4)
7235 .n(8)
7236 .k(4)
7237 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007238 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007239 }
7240
Marat Dukhande06f492020-04-09 00:19:31 -07007241 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007242 TEST_REQUIRES_ARM_NEON;
7243 GemmMicrokernelTester()
7244 .mr(4)
7245 .nr(8)
7246 .kr(1)
7247 .sr(1)
7248 .m(4)
7249 .n(8)
7250 .k(4)
7251 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007252 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007253 }
7254
Marat Dukhande06f492020-04-09 00:19:31 -07007255 TEST(F32_GEMM_MINMAX_4X8__NEON_LANE_LD128, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007256 TEST_REQUIRES_ARM_NEON;
7257 GemmMicrokernelTester()
7258 .mr(4)
7259 .nr(8)
7260 .kr(1)
7261 .sr(1)
7262 .m(4)
7263 .n(8)
7264 .k(4)
7265 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007266 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007267 }
7268#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7269
7270
7271#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07007272 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007273 TEST_REQUIRES_ARM_NEON;
7274 GemmMicrokernelTester()
7275 .mr(5)
7276 .nr(8)
7277 .kr(1)
7278 .sr(1)
7279 .m(5)
7280 .n(8)
7281 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007282 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007283 }
7284
Marat Dukhande06f492020-04-09 00:19:31 -07007285 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007286 TEST_REQUIRES_ARM_NEON;
7287 GemmMicrokernelTester()
7288 .mr(5)
7289 .nr(8)
7290 .kr(1)
7291 .sr(1)
7292 .m(5)
7293 .n(8)
7294 .k(2)
7295 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007296 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007297 }
7298
Marat Dukhande06f492020-04-09 00:19:31 -07007299 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007300 TEST_REQUIRES_ARM_NEON;
7301 GemmMicrokernelTester()
7302 .mr(5)
7303 .nr(8)
7304 .kr(1)
7305 .sr(1)
7306 .m(5)
7307 .n(8)
7308 .k(2)
7309 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007310 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007311 }
7312
Marat Dukhande06f492020-04-09 00:19:31 -07007313 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007314 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007315 for (uint32_t n = 1; n <= 8; n++) {
7316 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007317 GemmMicrokernelTester()
7318 .mr(5)
7319 .nr(8)
7320 .kr(1)
7321 .sr(1)
7322 .m(m)
7323 .n(n)
7324 .k(2)
7325 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007326 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007327 }
7328 }
7329 }
7330
Marat Dukhande06f492020-04-09 00:19:31 -07007331 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007332 TEST_REQUIRES_ARM_NEON;
7333 for (uint32_t m = 1; m <= 5; m++) {
7334 GemmMicrokernelTester()
7335 .mr(5)
7336 .nr(8)
7337 .kr(1)
7338 .sr(1)
7339 .m(m)
7340 .n(8)
7341 .k(2)
7342 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007343 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007344 }
7345 }
7346
Marat Dukhande06f492020-04-09 00:19:31 -07007347 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007348 TEST_REQUIRES_ARM_NEON;
7349 for (uint32_t n = 1; n <= 8; n++) {
7350 GemmMicrokernelTester()
7351 .mr(5)
7352 .nr(8)
7353 .kr(1)
7354 .sr(1)
7355 .m(5)
7356 .n(n)
7357 .k(2)
7358 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007359 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007360 }
7361 }
7362
Marat Dukhande06f492020-04-09 00:19:31 -07007363 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007364 TEST_REQUIRES_ARM_NEON;
7365 for (size_t k = 1; k < 2; k++) {
7366 GemmMicrokernelTester()
7367 .mr(5)
7368 .nr(8)
7369 .kr(1)
7370 .sr(1)
7371 .m(5)
7372 .n(8)
7373 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007374 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007375 }
7376 }
7377
Marat Dukhande06f492020-04-09 00:19:31 -07007378 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007379 TEST_REQUIRES_ARM_NEON;
7380 for (size_t k = 1; k < 2; k++) {
7381 GemmMicrokernelTester()
7382 .mr(5)
7383 .nr(8)
7384 .kr(1)
7385 .sr(1)
7386 .m(5)
7387 .n(8)
7388 .k(k)
7389 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007390 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007391 }
7392 }
7393
Marat Dukhande06f492020-04-09 00:19:31 -07007394 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007395 TEST_REQUIRES_ARM_NEON;
7396 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007397 for (uint32_t n = 1; n <= 8; n++) {
7398 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007399 GemmMicrokernelTester()
7400 .mr(5)
7401 .nr(8)
7402 .kr(1)
7403 .sr(1)
7404 .m(m)
7405 .n(n)
7406 .k(k)
7407 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007408 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007409 }
7410 }
7411 }
7412 }
7413
Marat Dukhande06f492020-04-09 00:19:31 -07007414 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007415 TEST_REQUIRES_ARM_NEON;
7416 for (size_t k = 3; k < 4; k++) {
7417 GemmMicrokernelTester()
7418 .mr(5)
7419 .nr(8)
7420 .kr(1)
7421 .sr(1)
7422 .m(5)
7423 .n(8)
7424 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007425 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007426 }
7427 }
7428
Marat Dukhande06f492020-04-09 00:19:31 -07007429 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007430 TEST_REQUIRES_ARM_NEON;
7431 for (size_t k = 3; k < 4; k++) {
7432 GemmMicrokernelTester()
7433 .mr(5)
7434 .nr(8)
7435 .kr(1)
7436 .sr(1)
7437 .m(5)
7438 .n(8)
7439 .k(k)
7440 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007441 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007442 }
7443 }
7444
Marat Dukhande06f492020-04-09 00:19:31 -07007445 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007446 TEST_REQUIRES_ARM_NEON;
7447 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007448 for (uint32_t n = 1; n <= 8; n++) {
7449 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007450 GemmMicrokernelTester()
7451 .mr(5)
7452 .nr(8)
7453 .kr(1)
7454 .sr(1)
7455 .m(m)
7456 .n(n)
7457 .k(k)
7458 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007459 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007460 }
7461 }
7462 }
7463 }
7464
Marat Dukhande06f492020-04-09 00:19:31 -07007465 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007466 TEST_REQUIRES_ARM_NEON;
7467 for (size_t k = 4; k <= 20; k += 2) {
7468 GemmMicrokernelTester()
7469 .mr(5)
7470 .nr(8)
7471 .kr(1)
7472 .sr(1)
7473 .m(5)
7474 .n(8)
7475 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007476 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007477 }
7478 }
7479
Marat Dukhande06f492020-04-09 00:19:31 -07007480 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007481 TEST_REQUIRES_ARM_NEON;
7482 for (size_t k = 4; k <= 20; k += 2) {
7483 GemmMicrokernelTester()
7484 .mr(5)
7485 .nr(8)
7486 .kr(1)
7487 .sr(1)
7488 .m(5)
7489 .n(8)
7490 .k(k)
7491 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007492 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007493 }
7494 }
7495
Marat Dukhande06f492020-04-09 00:19:31 -07007496 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007497 TEST_REQUIRES_ARM_NEON;
7498 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007499 for (uint32_t n = 1; n <= 8; n++) {
7500 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007501 GemmMicrokernelTester()
7502 .mr(5)
7503 .nr(8)
7504 .kr(1)
7505 .sr(1)
7506 .m(m)
7507 .n(n)
7508 .k(k)
7509 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007510 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007511 }
7512 }
7513 }
7514 }
7515
Marat Dukhande06f492020-04-09 00:19:31 -07007516 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007517 TEST_REQUIRES_ARM_NEON;
7518 for (uint32_t n = 9; n < 16; n++) {
7519 for (size_t k = 1; k <= 10; k += 3) {
7520 GemmMicrokernelTester()
7521 .mr(5)
7522 .nr(8)
7523 .kr(1)
7524 .sr(1)
7525 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007526 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007527 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007528 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007529 }
7530 }
7531 }
7532
Marat Dukhande06f492020-04-09 00:19:31 -07007533 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007534 TEST_REQUIRES_ARM_NEON;
7535 for (uint32_t n = 9; n < 16; n++) {
7536 for (size_t k = 1; k <= 10; k += 3) {
7537 GemmMicrokernelTester()
7538 .mr(5)
7539 .nr(8)
7540 .kr(1)
7541 .sr(1)
7542 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007543 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007544 .k(k)
7545 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007546 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007547 }
7548 }
7549 }
7550
Marat Dukhande06f492020-04-09 00:19:31 -07007551 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007552 TEST_REQUIRES_ARM_NEON;
7553 for (uint32_t n = 9; n < 16; n++) {
7554 for (size_t k = 1; k <= 10; k += 3) {
7555 GemmMicrokernelTester()
7556 .mr(5)
7557 .nr(8)
7558 .kr(1)
7559 .sr(1)
7560 .m(5)
7561 .n(n)
7562 .k(k)
7563 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007564 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007565 }
7566 }
7567 }
7568
Marat Dukhande06f492020-04-09 00:19:31 -07007569 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007570 TEST_REQUIRES_ARM_NEON;
7571 for (uint32_t n = 9; n < 16; n++) {
7572 for (size_t k = 1; k <= 10; k += 3) {
7573 for (uint32_t m = 1; m <= 5; m++) {
7574 GemmMicrokernelTester()
7575 .mr(5)
7576 .nr(8)
7577 .kr(1)
7578 .sr(1)
7579 .m(m)
7580 .n(n)
7581 .k(k)
7582 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007583 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007584 }
7585 }
7586 }
7587 }
7588
Marat Dukhande06f492020-04-09 00:19:31 -07007589 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007590 TEST_REQUIRES_ARM_NEON;
7591 for (uint32_t n = 16; n <= 24; n += 8) {
7592 for (size_t k = 1; k <= 10; k += 3) {
7593 GemmMicrokernelTester()
7594 .mr(5)
7595 .nr(8)
7596 .kr(1)
7597 .sr(1)
7598 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007599 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007600 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007601 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007602 }
7603 }
7604 }
7605
Marat Dukhande06f492020-04-09 00:19:31 -07007606 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007607 TEST_REQUIRES_ARM_NEON;
7608 for (uint32_t n = 16; n <= 24; n += 8) {
7609 for (size_t k = 1; k <= 10; k += 3) {
7610 GemmMicrokernelTester()
7611 .mr(5)
7612 .nr(8)
7613 .kr(1)
7614 .sr(1)
7615 .m(5)
7616 .n(n)
7617 .k(k)
7618 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007619 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007620 }
7621 }
7622 }
7623
Marat Dukhande06f492020-04-09 00:19:31 -07007624 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007625 TEST_REQUIRES_ARM_NEON;
7626 for (uint32_t n = 16; n <= 24; n += 8) {
7627 for (size_t k = 1; k <= 10; k += 3) {
7628 GemmMicrokernelTester()
7629 .mr(5)
7630 .nr(8)
7631 .kr(1)
7632 .sr(1)
7633 .m(5)
7634 .n(n)
7635 .k(k)
7636 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007637 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007638 }
7639 }
7640 }
7641
Marat Dukhande06f492020-04-09 00:19:31 -07007642 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007643 TEST_REQUIRES_ARM_NEON;
7644 for (uint32_t n = 16; n <= 24; n += 8) {
7645 for (size_t k = 1; k <= 10; k += 3) {
7646 for (uint32_t m = 1; m <= 5; m++) {
7647 GemmMicrokernelTester()
7648 .mr(5)
7649 .nr(8)
7650 .kr(1)
7651 .sr(1)
7652 .m(m)
7653 .n(n)
7654 .k(k)
7655 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007656 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007657 }
7658 }
7659 }
7660 }
7661
Marat Dukhande06f492020-04-09 00:19:31 -07007662 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007663 TEST_REQUIRES_ARM_NEON;
7664 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007665 for (uint32_t n = 1; n <= 8; n++) {
7666 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007667 GemmMicrokernelTester()
7668 .mr(5)
7669 .nr(8)
7670 .kr(1)
7671 .sr(1)
7672 .m(m)
7673 .n(n)
7674 .k(k)
7675 .cm_stride(11)
7676 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007677 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007678 }
7679 }
7680 }
7681 }
7682
Marat Dukhande06f492020-04-09 00:19:31 -07007683 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007684 TEST_REQUIRES_ARM_NEON;
7685 GemmMicrokernelTester()
7686 .mr(5)
7687 .nr(8)
7688 .kr(1)
7689 .sr(1)
7690 .m(5)
7691 .n(8)
7692 .k(2)
7693 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007694 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007695 }
7696
Marat Dukhande06f492020-04-09 00:19:31 -07007697 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007698 TEST_REQUIRES_ARM_NEON;
7699 GemmMicrokernelTester()
7700 .mr(5)
7701 .nr(8)
7702 .kr(1)
7703 .sr(1)
7704 .m(5)
7705 .n(8)
7706 .k(2)
7707 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007708 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007709 }
7710
Marat Dukhande06f492020-04-09 00:19:31 -07007711 TEST(F32_GEMM_MINMAX_5X8__NEON_LANE_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007712 TEST_REQUIRES_ARM_NEON;
7713 GemmMicrokernelTester()
7714 .mr(5)
7715 .nr(8)
7716 .kr(1)
7717 .sr(1)
7718 .m(5)
7719 .n(8)
7720 .k(2)
7721 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007722 .Test(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007723 }
7724#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7725
7726
7727#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07007728 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007729 TEST_REQUIRES_ARM_NEON;
7730 GemmMicrokernelTester()
7731 .mr(6)
7732 .nr(8)
7733 .kr(1)
7734 .sr(1)
7735 .m(6)
7736 .n(8)
7737 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007738 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007739 }
7740
Marat Dukhande06f492020-04-09 00:19:31 -07007741 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007742 TEST_REQUIRES_ARM_NEON;
7743 GemmMicrokernelTester()
7744 .mr(6)
7745 .nr(8)
7746 .kr(1)
7747 .sr(1)
7748 .m(6)
7749 .n(8)
7750 .k(4)
7751 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007752 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007753 }
7754
Marat Dukhande06f492020-04-09 00:19:31 -07007755 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007756 TEST_REQUIRES_ARM_NEON;
7757 GemmMicrokernelTester()
7758 .mr(6)
7759 .nr(8)
7760 .kr(1)
7761 .sr(1)
7762 .m(6)
7763 .n(8)
7764 .k(4)
7765 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007766 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007767 }
7768
Marat Dukhande06f492020-04-09 00:19:31 -07007769 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007770 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007771 for (uint32_t n = 1; n <= 8; n++) {
7772 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007773 GemmMicrokernelTester()
7774 .mr(6)
7775 .nr(8)
7776 .kr(1)
7777 .sr(1)
7778 .m(m)
7779 .n(n)
7780 .k(4)
7781 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007782 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007783 }
7784 }
7785 }
7786
Marat Dukhande06f492020-04-09 00:19:31 -07007787 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007788 TEST_REQUIRES_ARM_NEON;
7789 for (uint32_t m = 1; m <= 6; m++) {
7790 GemmMicrokernelTester()
7791 .mr(6)
7792 .nr(8)
7793 .kr(1)
7794 .sr(1)
7795 .m(m)
7796 .n(8)
7797 .k(4)
7798 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007799 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007800 }
7801 }
7802
Marat Dukhande06f492020-04-09 00:19:31 -07007803 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007804 TEST_REQUIRES_ARM_NEON;
7805 for (uint32_t n = 1; n <= 8; n++) {
7806 GemmMicrokernelTester()
7807 .mr(6)
7808 .nr(8)
7809 .kr(1)
7810 .sr(1)
7811 .m(6)
7812 .n(n)
7813 .k(4)
7814 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007815 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007816 }
7817 }
7818
Marat Dukhande06f492020-04-09 00:19:31 -07007819 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007820 TEST_REQUIRES_ARM_NEON;
7821 for (size_t k = 1; k < 4; k++) {
7822 GemmMicrokernelTester()
7823 .mr(6)
7824 .nr(8)
7825 .kr(1)
7826 .sr(1)
7827 .m(6)
7828 .n(8)
7829 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007830 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007831 }
7832 }
7833
Marat Dukhande06f492020-04-09 00:19:31 -07007834 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007835 TEST_REQUIRES_ARM_NEON;
7836 for (size_t k = 1; k < 4; k++) {
7837 GemmMicrokernelTester()
7838 .mr(6)
7839 .nr(8)
7840 .kr(1)
7841 .sr(1)
7842 .m(6)
7843 .n(8)
7844 .k(k)
7845 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007846 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007847 }
7848 }
7849
Marat Dukhande06f492020-04-09 00:19:31 -07007850 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007851 TEST_REQUIRES_ARM_NEON;
7852 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007853 for (uint32_t n = 1; n <= 8; n++) {
7854 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007855 GemmMicrokernelTester()
7856 .mr(6)
7857 .nr(8)
7858 .kr(1)
7859 .sr(1)
7860 .m(m)
7861 .n(n)
7862 .k(k)
7863 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007864 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007865 }
7866 }
7867 }
7868 }
7869
Marat Dukhande06f492020-04-09 00:19:31 -07007870 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007871 TEST_REQUIRES_ARM_NEON;
7872 for (size_t k = 5; k < 8; k++) {
7873 GemmMicrokernelTester()
7874 .mr(6)
7875 .nr(8)
7876 .kr(1)
7877 .sr(1)
7878 .m(6)
7879 .n(8)
7880 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007881 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007882 }
7883 }
7884
Marat Dukhande06f492020-04-09 00:19:31 -07007885 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007886 TEST_REQUIRES_ARM_NEON;
7887 for (size_t k = 5; k < 8; k++) {
7888 GemmMicrokernelTester()
7889 .mr(6)
7890 .nr(8)
7891 .kr(1)
7892 .sr(1)
7893 .m(6)
7894 .n(8)
7895 .k(k)
7896 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007897 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007898 }
7899 }
7900
Marat Dukhande06f492020-04-09 00:19:31 -07007901 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007902 TEST_REQUIRES_ARM_NEON;
7903 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007904 for (uint32_t n = 1; n <= 8; n++) {
7905 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007906 GemmMicrokernelTester()
7907 .mr(6)
7908 .nr(8)
7909 .kr(1)
7910 .sr(1)
7911 .m(m)
7912 .n(n)
7913 .k(k)
7914 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007915 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007916 }
7917 }
7918 }
7919 }
7920
Marat Dukhande06f492020-04-09 00:19:31 -07007921 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007922 TEST_REQUIRES_ARM_NEON;
7923 for (size_t k = 8; k <= 40; k += 4) {
7924 GemmMicrokernelTester()
7925 .mr(6)
7926 .nr(8)
7927 .kr(1)
7928 .sr(1)
7929 .m(6)
7930 .n(8)
7931 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007932 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007933 }
7934 }
7935
Marat Dukhande06f492020-04-09 00:19:31 -07007936 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007937 TEST_REQUIRES_ARM_NEON;
7938 for (size_t k = 8; k <= 40; k += 4) {
7939 GemmMicrokernelTester()
7940 .mr(6)
7941 .nr(8)
7942 .kr(1)
7943 .sr(1)
7944 .m(6)
7945 .n(8)
7946 .k(k)
7947 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007948 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007949 }
7950 }
7951
Marat Dukhande06f492020-04-09 00:19:31 -07007952 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007953 TEST_REQUIRES_ARM_NEON;
7954 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007955 for (uint32_t n = 1; n <= 8; n++) {
7956 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007957 GemmMicrokernelTester()
7958 .mr(6)
7959 .nr(8)
7960 .kr(1)
7961 .sr(1)
7962 .m(m)
7963 .n(n)
7964 .k(k)
7965 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007966 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007967 }
7968 }
7969 }
7970 }
7971
Marat Dukhande06f492020-04-09 00:19:31 -07007972 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007973 TEST_REQUIRES_ARM_NEON;
7974 for (uint32_t n = 9; n < 16; n++) {
7975 for (size_t k = 1; k <= 20; k += 5) {
7976 GemmMicrokernelTester()
7977 .mr(6)
7978 .nr(8)
7979 .kr(1)
7980 .sr(1)
7981 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007982 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007983 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007984 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007985 }
7986 }
7987 }
7988
Marat Dukhande06f492020-04-09 00:19:31 -07007989 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007990 TEST_REQUIRES_ARM_NEON;
7991 for (uint32_t n = 9; n < 16; n++) {
7992 for (size_t k = 1; k <= 20; k += 5) {
7993 GemmMicrokernelTester()
7994 .mr(6)
7995 .nr(8)
7996 .kr(1)
7997 .sr(1)
7998 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007999 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008000 .k(k)
8001 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008002 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008003 }
8004 }
8005 }
8006
Marat Dukhande06f492020-04-09 00:19:31 -07008007 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008008 TEST_REQUIRES_ARM_NEON;
8009 for (uint32_t n = 9; n < 16; n++) {
8010 for (size_t k = 1; k <= 20; k += 5) {
8011 GemmMicrokernelTester()
8012 .mr(6)
8013 .nr(8)
8014 .kr(1)
8015 .sr(1)
8016 .m(6)
8017 .n(n)
8018 .k(k)
8019 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008020 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008021 }
8022 }
8023 }
8024
Marat Dukhande06f492020-04-09 00:19:31 -07008025 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008026 TEST_REQUIRES_ARM_NEON;
8027 for (uint32_t n = 9; n < 16; n++) {
8028 for (size_t k = 1; k <= 20; k += 5) {
8029 for (uint32_t m = 1; m <= 6; m++) {
8030 GemmMicrokernelTester()
8031 .mr(6)
8032 .nr(8)
8033 .kr(1)
8034 .sr(1)
8035 .m(m)
8036 .n(n)
8037 .k(k)
8038 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008039 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008040 }
8041 }
8042 }
8043 }
8044
Marat Dukhande06f492020-04-09 00:19:31 -07008045 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008046 TEST_REQUIRES_ARM_NEON;
8047 for (uint32_t n = 16; n <= 24; n += 8) {
8048 for (size_t k = 1; k <= 20; k += 5) {
8049 GemmMicrokernelTester()
8050 .mr(6)
8051 .nr(8)
8052 .kr(1)
8053 .sr(1)
8054 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008055 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008056 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008057 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008058 }
8059 }
8060 }
8061
Marat Dukhande06f492020-04-09 00:19:31 -07008062 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008063 TEST_REQUIRES_ARM_NEON;
8064 for (uint32_t n = 16; n <= 24; n += 8) {
8065 for (size_t k = 1; k <= 20; k += 5) {
8066 GemmMicrokernelTester()
8067 .mr(6)
8068 .nr(8)
8069 .kr(1)
8070 .sr(1)
8071 .m(6)
8072 .n(n)
8073 .k(k)
8074 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008075 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008076 }
8077 }
8078 }
8079
Marat Dukhande06f492020-04-09 00:19:31 -07008080 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008081 TEST_REQUIRES_ARM_NEON;
8082 for (uint32_t n = 16; n <= 24; n += 8) {
8083 for (size_t k = 1; k <= 20; k += 5) {
8084 GemmMicrokernelTester()
8085 .mr(6)
8086 .nr(8)
8087 .kr(1)
8088 .sr(1)
8089 .m(6)
8090 .n(n)
8091 .k(k)
8092 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008093 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008094 }
8095 }
8096 }
8097
Marat Dukhande06f492020-04-09 00:19:31 -07008098 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008099 TEST_REQUIRES_ARM_NEON;
8100 for (uint32_t n = 16; n <= 24; n += 8) {
8101 for (size_t k = 1; k <= 20; k += 5) {
8102 for (uint32_t m = 1; m <= 6; m++) {
8103 GemmMicrokernelTester()
8104 .mr(6)
8105 .nr(8)
8106 .kr(1)
8107 .sr(1)
8108 .m(m)
8109 .n(n)
8110 .k(k)
8111 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008112 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008113 }
8114 }
8115 }
8116 }
8117
Marat Dukhande06f492020-04-09 00:19:31 -07008118 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008119 TEST_REQUIRES_ARM_NEON;
8120 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008121 for (uint32_t n = 1; n <= 8; n++) {
8122 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008123 GemmMicrokernelTester()
8124 .mr(6)
8125 .nr(8)
8126 .kr(1)
8127 .sr(1)
8128 .m(m)
8129 .n(n)
8130 .k(k)
8131 .cm_stride(11)
8132 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008133 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008134 }
8135 }
8136 }
8137 }
8138
Marat Dukhande06f492020-04-09 00:19:31 -07008139 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008140 TEST_REQUIRES_ARM_NEON;
8141 GemmMicrokernelTester()
8142 .mr(6)
8143 .nr(8)
8144 .kr(1)
8145 .sr(1)
8146 .m(6)
8147 .n(8)
8148 .k(4)
8149 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008150 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008151 }
8152
Marat Dukhande06f492020-04-09 00:19:31 -07008153 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008154 TEST_REQUIRES_ARM_NEON;
8155 GemmMicrokernelTester()
8156 .mr(6)
8157 .nr(8)
8158 .kr(1)
8159 .sr(1)
8160 .m(6)
8161 .n(8)
8162 .k(4)
8163 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008164 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008165 }
8166
Marat Dukhande06f492020-04-09 00:19:31 -07008167 TEST(F32_GEMM_MINMAX_6X8__NEON_LANE_LD128, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008168 TEST_REQUIRES_ARM_NEON;
8169 GemmMicrokernelTester()
8170 .mr(6)
8171 .nr(8)
8172 .kr(1)
8173 .sr(1)
8174 .m(6)
8175 .n(8)
8176 .k(4)
8177 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008178 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008179 }
8180#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8181
8182
8183#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07008184 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008185 TEST_REQUIRES_ARM_NEON_FMA;
8186 GemmMicrokernelTester()
8187 .mr(4)
8188 .nr(8)
8189 .kr(1)
8190 .sr(1)
8191 .m(4)
8192 .n(8)
8193 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008194 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008195 }
8196
Marat Dukhande06f492020-04-09 00:19:31 -07008197 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008198 TEST_REQUIRES_ARM_NEON_FMA;
8199 GemmMicrokernelTester()
8200 .mr(4)
8201 .nr(8)
8202 .kr(1)
8203 .sr(1)
8204 .m(4)
8205 .n(8)
8206 .k(4)
8207 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008208 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008209 }
8210
Marat Dukhande06f492020-04-09 00:19:31 -07008211 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008212 TEST_REQUIRES_ARM_NEON_FMA;
8213 GemmMicrokernelTester()
8214 .mr(4)
8215 .nr(8)
8216 .kr(1)
8217 .sr(1)
8218 .m(4)
8219 .n(8)
8220 .k(4)
8221 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008222 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008223 }
8224
Marat Dukhande06f492020-04-09 00:19:31 -07008225 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008226 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008227 for (uint32_t n = 1; n <= 8; n++) {
8228 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008229 GemmMicrokernelTester()
8230 .mr(4)
8231 .nr(8)
8232 .kr(1)
8233 .sr(1)
8234 .m(m)
8235 .n(n)
8236 .k(4)
8237 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008238 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008239 }
8240 }
8241 }
8242
Marat Dukhande06f492020-04-09 00:19:31 -07008243 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008244 TEST_REQUIRES_ARM_NEON_FMA;
8245 for (uint32_t m = 1; m <= 4; m++) {
8246 GemmMicrokernelTester()
8247 .mr(4)
8248 .nr(8)
8249 .kr(1)
8250 .sr(1)
8251 .m(m)
8252 .n(8)
8253 .k(4)
8254 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008255 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008256 }
8257 }
8258
Marat Dukhande06f492020-04-09 00:19:31 -07008259 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008260 TEST_REQUIRES_ARM_NEON_FMA;
8261 for (uint32_t n = 1; n <= 8; n++) {
8262 GemmMicrokernelTester()
8263 .mr(4)
8264 .nr(8)
8265 .kr(1)
8266 .sr(1)
8267 .m(4)
8268 .n(n)
8269 .k(4)
8270 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008271 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008272 }
8273 }
8274
Marat Dukhande06f492020-04-09 00:19:31 -07008275 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008276 TEST_REQUIRES_ARM_NEON_FMA;
8277 for (size_t k = 1; k < 4; k++) {
8278 GemmMicrokernelTester()
8279 .mr(4)
8280 .nr(8)
8281 .kr(1)
8282 .sr(1)
8283 .m(4)
8284 .n(8)
8285 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008286 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008287 }
8288 }
8289
Marat Dukhande06f492020-04-09 00:19:31 -07008290 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008291 TEST_REQUIRES_ARM_NEON_FMA;
8292 for (size_t k = 1; k < 4; k++) {
8293 GemmMicrokernelTester()
8294 .mr(4)
8295 .nr(8)
8296 .kr(1)
8297 .sr(1)
8298 .m(4)
8299 .n(8)
8300 .k(k)
8301 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008302 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008303 }
8304 }
8305
Marat Dukhande06f492020-04-09 00:19:31 -07008306 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008307 TEST_REQUIRES_ARM_NEON_FMA;
8308 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008309 for (uint32_t n = 1; n <= 8; n++) {
8310 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008311 GemmMicrokernelTester()
8312 .mr(4)
8313 .nr(8)
8314 .kr(1)
8315 .sr(1)
8316 .m(m)
8317 .n(n)
8318 .k(k)
8319 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008320 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008321 }
8322 }
8323 }
8324 }
8325
Marat Dukhande06f492020-04-09 00:19:31 -07008326 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008327 TEST_REQUIRES_ARM_NEON_FMA;
8328 for (size_t k = 5; k < 8; k++) {
8329 GemmMicrokernelTester()
8330 .mr(4)
8331 .nr(8)
8332 .kr(1)
8333 .sr(1)
8334 .m(4)
8335 .n(8)
8336 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008337 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008338 }
8339 }
8340
Marat Dukhande06f492020-04-09 00:19:31 -07008341 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008342 TEST_REQUIRES_ARM_NEON_FMA;
8343 for (size_t k = 5; k < 8; k++) {
8344 GemmMicrokernelTester()
8345 .mr(4)
8346 .nr(8)
8347 .kr(1)
8348 .sr(1)
8349 .m(4)
8350 .n(8)
8351 .k(k)
8352 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008353 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008354 }
8355 }
8356
Marat Dukhande06f492020-04-09 00:19:31 -07008357 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008358 TEST_REQUIRES_ARM_NEON_FMA;
8359 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008360 for (uint32_t n = 1; n <= 8; n++) {
8361 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008362 GemmMicrokernelTester()
8363 .mr(4)
8364 .nr(8)
8365 .kr(1)
8366 .sr(1)
8367 .m(m)
8368 .n(n)
8369 .k(k)
8370 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008371 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008372 }
8373 }
8374 }
8375 }
8376
Marat Dukhande06f492020-04-09 00:19:31 -07008377 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008378 TEST_REQUIRES_ARM_NEON_FMA;
8379 for (size_t k = 8; k <= 40; k += 4) {
8380 GemmMicrokernelTester()
8381 .mr(4)
8382 .nr(8)
8383 .kr(1)
8384 .sr(1)
8385 .m(4)
8386 .n(8)
8387 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008388 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008389 }
8390 }
8391
Marat Dukhande06f492020-04-09 00:19:31 -07008392 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008393 TEST_REQUIRES_ARM_NEON_FMA;
8394 for (size_t k = 8; k <= 40; k += 4) {
8395 GemmMicrokernelTester()
8396 .mr(4)
8397 .nr(8)
8398 .kr(1)
8399 .sr(1)
8400 .m(4)
8401 .n(8)
8402 .k(k)
8403 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008404 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008405 }
8406 }
8407
Marat Dukhande06f492020-04-09 00:19:31 -07008408 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008409 TEST_REQUIRES_ARM_NEON_FMA;
8410 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008411 for (uint32_t n = 1; n <= 8; n++) {
8412 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008413 GemmMicrokernelTester()
8414 .mr(4)
8415 .nr(8)
8416 .kr(1)
8417 .sr(1)
8418 .m(m)
8419 .n(n)
8420 .k(k)
8421 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008422 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008423 }
8424 }
8425 }
8426 }
8427
Marat Dukhande06f492020-04-09 00:19:31 -07008428 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008429 TEST_REQUIRES_ARM_NEON_FMA;
8430 for (uint32_t n = 9; n < 16; n++) {
8431 for (size_t k = 1; k <= 20; k += 5) {
8432 GemmMicrokernelTester()
8433 .mr(4)
8434 .nr(8)
8435 .kr(1)
8436 .sr(1)
8437 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008438 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008439 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008440 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008441 }
8442 }
8443 }
8444
Marat Dukhande06f492020-04-09 00:19:31 -07008445 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008446 TEST_REQUIRES_ARM_NEON_FMA;
8447 for (uint32_t n = 9; n < 16; n++) {
8448 for (size_t k = 1; k <= 20; k += 5) {
8449 GemmMicrokernelTester()
8450 .mr(4)
8451 .nr(8)
8452 .kr(1)
8453 .sr(1)
8454 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008455 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008456 .k(k)
8457 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008458 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008459 }
8460 }
8461 }
8462
Marat Dukhande06f492020-04-09 00:19:31 -07008463 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008464 TEST_REQUIRES_ARM_NEON_FMA;
8465 for (uint32_t n = 9; n < 16; n++) {
8466 for (size_t k = 1; k <= 20; k += 5) {
8467 GemmMicrokernelTester()
8468 .mr(4)
8469 .nr(8)
8470 .kr(1)
8471 .sr(1)
8472 .m(4)
8473 .n(n)
8474 .k(k)
8475 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008476 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008477 }
8478 }
8479 }
8480
Marat Dukhande06f492020-04-09 00:19:31 -07008481 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008482 TEST_REQUIRES_ARM_NEON_FMA;
8483 for (uint32_t n = 9; n < 16; n++) {
8484 for (size_t k = 1; k <= 20; k += 5) {
8485 for (uint32_t m = 1; m <= 4; m++) {
8486 GemmMicrokernelTester()
8487 .mr(4)
8488 .nr(8)
8489 .kr(1)
8490 .sr(1)
8491 .m(m)
8492 .n(n)
8493 .k(k)
8494 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008495 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008496 }
8497 }
8498 }
8499 }
8500
Marat Dukhande06f492020-04-09 00:19:31 -07008501 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008502 TEST_REQUIRES_ARM_NEON_FMA;
8503 for (uint32_t n = 16; n <= 24; n += 8) {
8504 for (size_t k = 1; k <= 20; k += 5) {
8505 GemmMicrokernelTester()
8506 .mr(4)
8507 .nr(8)
8508 .kr(1)
8509 .sr(1)
8510 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008511 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008512 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008513 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008514 }
8515 }
8516 }
8517
Marat Dukhande06f492020-04-09 00:19:31 -07008518 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008519 TEST_REQUIRES_ARM_NEON_FMA;
8520 for (uint32_t n = 16; n <= 24; n += 8) {
8521 for (size_t k = 1; k <= 20; k += 5) {
8522 GemmMicrokernelTester()
8523 .mr(4)
8524 .nr(8)
8525 .kr(1)
8526 .sr(1)
8527 .m(4)
8528 .n(n)
8529 .k(k)
8530 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008531 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008532 }
8533 }
8534 }
8535
Marat Dukhande06f492020-04-09 00:19:31 -07008536 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008537 TEST_REQUIRES_ARM_NEON_FMA;
8538 for (uint32_t n = 16; n <= 24; n += 8) {
8539 for (size_t k = 1; k <= 20; k += 5) {
8540 GemmMicrokernelTester()
8541 .mr(4)
8542 .nr(8)
8543 .kr(1)
8544 .sr(1)
8545 .m(4)
8546 .n(n)
8547 .k(k)
8548 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008549 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008550 }
8551 }
8552 }
8553
Marat Dukhande06f492020-04-09 00:19:31 -07008554 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008555 TEST_REQUIRES_ARM_NEON_FMA;
8556 for (uint32_t n = 16; n <= 24; n += 8) {
8557 for (size_t k = 1; k <= 20; k += 5) {
8558 for (uint32_t m = 1; m <= 4; m++) {
8559 GemmMicrokernelTester()
8560 .mr(4)
8561 .nr(8)
8562 .kr(1)
8563 .sr(1)
8564 .m(m)
8565 .n(n)
8566 .k(k)
8567 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008568 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008569 }
8570 }
8571 }
8572 }
8573
Marat Dukhande06f492020-04-09 00:19:31 -07008574 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008575 TEST_REQUIRES_ARM_NEON_FMA;
8576 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008577 for (uint32_t n = 1; n <= 8; n++) {
8578 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008579 GemmMicrokernelTester()
8580 .mr(4)
8581 .nr(8)
8582 .kr(1)
8583 .sr(1)
8584 .m(m)
8585 .n(n)
8586 .k(k)
8587 .cm_stride(11)
8588 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008589 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008590 }
8591 }
8592 }
8593 }
8594
Marat Dukhande06f492020-04-09 00:19:31 -07008595 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008596 TEST_REQUIRES_ARM_NEON_FMA;
8597 GemmMicrokernelTester()
8598 .mr(4)
8599 .nr(8)
8600 .kr(1)
8601 .sr(1)
8602 .m(4)
8603 .n(8)
8604 .k(4)
8605 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008606 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008607 }
8608
Marat Dukhande06f492020-04-09 00:19:31 -07008609 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008610 TEST_REQUIRES_ARM_NEON_FMA;
8611 GemmMicrokernelTester()
8612 .mr(4)
8613 .nr(8)
8614 .kr(1)
8615 .sr(1)
8616 .m(4)
8617 .n(8)
8618 .k(4)
8619 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008620 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008621 }
8622
Marat Dukhande06f492020-04-09 00:19:31 -07008623 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008624 TEST_REQUIRES_ARM_NEON_FMA;
8625 GemmMicrokernelTester()
8626 .mr(4)
8627 .nr(8)
8628 .kr(1)
8629 .sr(1)
8630 .m(4)
8631 .n(8)
8632 .k(4)
8633 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008634 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008635 }
8636#endif // XNN_ARCH_ARM64
8637
8638
Marat Dukhan1c587112020-04-08 20:04:28 -07008639#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07008640 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008641 TEST_REQUIRES_ARM_NEON;
8642 GemmMicrokernelTester()
8643 .mr(1)
8644 .nr(8)
8645 .kr(1)
8646 .sr(1)
8647 .m(1)
8648 .n(8)
8649 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008650 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008651 }
8652
Marat Dukhande06f492020-04-09 00:19:31 -07008653 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008654 TEST_REQUIRES_ARM_NEON;
8655 GemmMicrokernelTester()
8656 .mr(1)
8657 .nr(8)
8658 .kr(1)
8659 .sr(1)
8660 .m(1)
8661 .n(8)
8662 .k(2)
8663 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008664 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008665 }
8666
Marat Dukhande06f492020-04-09 00:19:31 -07008667 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008668 TEST_REQUIRES_ARM_NEON;
8669 GemmMicrokernelTester()
8670 .mr(1)
8671 .nr(8)
8672 .kr(1)
8673 .sr(1)
8674 .m(1)
8675 .n(8)
8676 .k(2)
8677 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008678 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008679 }
8680
Marat Dukhande06f492020-04-09 00:19:31 -07008681 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008682 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008683 for (uint32_t n = 1; n <= 8; n++) {
8684 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008685 GemmMicrokernelTester()
8686 .mr(1)
8687 .nr(8)
8688 .kr(1)
8689 .sr(1)
8690 .m(m)
8691 .n(n)
8692 .k(2)
8693 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008694 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008695 }
8696 }
8697 }
8698
Marat Dukhande06f492020-04-09 00:19:31 -07008699 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008700 TEST_REQUIRES_ARM_NEON;
8701 for (uint32_t m = 1; m <= 1; m++) {
8702 GemmMicrokernelTester()
8703 .mr(1)
8704 .nr(8)
8705 .kr(1)
8706 .sr(1)
8707 .m(m)
8708 .n(8)
8709 .k(2)
8710 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008711 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008712 }
8713 }
8714
Marat Dukhande06f492020-04-09 00:19:31 -07008715 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008716 TEST_REQUIRES_ARM_NEON;
8717 for (uint32_t n = 1; n <= 8; n++) {
8718 GemmMicrokernelTester()
8719 .mr(1)
8720 .nr(8)
8721 .kr(1)
8722 .sr(1)
8723 .m(1)
8724 .n(n)
8725 .k(2)
8726 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008727 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008728 }
8729 }
8730
Marat Dukhande06f492020-04-09 00:19:31 -07008731 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008732 TEST_REQUIRES_ARM_NEON;
8733 for (size_t k = 1; k < 2; k++) {
8734 GemmMicrokernelTester()
8735 .mr(1)
8736 .nr(8)
8737 .kr(1)
8738 .sr(1)
8739 .m(1)
8740 .n(8)
8741 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008742 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008743 }
8744 }
8745
Marat Dukhande06f492020-04-09 00:19:31 -07008746 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008747 TEST_REQUIRES_ARM_NEON;
8748 for (size_t k = 1; k < 2; k++) {
8749 GemmMicrokernelTester()
8750 .mr(1)
8751 .nr(8)
8752 .kr(1)
8753 .sr(1)
8754 .m(1)
8755 .n(8)
8756 .k(k)
8757 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008758 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008759 }
8760 }
8761
Marat Dukhande06f492020-04-09 00:19:31 -07008762 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008763 TEST_REQUIRES_ARM_NEON;
8764 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008765 for (uint32_t n = 1; n <= 8; n++) {
8766 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008767 GemmMicrokernelTester()
8768 .mr(1)
8769 .nr(8)
8770 .kr(1)
8771 .sr(1)
8772 .m(m)
8773 .n(n)
8774 .k(k)
8775 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008776 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008777 }
8778 }
8779 }
8780 }
8781
Marat Dukhande06f492020-04-09 00:19:31 -07008782 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008783 TEST_REQUIRES_ARM_NEON;
8784 for (size_t k = 3; k < 4; k++) {
8785 GemmMicrokernelTester()
8786 .mr(1)
8787 .nr(8)
8788 .kr(1)
8789 .sr(1)
8790 .m(1)
8791 .n(8)
8792 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008793 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008794 }
8795 }
8796
Marat Dukhande06f492020-04-09 00:19:31 -07008797 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008798 TEST_REQUIRES_ARM_NEON;
8799 for (size_t k = 3; k < 4; k++) {
8800 GemmMicrokernelTester()
8801 .mr(1)
8802 .nr(8)
8803 .kr(1)
8804 .sr(1)
8805 .m(1)
8806 .n(8)
8807 .k(k)
8808 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008809 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008810 }
8811 }
8812
Marat Dukhande06f492020-04-09 00:19:31 -07008813 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008814 TEST_REQUIRES_ARM_NEON;
8815 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008816 for (uint32_t n = 1; n <= 8; n++) {
8817 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008818 GemmMicrokernelTester()
8819 .mr(1)
8820 .nr(8)
8821 .kr(1)
8822 .sr(1)
8823 .m(m)
8824 .n(n)
8825 .k(k)
8826 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008827 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008828 }
8829 }
8830 }
8831 }
8832
Marat Dukhande06f492020-04-09 00:19:31 -07008833 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008834 TEST_REQUIRES_ARM_NEON;
8835 for (size_t k = 4; k <= 20; k += 2) {
8836 GemmMicrokernelTester()
8837 .mr(1)
8838 .nr(8)
8839 .kr(1)
8840 .sr(1)
8841 .m(1)
8842 .n(8)
8843 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008844 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008845 }
8846 }
8847
Marat Dukhande06f492020-04-09 00:19:31 -07008848 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008849 TEST_REQUIRES_ARM_NEON;
8850 for (size_t k = 4; k <= 20; k += 2) {
8851 GemmMicrokernelTester()
8852 .mr(1)
8853 .nr(8)
8854 .kr(1)
8855 .sr(1)
8856 .m(1)
8857 .n(8)
8858 .k(k)
8859 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008860 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008861 }
8862 }
8863
Marat Dukhande06f492020-04-09 00:19:31 -07008864 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008865 TEST_REQUIRES_ARM_NEON;
8866 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008867 for (uint32_t n = 1; n <= 8; n++) {
8868 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008869 GemmMicrokernelTester()
8870 .mr(1)
8871 .nr(8)
8872 .kr(1)
8873 .sr(1)
8874 .m(m)
8875 .n(n)
8876 .k(k)
8877 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008878 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008879 }
8880 }
8881 }
8882 }
8883
Marat Dukhande06f492020-04-09 00:19:31 -07008884 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008885 TEST_REQUIRES_ARM_NEON;
8886 for (uint32_t n = 9; n < 16; n++) {
8887 for (size_t k = 1; k <= 10; k += 3) {
8888 GemmMicrokernelTester()
8889 .mr(1)
8890 .nr(8)
8891 .kr(1)
8892 .sr(1)
8893 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008894 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008895 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008896 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008897 }
8898 }
8899 }
8900
Marat Dukhande06f492020-04-09 00:19:31 -07008901 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008902 TEST_REQUIRES_ARM_NEON;
8903 for (uint32_t n = 9; n < 16; n++) {
8904 for (size_t k = 1; k <= 10; k += 3) {
8905 GemmMicrokernelTester()
8906 .mr(1)
8907 .nr(8)
8908 .kr(1)
8909 .sr(1)
8910 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008911 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008912 .k(k)
8913 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008914 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008915 }
8916 }
8917 }
8918
Marat Dukhande06f492020-04-09 00:19:31 -07008919 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008920 TEST_REQUIRES_ARM_NEON;
8921 for (uint32_t n = 9; n < 16; n++) {
8922 for (size_t k = 1; k <= 10; k += 3) {
8923 GemmMicrokernelTester()
8924 .mr(1)
8925 .nr(8)
8926 .kr(1)
8927 .sr(1)
8928 .m(1)
8929 .n(n)
8930 .k(k)
8931 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008932 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008933 }
8934 }
8935 }
8936
Marat Dukhande06f492020-04-09 00:19:31 -07008937 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008938 TEST_REQUIRES_ARM_NEON;
8939 for (uint32_t n = 9; n < 16; n++) {
8940 for (size_t k = 1; k <= 10; k += 3) {
8941 for (uint32_t m = 1; m <= 1; m++) {
8942 GemmMicrokernelTester()
8943 .mr(1)
8944 .nr(8)
8945 .kr(1)
8946 .sr(1)
8947 .m(m)
8948 .n(n)
8949 .k(k)
8950 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008951 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008952 }
8953 }
8954 }
8955 }
8956
Marat Dukhande06f492020-04-09 00:19:31 -07008957 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008958 TEST_REQUIRES_ARM_NEON;
8959 for (uint32_t n = 16; n <= 24; n += 8) {
8960 for (size_t k = 1; k <= 10; k += 3) {
8961 GemmMicrokernelTester()
8962 .mr(1)
8963 .nr(8)
8964 .kr(1)
8965 .sr(1)
8966 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008967 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008968 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008969 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008970 }
8971 }
8972 }
8973
Marat Dukhande06f492020-04-09 00:19:31 -07008974 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008975 TEST_REQUIRES_ARM_NEON;
8976 for (uint32_t n = 16; n <= 24; n += 8) {
8977 for (size_t k = 1; k <= 10; k += 3) {
8978 GemmMicrokernelTester()
8979 .mr(1)
8980 .nr(8)
8981 .kr(1)
8982 .sr(1)
8983 .m(1)
8984 .n(n)
8985 .k(k)
8986 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008987 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008988 }
8989 }
8990 }
8991
Marat Dukhande06f492020-04-09 00:19:31 -07008992 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008993 TEST_REQUIRES_ARM_NEON;
8994 for (uint32_t n = 16; n <= 24; n += 8) {
8995 for (size_t k = 1; k <= 10; k += 3) {
8996 GemmMicrokernelTester()
8997 .mr(1)
8998 .nr(8)
8999 .kr(1)
9000 .sr(1)
9001 .m(1)
9002 .n(n)
9003 .k(k)
9004 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009005 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009006 }
9007 }
9008 }
9009
Marat Dukhande06f492020-04-09 00:19:31 -07009010 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009011 TEST_REQUIRES_ARM_NEON;
9012 for (uint32_t n = 16; n <= 24; n += 8) {
9013 for (size_t k = 1; k <= 10; k += 3) {
9014 for (uint32_t m = 1; m <= 1; m++) {
9015 GemmMicrokernelTester()
9016 .mr(1)
9017 .nr(8)
9018 .kr(1)
9019 .sr(1)
9020 .m(m)
9021 .n(n)
9022 .k(k)
9023 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009024 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009025 }
9026 }
9027 }
9028 }
9029
Marat Dukhande06f492020-04-09 00:19:31 -07009030 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009031 TEST_REQUIRES_ARM_NEON;
9032 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009033 for (uint32_t n = 1; n <= 8; n++) {
9034 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009035 GemmMicrokernelTester()
9036 .mr(1)
9037 .nr(8)
9038 .kr(1)
9039 .sr(1)
9040 .m(m)
9041 .n(n)
9042 .k(k)
9043 .cm_stride(11)
9044 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009045 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009046 }
9047 }
9048 }
9049 }
9050
Marat Dukhande06f492020-04-09 00:19:31 -07009051 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009052 TEST_REQUIRES_ARM_NEON;
9053 GemmMicrokernelTester()
9054 .mr(1)
9055 .nr(8)
9056 .kr(1)
9057 .sr(1)
9058 .m(1)
9059 .n(8)
9060 .k(2)
9061 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009062 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009063 }
9064
Marat Dukhande06f492020-04-09 00:19:31 -07009065 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009066 TEST_REQUIRES_ARM_NEON;
9067 GemmMicrokernelTester()
9068 .mr(1)
9069 .nr(8)
9070 .kr(1)
9071 .sr(1)
9072 .m(1)
9073 .n(8)
9074 .k(2)
9075 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009076 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009077 }
9078
Marat Dukhande06f492020-04-09 00:19:31 -07009079 TEST(F32_GEMM_MINMAX_1X8__NEON_DUP_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009080 TEST_REQUIRES_ARM_NEON;
9081 GemmMicrokernelTester()
9082 .mr(1)
9083 .nr(8)
9084 .kr(1)
9085 .sr(1)
9086 .m(1)
9087 .n(8)
9088 .k(2)
9089 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009090 .Test(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009091 }
9092#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9093
9094
9095#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07009096 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009097 TEST_REQUIRES_ARM_NEON;
9098 GemmMicrokernelTester()
9099 .mr(4)
9100 .nr(8)
9101 .kr(1)
9102 .sr(1)
9103 .m(4)
9104 .n(8)
9105 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009106 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009107 }
9108
Marat Dukhande06f492020-04-09 00:19:31 -07009109 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009110 TEST_REQUIRES_ARM_NEON;
9111 GemmMicrokernelTester()
9112 .mr(4)
9113 .nr(8)
9114 .kr(1)
9115 .sr(1)
9116 .m(4)
9117 .n(8)
9118 .k(4)
9119 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009120 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009121 }
9122
Marat Dukhande06f492020-04-09 00:19:31 -07009123 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009124 TEST_REQUIRES_ARM_NEON;
9125 GemmMicrokernelTester()
9126 .mr(4)
9127 .nr(8)
9128 .kr(1)
9129 .sr(1)
9130 .m(4)
9131 .n(8)
9132 .k(4)
9133 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009134 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009135 }
9136
Marat Dukhande06f492020-04-09 00:19:31 -07009137 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009138 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009139 for (uint32_t n = 1; n <= 8; n++) {
9140 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009141 GemmMicrokernelTester()
9142 .mr(4)
9143 .nr(8)
9144 .kr(1)
9145 .sr(1)
9146 .m(m)
9147 .n(n)
9148 .k(4)
9149 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009150 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009151 }
9152 }
9153 }
9154
Marat Dukhande06f492020-04-09 00:19:31 -07009155 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009156 TEST_REQUIRES_ARM_NEON;
9157 for (uint32_t m = 1; m <= 4; m++) {
9158 GemmMicrokernelTester()
9159 .mr(4)
9160 .nr(8)
9161 .kr(1)
9162 .sr(1)
9163 .m(m)
9164 .n(8)
9165 .k(4)
9166 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009167 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009168 }
9169 }
9170
Marat Dukhande06f492020-04-09 00:19:31 -07009171 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009172 TEST_REQUIRES_ARM_NEON;
9173 for (uint32_t n = 1; n <= 8; n++) {
9174 GemmMicrokernelTester()
9175 .mr(4)
9176 .nr(8)
9177 .kr(1)
9178 .sr(1)
9179 .m(4)
9180 .n(n)
9181 .k(4)
9182 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009183 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009184 }
9185 }
9186
Marat Dukhande06f492020-04-09 00:19:31 -07009187 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009188 TEST_REQUIRES_ARM_NEON;
9189 for (size_t k = 1; k < 4; k++) {
9190 GemmMicrokernelTester()
9191 .mr(4)
9192 .nr(8)
9193 .kr(1)
9194 .sr(1)
9195 .m(4)
9196 .n(8)
9197 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009198 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009199 }
9200 }
9201
Marat Dukhande06f492020-04-09 00:19:31 -07009202 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009203 TEST_REQUIRES_ARM_NEON;
9204 for (size_t k = 1; k < 4; k++) {
9205 GemmMicrokernelTester()
9206 .mr(4)
9207 .nr(8)
9208 .kr(1)
9209 .sr(1)
9210 .m(4)
9211 .n(8)
9212 .k(k)
9213 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009214 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009215 }
9216 }
9217
Marat Dukhande06f492020-04-09 00:19:31 -07009218 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009219 TEST_REQUIRES_ARM_NEON;
9220 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009221 for (uint32_t n = 1; n <= 8; n++) {
9222 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009223 GemmMicrokernelTester()
9224 .mr(4)
9225 .nr(8)
9226 .kr(1)
9227 .sr(1)
9228 .m(m)
9229 .n(n)
9230 .k(k)
9231 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009232 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009233 }
9234 }
9235 }
9236 }
9237
Marat Dukhande06f492020-04-09 00:19:31 -07009238 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009239 TEST_REQUIRES_ARM_NEON;
9240 for (size_t k = 5; k < 8; k++) {
9241 GemmMicrokernelTester()
9242 .mr(4)
9243 .nr(8)
9244 .kr(1)
9245 .sr(1)
9246 .m(4)
9247 .n(8)
9248 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009249 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009250 }
9251 }
9252
Marat Dukhande06f492020-04-09 00:19:31 -07009253 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009254 TEST_REQUIRES_ARM_NEON;
9255 for (size_t k = 5; k < 8; k++) {
9256 GemmMicrokernelTester()
9257 .mr(4)
9258 .nr(8)
9259 .kr(1)
9260 .sr(1)
9261 .m(4)
9262 .n(8)
9263 .k(k)
9264 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009265 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009266 }
9267 }
9268
Marat Dukhande06f492020-04-09 00:19:31 -07009269 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009270 TEST_REQUIRES_ARM_NEON;
9271 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009272 for (uint32_t n = 1; n <= 8; n++) {
9273 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009274 GemmMicrokernelTester()
9275 .mr(4)
9276 .nr(8)
9277 .kr(1)
9278 .sr(1)
9279 .m(m)
9280 .n(n)
9281 .k(k)
9282 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009283 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009284 }
9285 }
9286 }
9287 }
9288
Marat Dukhande06f492020-04-09 00:19:31 -07009289 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009290 TEST_REQUIRES_ARM_NEON;
9291 for (size_t k = 8; k <= 40; k += 4) {
9292 GemmMicrokernelTester()
9293 .mr(4)
9294 .nr(8)
9295 .kr(1)
9296 .sr(1)
9297 .m(4)
9298 .n(8)
9299 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009300 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009301 }
9302 }
9303
Marat Dukhande06f492020-04-09 00:19:31 -07009304 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009305 TEST_REQUIRES_ARM_NEON;
9306 for (size_t k = 8; k <= 40; k += 4) {
9307 GemmMicrokernelTester()
9308 .mr(4)
9309 .nr(8)
9310 .kr(1)
9311 .sr(1)
9312 .m(4)
9313 .n(8)
9314 .k(k)
9315 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009316 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009317 }
9318 }
9319
Marat Dukhande06f492020-04-09 00:19:31 -07009320 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009321 TEST_REQUIRES_ARM_NEON;
9322 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009323 for (uint32_t n = 1; n <= 8; n++) {
9324 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009325 GemmMicrokernelTester()
9326 .mr(4)
9327 .nr(8)
9328 .kr(1)
9329 .sr(1)
9330 .m(m)
9331 .n(n)
9332 .k(k)
9333 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009334 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009335 }
9336 }
9337 }
9338 }
9339
Marat Dukhande06f492020-04-09 00:19:31 -07009340 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009341 TEST_REQUIRES_ARM_NEON;
9342 for (uint32_t n = 9; n < 16; n++) {
9343 for (size_t k = 1; k <= 20; k += 5) {
9344 GemmMicrokernelTester()
9345 .mr(4)
9346 .nr(8)
9347 .kr(1)
9348 .sr(1)
9349 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009350 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009351 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009352 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009353 }
9354 }
9355 }
9356
Marat Dukhande06f492020-04-09 00:19:31 -07009357 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009358 TEST_REQUIRES_ARM_NEON;
9359 for (uint32_t n = 9; n < 16; n++) {
9360 for (size_t k = 1; k <= 20; k += 5) {
9361 GemmMicrokernelTester()
9362 .mr(4)
9363 .nr(8)
9364 .kr(1)
9365 .sr(1)
9366 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009367 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009368 .k(k)
9369 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009370 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009371 }
9372 }
9373 }
9374
Marat Dukhande06f492020-04-09 00:19:31 -07009375 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009376 TEST_REQUIRES_ARM_NEON;
9377 for (uint32_t n = 9; n < 16; n++) {
9378 for (size_t k = 1; k <= 20; k += 5) {
9379 GemmMicrokernelTester()
9380 .mr(4)
9381 .nr(8)
9382 .kr(1)
9383 .sr(1)
9384 .m(4)
9385 .n(n)
9386 .k(k)
9387 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009388 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009389 }
9390 }
9391 }
9392
Marat Dukhande06f492020-04-09 00:19:31 -07009393 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009394 TEST_REQUIRES_ARM_NEON;
9395 for (uint32_t n = 9; n < 16; n++) {
9396 for (size_t k = 1; k <= 20; k += 5) {
9397 for (uint32_t m = 1; m <= 4; m++) {
9398 GemmMicrokernelTester()
9399 .mr(4)
9400 .nr(8)
9401 .kr(1)
9402 .sr(1)
9403 .m(m)
9404 .n(n)
9405 .k(k)
9406 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009407 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009408 }
9409 }
9410 }
9411 }
9412
Marat Dukhande06f492020-04-09 00:19:31 -07009413 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009414 TEST_REQUIRES_ARM_NEON;
9415 for (uint32_t n = 16; n <= 24; n += 8) {
9416 for (size_t k = 1; k <= 20; k += 5) {
9417 GemmMicrokernelTester()
9418 .mr(4)
9419 .nr(8)
9420 .kr(1)
9421 .sr(1)
9422 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009423 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009424 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009425 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009426 }
9427 }
9428 }
9429
Marat Dukhande06f492020-04-09 00:19:31 -07009430 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009431 TEST_REQUIRES_ARM_NEON;
9432 for (uint32_t n = 16; n <= 24; n += 8) {
9433 for (size_t k = 1; k <= 20; k += 5) {
9434 GemmMicrokernelTester()
9435 .mr(4)
9436 .nr(8)
9437 .kr(1)
9438 .sr(1)
9439 .m(4)
9440 .n(n)
9441 .k(k)
9442 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009443 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009444 }
9445 }
9446 }
9447
Marat Dukhande06f492020-04-09 00:19:31 -07009448 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009449 TEST_REQUIRES_ARM_NEON;
9450 for (uint32_t n = 16; n <= 24; n += 8) {
9451 for (size_t k = 1; k <= 20; k += 5) {
9452 GemmMicrokernelTester()
9453 .mr(4)
9454 .nr(8)
9455 .kr(1)
9456 .sr(1)
9457 .m(4)
9458 .n(n)
9459 .k(k)
9460 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009461 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009462 }
9463 }
9464 }
9465
Marat Dukhande06f492020-04-09 00:19:31 -07009466 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009467 TEST_REQUIRES_ARM_NEON;
9468 for (uint32_t n = 16; n <= 24; n += 8) {
9469 for (size_t k = 1; k <= 20; k += 5) {
9470 for (uint32_t m = 1; m <= 4; m++) {
9471 GemmMicrokernelTester()
9472 .mr(4)
9473 .nr(8)
9474 .kr(1)
9475 .sr(1)
9476 .m(m)
9477 .n(n)
9478 .k(k)
9479 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009480 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009481 }
9482 }
9483 }
9484 }
9485
Marat Dukhande06f492020-04-09 00:19:31 -07009486 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009487 TEST_REQUIRES_ARM_NEON;
9488 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009489 for (uint32_t n = 1; n <= 8; n++) {
9490 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009491 GemmMicrokernelTester()
9492 .mr(4)
9493 .nr(8)
9494 .kr(1)
9495 .sr(1)
9496 .m(m)
9497 .n(n)
9498 .k(k)
9499 .cm_stride(11)
9500 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009501 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009502 }
9503 }
9504 }
9505 }
9506
Marat Dukhande06f492020-04-09 00:19:31 -07009507 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009508 TEST_REQUIRES_ARM_NEON;
9509 GemmMicrokernelTester()
9510 .mr(4)
9511 .nr(8)
9512 .kr(1)
9513 .sr(1)
9514 .m(4)
9515 .n(8)
9516 .k(4)
9517 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009518 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009519 }
9520
Marat Dukhande06f492020-04-09 00:19:31 -07009521 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009522 TEST_REQUIRES_ARM_NEON;
9523 GemmMicrokernelTester()
9524 .mr(4)
9525 .nr(8)
9526 .kr(1)
9527 .sr(1)
9528 .m(4)
9529 .n(8)
9530 .k(4)
9531 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009532 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009533 }
9534
Marat Dukhande06f492020-04-09 00:19:31 -07009535 TEST(F32_GEMM_MINMAX_4X8__NEON_DUP_LD128, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009536 TEST_REQUIRES_ARM_NEON;
9537 GemmMicrokernelTester()
9538 .mr(4)
9539 .nr(8)
9540 .kr(1)
9541 .sr(1)
9542 .m(4)
9543 .n(8)
9544 .k(4)
9545 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009546 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009547 }
9548#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9549
9550
9551#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07009552 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009553 TEST_REQUIRES_ARM_NEON;
9554 GemmMicrokernelTester()
9555 .mr(6)
9556 .nr(8)
9557 .kr(1)
9558 .sr(1)
9559 .m(6)
9560 .n(8)
9561 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009562 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009563 }
9564
Marat Dukhande06f492020-04-09 00:19:31 -07009565 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009566 TEST_REQUIRES_ARM_NEON;
9567 GemmMicrokernelTester()
9568 .mr(6)
9569 .nr(8)
9570 .kr(1)
9571 .sr(1)
9572 .m(6)
9573 .n(8)
9574 .k(2)
9575 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009576 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009577 }
9578
Marat Dukhande06f492020-04-09 00:19:31 -07009579 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009580 TEST_REQUIRES_ARM_NEON;
9581 GemmMicrokernelTester()
9582 .mr(6)
9583 .nr(8)
9584 .kr(1)
9585 .sr(1)
9586 .m(6)
9587 .n(8)
9588 .k(2)
9589 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009590 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009591 }
9592
Marat Dukhande06f492020-04-09 00:19:31 -07009593 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009594 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009595 for (uint32_t n = 1; n <= 8; n++) {
9596 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009597 GemmMicrokernelTester()
9598 .mr(6)
9599 .nr(8)
9600 .kr(1)
9601 .sr(1)
9602 .m(m)
9603 .n(n)
9604 .k(2)
9605 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009606 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009607 }
9608 }
9609 }
9610
Marat Dukhande06f492020-04-09 00:19:31 -07009611 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009612 TEST_REQUIRES_ARM_NEON;
9613 for (uint32_t m = 1; m <= 6; m++) {
9614 GemmMicrokernelTester()
9615 .mr(6)
9616 .nr(8)
9617 .kr(1)
9618 .sr(1)
9619 .m(m)
9620 .n(8)
9621 .k(2)
9622 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009623 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009624 }
9625 }
9626
Marat Dukhande06f492020-04-09 00:19:31 -07009627 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009628 TEST_REQUIRES_ARM_NEON;
9629 for (uint32_t n = 1; n <= 8; n++) {
9630 GemmMicrokernelTester()
9631 .mr(6)
9632 .nr(8)
9633 .kr(1)
9634 .sr(1)
9635 .m(6)
9636 .n(n)
9637 .k(2)
9638 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009639 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009640 }
9641 }
9642
Marat Dukhande06f492020-04-09 00:19:31 -07009643 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009644 TEST_REQUIRES_ARM_NEON;
9645 for (size_t k = 1; k < 2; k++) {
9646 GemmMicrokernelTester()
9647 .mr(6)
9648 .nr(8)
9649 .kr(1)
9650 .sr(1)
9651 .m(6)
9652 .n(8)
9653 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009654 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009655 }
9656 }
9657
Marat Dukhande06f492020-04-09 00:19:31 -07009658 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009659 TEST_REQUIRES_ARM_NEON;
9660 for (size_t k = 1; k < 2; k++) {
9661 GemmMicrokernelTester()
9662 .mr(6)
9663 .nr(8)
9664 .kr(1)
9665 .sr(1)
9666 .m(6)
9667 .n(8)
9668 .k(k)
9669 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009670 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009671 }
9672 }
9673
Marat Dukhande06f492020-04-09 00:19:31 -07009674 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009675 TEST_REQUIRES_ARM_NEON;
9676 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009677 for (uint32_t n = 1; n <= 8; n++) {
9678 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009679 GemmMicrokernelTester()
9680 .mr(6)
9681 .nr(8)
9682 .kr(1)
9683 .sr(1)
9684 .m(m)
9685 .n(n)
9686 .k(k)
9687 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009688 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009689 }
9690 }
9691 }
9692 }
9693
Marat Dukhande06f492020-04-09 00:19:31 -07009694 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009695 TEST_REQUIRES_ARM_NEON;
9696 for (size_t k = 3; k < 4; k++) {
9697 GemmMicrokernelTester()
9698 .mr(6)
9699 .nr(8)
9700 .kr(1)
9701 .sr(1)
9702 .m(6)
9703 .n(8)
9704 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009705 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009706 }
9707 }
9708
Marat Dukhande06f492020-04-09 00:19:31 -07009709 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009710 TEST_REQUIRES_ARM_NEON;
9711 for (size_t k = 3; k < 4; k++) {
9712 GemmMicrokernelTester()
9713 .mr(6)
9714 .nr(8)
9715 .kr(1)
9716 .sr(1)
9717 .m(6)
9718 .n(8)
9719 .k(k)
9720 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009721 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009722 }
9723 }
9724
Marat Dukhande06f492020-04-09 00:19:31 -07009725 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009726 TEST_REQUIRES_ARM_NEON;
9727 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009728 for (uint32_t n = 1; n <= 8; n++) {
9729 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009730 GemmMicrokernelTester()
9731 .mr(6)
9732 .nr(8)
9733 .kr(1)
9734 .sr(1)
9735 .m(m)
9736 .n(n)
9737 .k(k)
9738 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009739 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009740 }
9741 }
9742 }
9743 }
9744
Marat Dukhande06f492020-04-09 00:19:31 -07009745 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009746 TEST_REQUIRES_ARM_NEON;
9747 for (size_t k = 4; k <= 20; k += 2) {
9748 GemmMicrokernelTester()
9749 .mr(6)
9750 .nr(8)
9751 .kr(1)
9752 .sr(1)
9753 .m(6)
9754 .n(8)
9755 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009756 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009757 }
9758 }
9759
Marat Dukhande06f492020-04-09 00:19:31 -07009760 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009761 TEST_REQUIRES_ARM_NEON;
9762 for (size_t k = 4; k <= 20; k += 2) {
9763 GemmMicrokernelTester()
9764 .mr(6)
9765 .nr(8)
9766 .kr(1)
9767 .sr(1)
9768 .m(6)
9769 .n(8)
9770 .k(k)
9771 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009772 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009773 }
9774 }
9775
Marat Dukhande06f492020-04-09 00:19:31 -07009776 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009777 TEST_REQUIRES_ARM_NEON;
9778 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009779 for (uint32_t n = 1; n <= 8; n++) {
9780 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009781 GemmMicrokernelTester()
9782 .mr(6)
9783 .nr(8)
9784 .kr(1)
9785 .sr(1)
9786 .m(m)
9787 .n(n)
9788 .k(k)
9789 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009790 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009791 }
9792 }
9793 }
9794 }
9795
Marat Dukhande06f492020-04-09 00:19:31 -07009796 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009797 TEST_REQUIRES_ARM_NEON;
9798 for (uint32_t n = 9; n < 16; n++) {
9799 for (size_t k = 1; k <= 10; k += 3) {
9800 GemmMicrokernelTester()
9801 .mr(6)
9802 .nr(8)
9803 .kr(1)
9804 .sr(1)
9805 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009806 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009807 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009808 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009809 }
9810 }
9811 }
9812
Marat Dukhande06f492020-04-09 00:19:31 -07009813 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009814 TEST_REQUIRES_ARM_NEON;
9815 for (uint32_t n = 9; n < 16; n++) {
9816 for (size_t k = 1; k <= 10; k += 3) {
9817 GemmMicrokernelTester()
9818 .mr(6)
9819 .nr(8)
9820 .kr(1)
9821 .sr(1)
9822 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009823 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009824 .k(k)
9825 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009826 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009827 }
9828 }
9829 }
9830
Marat Dukhande06f492020-04-09 00:19:31 -07009831 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009832 TEST_REQUIRES_ARM_NEON;
9833 for (uint32_t n = 9; n < 16; n++) {
9834 for (size_t k = 1; k <= 10; k += 3) {
9835 GemmMicrokernelTester()
9836 .mr(6)
9837 .nr(8)
9838 .kr(1)
9839 .sr(1)
9840 .m(6)
9841 .n(n)
9842 .k(k)
9843 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009844 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009845 }
9846 }
9847 }
9848
Marat Dukhande06f492020-04-09 00:19:31 -07009849 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009850 TEST_REQUIRES_ARM_NEON;
9851 for (uint32_t n = 9; n < 16; n++) {
9852 for (size_t k = 1; k <= 10; k += 3) {
9853 for (uint32_t m = 1; m <= 6; m++) {
9854 GemmMicrokernelTester()
9855 .mr(6)
9856 .nr(8)
9857 .kr(1)
9858 .sr(1)
9859 .m(m)
9860 .n(n)
9861 .k(k)
9862 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009863 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009864 }
9865 }
9866 }
9867 }
9868
Marat Dukhande06f492020-04-09 00:19:31 -07009869 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009870 TEST_REQUIRES_ARM_NEON;
9871 for (uint32_t n = 16; n <= 24; n += 8) {
9872 for (size_t k = 1; k <= 10; k += 3) {
9873 GemmMicrokernelTester()
9874 .mr(6)
9875 .nr(8)
9876 .kr(1)
9877 .sr(1)
9878 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009879 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009880 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009881 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009882 }
9883 }
9884 }
9885
Marat Dukhande06f492020-04-09 00:19:31 -07009886 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009887 TEST_REQUIRES_ARM_NEON;
9888 for (uint32_t n = 16; n <= 24; n += 8) {
9889 for (size_t k = 1; k <= 10; k += 3) {
9890 GemmMicrokernelTester()
9891 .mr(6)
9892 .nr(8)
9893 .kr(1)
9894 .sr(1)
9895 .m(6)
9896 .n(n)
9897 .k(k)
9898 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009899 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009900 }
9901 }
9902 }
9903
Marat Dukhande06f492020-04-09 00:19:31 -07009904 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009905 TEST_REQUIRES_ARM_NEON;
9906 for (uint32_t n = 16; n <= 24; n += 8) {
9907 for (size_t k = 1; k <= 10; k += 3) {
9908 GemmMicrokernelTester()
9909 .mr(6)
9910 .nr(8)
9911 .kr(1)
9912 .sr(1)
9913 .m(6)
9914 .n(n)
9915 .k(k)
9916 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009917 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009918 }
9919 }
9920 }
9921
Marat Dukhande06f492020-04-09 00:19:31 -07009922 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009923 TEST_REQUIRES_ARM_NEON;
9924 for (uint32_t n = 16; n <= 24; n += 8) {
9925 for (size_t k = 1; k <= 10; k += 3) {
9926 for (uint32_t m = 1; m <= 6; m++) {
9927 GemmMicrokernelTester()
9928 .mr(6)
9929 .nr(8)
9930 .kr(1)
9931 .sr(1)
9932 .m(m)
9933 .n(n)
9934 .k(k)
9935 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009936 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009937 }
9938 }
9939 }
9940 }
9941
Marat Dukhande06f492020-04-09 00:19:31 -07009942 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009943 TEST_REQUIRES_ARM_NEON;
9944 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009945 for (uint32_t n = 1; n <= 8; n++) {
9946 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009947 GemmMicrokernelTester()
9948 .mr(6)
9949 .nr(8)
9950 .kr(1)
9951 .sr(1)
9952 .m(m)
9953 .n(n)
9954 .k(k)
9955 .cm_stride(11)
9956 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009957 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009958 }
9959 }
9960 }
9961 }
9962
Marat Dukhande06f492020-04-09 00:19:31 -07009963 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009964 TEST_REQUIRES_ARM_NEON;
9965 GemmMicrokernelTester()
9966 .mr(6)
9967 .nr(8)
9968 .kr(1)
9969 .sr(1)
9970 .m(6)
9971 .n(8)
9972 .k(2)
9973 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009974 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009975 }
9976
Marat Dukhande06f492020-04-09 00:19:31 -07009977 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009978 TEST_REQUIRES_ARM_NEON;
9979 GemmMicrokernelTester()
9980 .mr(6)
9981 .nr(8)
9982 .kr(1)
9983 .sr(1)
9984 .m(6)
9985 .n(8)
9986 .k(2)
9987 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009988 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009989 }
9990
Marat Dukhande06f492020-04-09 00:19:31 -07009991 TEST(F32_GEMM_MINMAX_6X8__NEON_DUP_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009992 TEST_REQUIRES_ARM_NEON;
9993 GemmMicrokernelTester()
9994 .mr(6)
9995 .nr(8)
9996 .kr(1)
9997 .sr(1)
9998 .m(6)
9999 .n(8)
10000 .k(2)
10001 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010002 .Test(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010003 }
10004#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10005
10006
10007#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070010008 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010009 TEST_REQUIRES_ARM_NEON_FMA;
10010 GemmMicrokernelTester()
10011 .mr(4)
10012 .nr(8)
10013 .kr(1)
10014 .sr(1)
10015 .m(4)
10016 .n(8)
10017 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010018 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010019 }
10020
Marat Dukhande06f492020-04-09 00:19:31 -070010021 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010022 TEST_REQUIRES_ARM_NEON_FMA;
10023 GemmMicrokernelTester()
10024 .mr(4)
10025 .nr(8)
10026 .kr(1)
10027 .sr(1)
10028 .m(4)
10029 .n(8)
10030 .k(2)
10031 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010032 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010033 }
10034
Marat Dukhande06f492020-04-09 00:19:31 -070010035 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010036 TEST_REQUIRES_ARM_NEON_FMA;
10037 GemmMicrokernelTester()
10038 .mr(4)
10039 .nr(8)
10040 .kr(1)
10041 .sr(1)
10042 .m(4)
10043 .n(8)
10044 .k(2)
10045 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010046 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010047 }
10048
Marat Dukhande06f492020-04-09 00:19:31 -070010049 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010050 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010051 for (uint32_t n = 1; n <= 8; n++) {
10052 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010053 GemmMicrokernelTester()
10054 .mr(4)
10055 .nr(8)
10056 .kr(1)
10057 .sr(1)
10058 .m(m)
10059 .n(n)
10060 .k(2)
10061 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010062 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010063 }
10064 }
10065 }
10066
Marat Dukhande06f492020-04-09 00:19:31 -070010067 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010068 TEST_REQUIRES_ARM_NEON_FMA;
10069 for (uint32_t m = 1; m <= 4; m++) {
10070 GemmMicrokernelTester()
10071 .mr(4)
10072 .nr(8)
10073 .kr(1)
10074 .sr(1)
10075 .m(m)
10076 .n(8)
10077 .k(2)
10078 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010079 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010080 }
10081 }
10082
Marat Dukhande06f492020-04-09 00:19:31 -070010083 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010084 TEST_REQUIRES_ARM_NEON_FMA;
10085 for (uint32_t n = 1; n <= 8; n++) {
10086 GemmMicrokernelTester()
10087 .mr(4)
10088 .nr(8)
10089 .kr(1)
10090 .sr(1)
10091 .m(4)
10092 .n(n)
10093 .k(2)
10094 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010095 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010096 }
10097 }
10098
Marat Dukhande06f492020-04-09 00:19:31 -070010099 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010100 TEST_REQUIRES_ARM_NEON_FMA;
10101 for (size_t k = 1; k < 2; k++) {
10102 GemmMicrokernelTester()
10103 .mr(4)
10104 .nr(8)
10105 .kr(1)
10106 .sr(1)
10107 .m(4)
10108 .n(8)
10109 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010110 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010111 }
10112 }
10113
Marat Dukhande06f492020-04-09 00:19:31 -070010114 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010115 TEST_REQUIRES_ARM_NEON_FMA;
10116 for (size_t k = 1; k < 2; k++) {
10117 GemmMicrokernelTester()
10118 .mr(4)
10119 .nr(8)
10120 .kr(1)
10121 .sr(1)
10122 .m(4)
10123 .n(8)
10124 .k(k)
10125 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010126 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010127 }
10128 }
10129
Marat Dukhande06f492020-04-09 00:19:31 -070010130 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010131 TEST_REQUIRES_ARM_NEON_FMA;
10132 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010133 for (uint32_t n = 1; n <= 8; n++) {
10134 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010135 GemmMicrokernelTester()
10136 .mr(4)
10137 .nr(8)
10138 .kr(1)
10139 .sr(1)
10140 .m(m)
10141 .n(n)
10142 .k(k)
10143 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010144 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010145 }
10146 }
10147 }
10148 }
10149
Marat Dukhande06f492020-04-09 00:19:31 -070010150 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010151 TEST_REQUIRES_ARM_NEON_FMA;
10152 for (size_t k = 3; k < 4; k++) {
10153 GemmMicrokernelTester()
10154 .mr(4)
10155 .nr(8)
10156 .kr(1)
10157 .sr(1)
10158 .m(4)
10159 .n(8)
10160 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010161 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010162 }
10163 }
10164
Marat Dukhande06f492020-04-09 00:19:31 -070010165 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010166 TEST_REQUIRES_ARM_NEON_FMA;
10167 for (size_t k = 3; k < 4; k++) {
10168 GemmMicrokernelTester()
10169 .mr(4)
10170 .nr(8)
10171 .kr(1)
10172 .sr(1)
10173 .m(4)
10174 .n(8)
10175 .k(k)
10176 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010177 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010178 }
10179 }
10180
Marat Dukhande06f492020-04-09 00:19:31 -070010181 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010182 TEST_REQUIRES_ARM_NEON_FMA;
10183 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010184 for (uint32_t n = 1; n <= 8; n++) {
10185 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010186 GemmMicrokernelTester()
10187 .mr(4)
10188 .nr(8)
10189 .kr(1)
10190 .sr(1)
10191 .m(m)
10192 .n(n)
10193 .k(k)
10194 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010195 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010196 }
10197 }
10198 }
10199 }
10200
Marat Dukhande06f492020-04-09 00:19:31 -070010201 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010202 TEST_REQUIRES_ARM_NEON_FMA;
10203 for (size_t k = 4; k <= 20; k += 2) {
10204 GemmMicrokernelTester()
10205 .mr(4)
10206 .nr(8)
10207 .kr(1)
10208 .sr(1)
10209 .m(4)
10210 .n(8)
10211 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010212 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010213 }
10214 }
10215
Marat Dukhande06f492020-04-09 00:19:31 -070010216 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010217 TEST_REQUIRES_ARM_NEON_FMA;
10218 for (size_t k = 4; k <= 20; k += 2) {
10219 GemmMicrokernelTester()
10220 .mr(4)
10221 .nr(8)
10222 .kr(1)
10223 .sr(1)
10224 .m(4)
10225 .n(8)
10226 .k(k)
10227 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010228 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010229 }
10230 }
10231
Marat Dukhande06f492020-04-09 00:19:31 -070010232 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010233 TEST_REQUIRES_ARM_NEON_FMA;
10234 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010235 for (uint32_t n = 1; n <= 8; n++) {
10236 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010237 GemmMicrokernelTester()
10238 .mr(4)
10239 .nr(8)
10240 .kr(1)
10241 .sr(1)
10242 .m(m)
10243 .n(n)
10244 .k(k)
10245 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010246 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010247 }
10248 }
10249 }
10250 }
10251
Marat Dukhande06f492020-04-09 00:19:31 -070010252 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010253 TEST_REQUIRES_ARM_NEON_FMA;
10254 for (uint32_t n = 9; n < 16; n++) {
10255 for (size_t k = 1; k <= 10; k += 3) {
10256 GemmMicrokernelTester()
10257 .mr(4)
10258 .nr(8)
10259 .kr(1)
10260 .sr(1)
10261 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010262 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010263 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010264 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010265 }
10266 }
10267 }
10268
Marat Dukhande06f492020-04-09 00:19:31 -070010269 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010270 TEST_REQUIRES_ARM_NEON_FMA;
10271 for (uint32_t n = 9; n < 16; n++) {
10272 for (size_t k = 1; k <= 10; k += 3) {
10273 GemmMicrokernelTester()
10274 .mr(4)
10275 .nr(8)
10276 .kr(1)
10277 .sr(1)
10278 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010279 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010280 .k(k)
10281 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010282 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010283 }
10284 }
10285 }
10286
Marat Dukhande06f492020-04-09 00:19:31 -070010287 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010288 TEST_REQUIRES_ARM_NEON_FMA;
10289 for (uint32_t n = 9; n < 16; n++) {
10290 for (size_t k = 1; k <= 10; k += 3) {
10291 GemmMicrokernelTester()
10292 .mr(4)
10293 .nr(8)
10294 .kr(1)
10295 .sr(1)
10296 .m(4)
10297 .n(n)
10298 .k(k)
10299 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010300 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010301 }
10302 }
10303 }
10304
Marat Dukhande06f492020-04-09 00:19:31 -070010305 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010306 TEST_REQUIRES_ARM_NEON_FMA;
10307 for (uint32_t n = 9; n < 16; n++) {
10308 for (size_t k = 1; k <= 10; k += 3) {
10309 for (uint32_t m = 1; m <= 4; m++) {
10310 GemmMicrokernelTester()
10311 .mr(4)
10312 .nr(8)
10313 .kr(1)
10314 .sr(1)
10315 .m(m)
10316 .n(n)
10317 .k(k)
10318 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010319 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010320 }
10321 }
10322 }
10323 }
10324
Marat Dukhande06f492020-04-09 00:19:31 -070010325 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010326 TEST_REQUIRES_ARM_NEON_FMA;
10327 for (uint32_t n = 16; n <= 24; n += 8) {
10328 for (size_t k = 1; k <= 10; k += 3) {
10329 GemmMicrokernelTester()
10330 .mr(4)
10331 .nr(8)
10332 .kr(1)
10333 .sr(1)
10334 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010335 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010336 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010337 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010338 }
10339 }
10340 }
10341
Marat Dukhande06f492020-04-09 00:19:31 -070010342 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010343 TEST_REQUIRES_ARM_NEON_FMA;
10344 for (uint32_t n = 16; n <= 24; n += 8) {
10345 for (size_t k = 1; k <= 10; k += 3) {
10346 GemmMicrokernelTester()
10347 .mr(4)
10348 .nr(8)
10349 .kr(1)
10350 .sr(1)
10351 .m(4)
10352 .n(n)
10353 .k(k)
10354 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010355 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010356 }
10357 }
10358 }
10359
Marat Dukhande06f492020-04-09 00:19:31 -070010360 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010361 TEST_REQUIRES_ARM_NEON_FMA;
10362 for (uint32_t n = 16; n <= 24; n += 8) {
10363 for (size_t k = 1; k <= 10; k += 3) {
10364 GemmMicrokernelTester()
10365 .mr(4)
10366 .nr(8)
10367 .kr(1)
10368 .sr(1)
10369 .m(4)
10370 .n(n)
10371 .k(k)
10372 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010373 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010374 }
10375 }
10376 }
10377
Marat Dukhande06f492020-04-09 00:19:31 -070010378 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010379 TEST_REQUIRES_ARM_NEON_FMA;
10380 for (uint32_t n = 16; n <= 24; n += 8) {
10381 for (size_t k = 1; k <= 10; k += 3) {
10382 for (uint32_t m = 1; m <= 4; m++) {
10383 GemmMicrokernelTester()
10384 .mr(4)
10385 .nr(8)
10386 .kr(1)
10387 .sr(1)
10388 .m(m)
10389 .n(n)
10390 .k(k)
10391 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010392 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010393 }
10394 }
10395 }
10396 }
10397
Marat Dukhande06f492020-04-09 00:19:31 -070010398 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010399 TEST_REQUIRES_ARM_NEON_FMA;
10400 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010401 for (uint32_t n = 1; n <= 8; n++) {
10402 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010403 GemmMicrokernelTester()
10404 .mr(4)
10405 .nr(8)
10406 .kr(1)
10407 .sr(1)
10408 .m(m)
10409 .n(n)
10410 .k(k)
10411 .cm_stride(11)
10412 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010413 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010414 }
10415 }
10416 }
10417 }
10418
Marat Dukhande06f492020-04-09 00:19:31 -070010419 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010420 TEST_REQUIRES_ARM_NEON_FMA;
10421 GemmMicrokernelTester()
10422 .mr(4)
10423 .nr(8)
10424 .kr(1)
10425 .sr(1)
10426 .m(4)
10427 .n(8)
10428 .k(2)
10429 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010430 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010431 }
10432
Marat Dukhande06f492020-04-09 00:19:31 -070010433 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010434 TEST_REQUIRES_ARM_NEON_FMA;
10435 GemmMicrokernelTester()
10436 .mr(4)
10437 .nr(8)
10438 .kr(1)
10439 .sr(1)
10440 .m(4)
10441 .n(8)
10442 .k(2)
10443 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010444 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010445 }
10446
Marat Dukhande06f492020-04-09 00:19:31 -070010447 TEST(F32_GEMM_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010448 TEST_REQUIRES_ARM_NEON_FMA;
10449 GemmMicrokernelTester()
10450 .mr(4)
10451 .nr(8)
10452 .kr(1)
10453 .sr(1)
10454 .m(4)
10455 .n(8)
10456 .k(2)
10457 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010458 .Test(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010459 }
10460#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10461
10462
10463#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070010464 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010465 TEST_REQUIRES_ARM_NEON;
10466 GemmMicrokernelTester()
10467 .mr(1)
10468 .nr(8)
10469 .kr(1)
10470 .sr(4)
10471 .m(1)
10472 .n(8)
10473 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010474 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010475 }
10476
Marat Dukhande06f492020-04-09 00:19:31 -070010477 TEST(F32_GEMM_MINMAX_1X8S4__NEON, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010478 TEST_REQUIRES_ARM_NEON;
10479 GemmMicrokernelTester()
10480 .mr(1)
10481 .nr(8)
10482 .kr(1)
10483 .sr(4)
10484 .m(1)
10485 .n(8)
10486 .k(4)
10487 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010488 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010489 }
10490
Marat Dukhande06f492020-04-09 00:19:31 -070010491 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010492 TEST_REQUIRES_ARM_NEON;
10493 GemmMicrokernelTester()
10494 .mr(1)
10495 .nr(8)
10496 .kr(1)
10497 .sr(4)
10498 .m(1)
10499 .n(8)
10500 .k(4)
10501 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010502 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010503 }
10504
Marat Dukhande06f492020-04-09 00:19:31 -070010505 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010506 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010507 for (uint32_t n = 1; n <= 8; n++) {
10508 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010509 GemmMicrokernelTester()
10510 .mr(1)
10511 .nr(8)
10512 .kr(1)
10513 .sr(4)
10514 .m(m)
10515 .n(n)
10516 .k(4)
10517 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010518 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010519 }
10520 }
10521 }
10522
Marat Dukhande06f492020-04-09 00:19:31 -070010523 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010524 TEST_REQUIRES_ARM_NEON;
10525 for (uint32_t m = 1; m <= 1; m++) {
10526 GemmMicrokernelTester()
10527 .mr(1)
10528 .nr(8)
10529 .kr(1)
10530 .sr(4)
10531 .m(m)
10532 .n(8)
10533 .k(4)
10534 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010535 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010536 }
10537 }
10538
Marat Dukhande06f492020-04-09 00:19:31 -070010539 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010540 TEST_REQUIRES_ARM_NEON;
10541 for (uint32_t n = 1; n <= 8; n++) {
10542 GemmMicrokernelTester()
10543 .mr(1)
10544 .nr(8)
10545 .kr(1)
10546 .sr(4)
10547 .m(1)
10548 .n(n)
10549 .k(4)
10550 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010551 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010552 }
10553 }
10554
Marat Dukhande06f492020-04-09 00:19:31 -070010555 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010556 TEST_REQUIRES_ARM_NEON;
10557 for (size_t k = 1; k < 4; k++) {
10558 GemmMicrokernelTester()
10559 .mr(1)
10560 .nr(8)
10561 .kr(1)
10562 .sr(4)
10563 .m(1)
10564 .n(8)
10565 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010566 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010567 }
10568 }
10569
Marat Dukhande06f492020-04-09 00:19:31 -070010570 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010571 TEST_REQUIRES_ARM_NEON;
10572 for (size_t k = 1; k < 4; k++) {
10573 GemmMicrokernelTester()
10574 .mr(1)
10575 .nr(8)
10576 .kr(1)
10577 .sr(4)
10578 .m(1)
10579 .n(8)
10580 .k(k)
10581 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010582 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010583 }
10584 }
10585
Marat Dukhande06f492020-04-09 00:19:31 -070010586 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010587 TEST_REQUIRES_ARM_NEON;
10588 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010589 for (uint32_t n = 1; n <= 8; n++) {
10590 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010591 GemmMicrokernelTester()
10592 .mr(1)
10593 .nr(8)
10594 .kr(1)
10595 .sr(4)
10596 .m(m)
10597 .n(n)
10598 .k(k)
10599 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010600 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010601 }
10602 }
10603 }
10604 }
10605
Marat Dukhande06f492020-04-09 00:19:31 -070010606 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010607 TEST_REQUIRES_ARM_NEON;
10608 for (size_t k = 5; k < 8; k++) {
10609 GemmMicrokernelTester()
10610 .mr(1)
10611 .nr(8)
10612 .kr(1)
10613 .sr(4)
10614 .m(1)
10615 .n(8)
10616 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010617 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010618 }
10619 }
10620
Marat Dukhande06f492020-04-09 00:19:31 -070010621 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010622 TEST_REQUIRES_ARM_NEON;
10623 for (size_t k = 5; k < 8; k++) {
10624 GemmMicrokernelTester()
10625 .mr(1)
10626 .nr(8)
10627 .kr(1)
10628 .sr(4)
10629 .m(1)
10630 .n(8)
10631 .k(k)
10632 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010633 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010634 }
10635 }
10636
Marat Dukhande06f492020-04-09 00:19:31 -070010637 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010638 TEST_REQUIRES_ARM_NEON;
10639 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010640 for (uint32_t n = 1; n <= 8; n++) {
10641 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010642 GemmMicrokernelTester()
10643 .mr(1)
10644 .nr(8)
10645 .kr(1)
10646 .sr(4)
10647 .m(m)
10648 .n(n)
10649 .k(k)
10650 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010651 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010652 }
10653 }
10654 }
10655 }
10656
Marat Dukhande06f492020-04-09 00:19:31 -070010657 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010658 TEST_REQUIRES_ARM_NEON;
10659 for (size_t k = 8; k <= 40; k += 4) {
10660 GemmMicrokernelTester()
10661 .mr(1)
10662 .nr(8)
10663 .kr(1)
10664 .sr(4)
10665 .m(1)
10666 .n(8)
10667 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010668 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010669 }
10670 }
10671
Marat Dukhande06f492020-04-09 00:19:31 -070010672 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010673 TEST_REQUIRES_ARM_NEON;
10674 for (size_t k = 8; k <= 40; k += 4) {
10675 GemmMicrokernelTester()
10676 .mr(1)
10677 .nr(8)
10678 .kr(1)
10679 .sr(4)
10680 .m(1)
10681 .n(8)
10682 .k(k)
10683 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010684 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010685 }
10686 }
10687
Marat Dukhande06f492020-04-09 00:19:31 -070010688 TEST(F32_GEMM_MINMAX_1X8S4__NEON, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010689 TEST_REQUIRES_ARM_NEON;
10690 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010691 for (uint32_t n = 1; n <= 8; n++) {
10692 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010693 GemmMicrokernelTester()
10694 .mr(1)
10695 .nr(8)
10696 .kr(1)
10697 .sr(4)
10698 .m(m)
10699 .n(n)
10700 .k(k)
10701 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010702 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010703 }
10704 }
10705 }
10706 }
10707
Marat Dukhande06f492020-04-09 00:19:31 -070010708 TEST(F32_GEMM_MINMAX_1X8S4__NEON, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010709 TEST_REQUIRES_ARM_NEON;
10710 for (uint32_t n = 9; n < 16; n++) {
10711 for (size_t k = 1; k <= 20; k += 5) {
10712 GemmMicrokernelTester()
10713 .mr(1)
10714 .nr(8)
10715 .kr(1)
10716 .sr(4)
10717 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010718 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010719 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010720 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010721 }
10722 }
10723 }
10724
Marat Dukhande06f492020-04-09 00:19:31 -070010725 TEST(F32_GEMM_MINMAX_1X8S4__NEON, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010726 TEST_REQUIRES_ARM_NEON;
10727 for (uint32_t n = 9; n < 16; n++) {
10728 for (size_t k = 1; k <= 20; k += 5) {
10729 GemmMicrokernelTester()
10730 .mr(1)
10731 .nr(8)
10732 .kr(1)
10733 .sr(4)
10734 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010735 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010736 .k(k)
10737 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010738 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010739 }
10740 }
10741 }
10742
Marat Dukhande06f492020-04-09 00:19:31 -070010743 TEST(F32_GEMM_MINMAX_1X8S4__NEON, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010744 TEST_REQUIRES_ARM_NEON;
10745 for (uint32_t n = 9; n < 16; n++) {
10746 for (size_t k = 1; k <= 20; k += 5) {
10747 GemmMicrokernelTester()
10748 .mr(1)
10749 .nr(8)
10750 .kr(1)
10751 .sr(4)
10752 .m(1)
10753 .n(n)
10754 .k(k)
10755 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010756 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010757 }
10758 }
10759 }
10760
Marat Dukhande06f492020-04-09 00:19:31 -070010761 TEST(F32_GEMM_MINMAX_1X8S4__NEON, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010762 TEST_REQUIRES_ARM_NEON;
10763 for (uint32_t n = 9; n < 16; n++) {
10764 for (size_t k = 1; k <= 20; k += 5) {
10765 for (uint32_t m = 1; m <= 1; m++) {
10766 GemmMicrokernelTester()
10767 .mr(1)
10768 .nr(8)
10769 .kr(1)
10770 .sr(4)
10771 .m(m)
10772 .n(n)
10773 .k(k)
10774 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010775 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010776 }
10777 }
10778 }
10779 }
10780
Marat Dukhande06f492020-04-09 00:19:31 -070010781 TEST(F32_GEMM_MINMAX_1X8S4__NEON, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010782 TEST_REQUIRES_ARM_NEON;
10783 for (uint32_t n = 16; n <= 24; n += 8) {
10784 for (size_t k = 1; k <= 20; k += 5) {
10785 GemmMicrokernelTester()
10786 .mr(1)
10787 .nr(8)
10788 .kr(1)
10789 .sr(4)
10790 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010791 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010792 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010793 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010794 }
10795 }
10796 }
10797
Marat Dukhande06f492020-04-09 00:19:31 -070010798 TEST(F32_GEMM_MINMAX_1X8S4__NEON, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010799 TEST_REQUIRES_ARM_NEON;
10800 for (uint32_t n = 16; n <= 24; n += 8) {
10801 for (size_t k = 1; k <= 20; k += 5) {
10802 GemmMicrokernelTester()
10803 .mr(1)
10804 .nr(8)
10805 .kr(1)
10806 .sr(4)
10807 .m(1)
10808 .n(n)
10809 .k(k)
10810 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010811 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010812 }
10813 }
10814 }
10815
Marat Dukhande06f492020-04-09 00:19:31 -070010816 TEST(F32_GEMM_MINMAX_1X8S4__NEON, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010817 TEST_REQUIRES_ARM_NEON;
10818 for (uint32_t n = 16; n <= 24; n += 8) {
10819 for (size_t k = 1; k <= 20; k += 5) {
10820 GemmMicrokernelTester()
10821 .mr(1)
10822 .nr(8)
10823 .kr(1)
10824 .sr(4)
10825 .m(1)
10826 .n(n)
10827 .k(k)
10828 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010829 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010830 }
10831 }
10832 }
10833
Marat Dukhande06f492020-04-09 00:19:31 -070010834 TEST(F32_GEMM_MINMAX_1X8S4__NEON, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010835 TEST_REQUIRES_ARM_NEON;
10836 for (uint32_t n = 16; n <= 24; n += 8) {
10837 for (size_t k = 1; k <= 20; k += 5) {
10838 for (uint32_t m = 1; m <= 1; m++) {
10839 GemmMicrokernelTester()
10840 .mr(1)
10841 .nr(8)
10842 .kr(1)
10843 .sr(4)
10844 .m(m)
10845 .n(n)
10846 .k(k)
10847 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010848 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010849 }
10850 }
10851 }
10852 }
10853
Marat Dukhande06f492020-04-09 00:19:31 -070010854 TEST(F32_GEMM_MINMAX_1X8S4__NEON, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010855 TEST_REQUIRES_ARM_NEON;
10856 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010857 for (uint32_t n = 1; n <= 8; n++) {
10858 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010859 GemmMicrokernelTester()
10860 .mr(1)
10861 .nr(8)
10862 .kr(1)
10863 .sr(4)
10864 .m(m)
10865 .n(n)
10866 .k(k)
10867 .cm_stride(11)
10868 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010869 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010870 }
10871 }
10872 }
10873 }
10874
Marat Dukhande06f492020-04-09 00:19:31 -070010875 TEST(F32_GEMM_MINMAX_1X8S4__NEON, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010876 TEST_REQUIRES_ARM_NEON;
10877 GemmMicrokernelTester()
10878 .mr(1)
10879 .nr(8)
10880 .kr(1)
10881 .sr(4)
10882 .m(1)
10883 .n(8)
10884 .k(4)
10885 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010886 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010887 }
10888
Marat Dukhande06f492020-04-09 00:19:31 -070010889 TEST(F32_GEMM_MINMAX_1X8S4__NEON, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010890 TEST_REQUIRES_ARM_NEON;
10891 GemmMicrokernelTester()
10892 .mr(1)
10893 .nr(8)
10894 .kr(1)
10895 .sr(4)
10896 .m(1)
10897 .n(8)
10898 .k(4)
10899 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010900 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010901 }
10902
Marat Dukhande06f492020-04-09 00:19:31 -070010903 TEST(F32_GEMM_MINMAX_1X8S4__NEON, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010904 TEST_REQUIRES_ARM_NEON;
10905 GemmMicrokernelTester()
10906 .mr(1)
10907 .nr(8)
10908 .kr(1)
10909 .sr(4)
10910 .m(1)
10911 .n(8)
10912 .k(4)
10913 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010914 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010915 }
10916#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10917
10918
10919#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070010920 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010921 TEST_REQUIRES_ARM_NEON;
10922 GemmMicrokernelTester()
10923 .mr(4)
10924 .nr(8)
10925 .kr(1)
10926 .sr(4)
10927 .m(4)
10928 .n(8)
10929 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010930 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010931 }
10932
Marat Dukhande06f492020-04-09 00:19:31 -070010933 TEST(F32_GEMM_MINMAX_4X8S4__NEON, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010934 TEST_REQUIRES_ARM_NEON;
10935 GemmMicrokernelTester()
10936 .mr(4)
10937 .nr(8)
10938 .kr(1)
10939 .sr(4)
10940 .m(4)
10941 .n(8)
10942 .k(4)
10943 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010944 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010945 }
10946
Marat Dukhande06f492020-04-09 00:19:31 -070010947 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010948 TEST_REQUIRES_ARM_NEON;
10949 GemmMicrokernelTester()
10950 .mr(4)
10951 .nr(8)
10952 .kr(1)
10953 .sr(4)
10954 .m(4)
10955 .n(8)
10956 .k(4)
10957 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010958 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010959 }
10960
Marat Dukhande06f492020-04-09 00:19:31 -070010961 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010962 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010963 for (uint32_t n = 1; n <= 8; n++) {
10964 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010965 GemmMicrokernelTester()
10966 .mr(4)
10967 .nr(8)
10968 .kr(1)
10969 .sr(4)
10970 .m(m)
10971 .n(n)
10972 .k(4)
10973 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010974 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010975 }
10976 }
10977 }
10978
Marat Dukhande06f492020-04-09 00:19:31 -070010979 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010980 TEST_REQUIRES_ARM_NEON;
10981 for (uint32_t m = 1; m <= 4; m++) {
10982 GemmMicrokernelTester()
10983 .mr(4)
10984 .nr(8)
10985 .kr(1)
10986 .sr(4)
10987 .m(m)
10988 .n(8)
10989 .k(4)
10990 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010991 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010992 }
10993 }
10994
Marat Dukhande06f492020-04-09 00:19:31 -070010995 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010996 TEST_REQUIRES_ARM_NEON;
10997 for (uint32_t n = 1; n <= 8; n++) {
10998 GemmMicrokernelTester()
10999 .mr(4)
11000 .nr(8)
11001 .kr(1)
11002 .sr(4)
11003 .m(4)
11004 .n(n)
11005 .k(4)
11006 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011007 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011008 }
11009 }
11010
Marat Dukhande06f492020-04-09 00:19:31 -070011011 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011012 TEST_REQUIRES_ARM_NEON;
11013 for (size_t k = 1; k < 4; k++) {
11014 GemmMicrokernelTester()
11015 .mr(4)
11016 .nr(8)
11017 .kr(1)
11018 .sr(4)
11019 .m(4)
11020 .n(8)
11021 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011022 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011023 }
11024 }
11025
Marat Dukhande06f492020-04-09 00:19:31 -070011026 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011027 TEST_REQUIRES_ARM_NEON;
11028 for (size_t k = 1; k < 4; k++) {
11029 GemmMicrokernelTester()
11030 .mr(4)
11031 .nr(8)
11032 .kr(1)
11033 .sr(4)
11034 .m(4)
11035 .n(8)
11036 .k(k)
11037 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011038 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011039 }
11040 }
11041
Marat Dukhande06f492020-04-09 00:19:31 -070011042 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011043 TEST_REQUIRES_ARM_NEON;
11044 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011045 for (uint32_t n = 1; n <= 8; n++) {
11046 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011047 GemmMicrokernelTester()
11048 .mr(4)
11049 .nr(8)
11050 .kr(1)
11051 .sr(4)
11052 .m(m)
11053 .n(n)
11054 .k(k)
11055 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011056 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011057 }
11058 }
11059 }
11060 }
11061
Marat Dukhande06f492020-04-09 00:19:31 -070011062 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011063 TEST_REQUIRES_ARM_NEON;
11064 for (size_t k = 5; k < 8; k++) {
11065 GemmMicrokernelTester()
11066 .mr(4)
11067 .nr(8)
11068 .kr(1)
11069 .sr(4)
11070 .m(4)
11071 .n(8)
11072 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011073 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011074 }
11075 }
11076
Marat Dukhande06f492020-04-09 00:19:31 -070011077 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011078 TEST_REQUIRES_ARM_NEON;
11079 for (size_t k = 5; k < 8; k++) {
11080 GemmMicrokernelTester()
11081 .mr(4)
11082 .nr(8)
11083 .kr(1)
11084 .sr(4)
11085 .m(4)
11086 .n(8)
11087 .k(k)
11088 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011089 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011090 }
11091 }
11092
Marat Dukhande06f492020-04-09 00:19:31 -070011093 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011094 TEST_REQUIRES_ARM_NEON;
11095 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011096 for (uint32_t n = 1; n <= 8; n++) {
11097 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011098 GemmMicrokernelTester()
11099 .mr(4)
11100 .nr(8)
11101 .kr(1)
11102 .sr(4)
11103 .m(m)
11104 .n(n)
11105 .k(k)
11106 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011107 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011108 }
11109 }
11110 }
11111 }
11112
Marat Dukhande06f492020-04-09 00:19:31 -070011113 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011114 TEST_REQUIRES_ARM_NEON;
11115 for (size_t k = 8; k <= 40; k += 4) {
11116 GemmMicrokernelTester()
11117 .mr(4)
11118 .nr(8)
11119 .kr(1)
11120 .sr(4)
11121 .m(4)
11122 .n(8)
11123 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011124 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011125 }
11126 }
11127
Marat Dukhande06f492020-04-09 00:19:31 -070011128 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011129 TEST_REQUIRES_ARM_NEON;
11130 for (size_t k = 8; k <= 40; k += 4) {
11131 GemmMicrokernelTester()
11132 .mr(4)
11133 .nr(8)
11134 .kr(1)
11135 .sr(4)
11136 .m(4)
11137 .n(8)
11138 .k(k)
11139 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011140 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011141 }
11142 }
11143
Marat Dukhande06f492020-04-09 00:19:31 -070011144 TEST(F32_GEMM_MINMAX_4X8S4__NEON, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011145 TEST_REQUIRES_ARM_NEON;
11146 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011147 for (uint32_t n = 1; n <= 8; n++) {
11148 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011149 GemmMicrokernelTester()
11150 .mr(4)
11151 .nr(8)
11152 .kr(1)
11153 .sr(4)
11154 .m(m)
11155 .n(n)
11156 .k(k)
11157 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011158 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011159 }
11160 }
11161 }
11162 }
11163
Marat Dukhande06f492020-04-09 00:19:31 -070011164 TEST(F32_GEMM_MINMAX_4X8S4__NEON, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011165 TEST_REQUIRES_ARM_NEON;
11166 for (uint32_t n = 9; n < 16; n++) {
11167 for (size_t k = 1; k <= 20; k += 5) {
11168 GemmMicrokernelTester()
11169 .mr(4)
11170 .nr(8)
11171 .kr(1)
11172 .sr(4)
11173 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011174 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011175 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011176 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011177 }
11178 }
11179 }
11180
Marat Dukhande06f492020-04-09 00:19:31 -070011181 TEST(F32_GEMM_MINMAX_4X8S4__NEON, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011182 TEST_REQUIRES_ARM_NEON;
11183 for (uint32_t n = 9; n < 16; n++) {
11184 for (size_t k = 1; k <= 20; k += 5) {
11185 GemmMicrokernelTester()
11186 .mr(4)
11187 .nr(8)
11188 .kr(1)
11189 .sr(4)
11190 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011191 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011192 .k(k)
11193 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011194 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011195 }
11196 }
11197 }
11198
Marat Dukhande06f492020-04-09 00:19:31 -070011199 TEST(F32_GEMM_MINMAX_4X8S4__NEON, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011200 TEST_REQUIRES_ARM_NEON;
11201 for (uint32_t n = 9; n < 16; n++) {
11202 for (size_t k = 1; k <= 20; k += 5) {
11203 GemmMicrokernelTester()
11204 .mr(4)
11205 .nr(8)
11206 .kr(1)
11207 .sr(4)
11208 .m(4)
11209 .n(n)
11210 .k(k)
11211 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011212 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011213 }
11214 }
11215 }
11216
Marat Dukhande06f492020-04-09 00:19:31 -070011217 TEST(F32_GEMM_MINMAX_4X8S4__NEON, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011218 TEST_REQUIRES_ARM_NEON;
11219 for (uint32_t n = 9; n < 16; n++) {
11220 for (size_t k = 1; k <= 20; k += 5) {
11221 for (uint32_t m = 1; m <= 4; m++) {
11222 GemmMicrokernelTester()
11223 .mr(4)
11224 .nr(8)
11225 .kr(1)
11226 .sr(4)
11227 .m(m)
11228 .n(n)
11229 .k(k)
11230 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011231 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011232 }
11233 }
11234 }
11235 }
11236
Marat Dukhande06f492020-04-09 00:19:31 -070011237 TEST(F32_GEMM_MINMAX_4X8S4__NEON, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011238 TEST_REQUIRES_ARM_NEON;
11239 for (uint32_t n = 16; n <= 24; n += 8) {
11240 for (size_t k = 1; k <= 20; k += 5) {
11241 GemmMicrokernelTester()
11242 .mr(4)
11243 .nr(8)
11244 .kr(1)
11245 .sr(4)
11246 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011247 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011248 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011249 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011250 }
11251 }
11252 }
11253
Marat Dukhande06f492020-04-09 00:19:31 -070011254 TEST(F32_GEMM_MINMAX_4X8S4__NEON, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011255 TEST_REQUIRES_ARM_NEON;
11256 for (uint32_t n = 16; n <= 24; n += 8) {
11257 for (size_t k = 1; k <= 20; k += 5) {
11258 GemmMicrokernelTester()
11259 .mr(4)
11260 .nr(8)
11261 .kr(1)
11262 .sr(4)
11263 .m(4)
11264 .n(n)
11265 .k(k)
11266 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011267 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011268 }
11269 }
11270 }
11271
Marat Dukhande06f492020-04-09 00:19:31 -070011272 TEST(F32_GEMM_MINMAX_4X8S4__NEON, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011273 TEST_REQUIRES_ARM_NEON;
11274 for (uint32_t n = 16; n <= 24; n += 8) {
11275 for (size_t k = 1; k <= 20; k += 5) {
11276 GemmMicrokernelTester()
11277 .mr(4)
11278 .nr(8)
11279 .kr(1)
11280 .sr(4)
11281 .m(4)
11282 .n(n)
11283 .k(k)
11284 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011285 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011286 }
11287 }
11288 }
11289
Marat Dukhande06f492020-04-09 00:19:31 -070011290 TEST(F32_GEMM_MINMAX_4X8S4__NEON, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011291 TEST_REQUIRES_ARM_NEON;
11292 for (uint32_t n = 16; n <= 24; n += 8) {
11293 for (size_t k = 1; k <= 20; k += 5) {
11294 for (uint32_t m = 1; m <= 4; m++) {
11295 GemmMicrokernelTester()
11296 .mr(4)
11297 .nr(8)
11298 .kr(1)
11299 .sr(4)
11300 .m(m)
11301 .n(n)
11302 .k(k)
11303 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011304 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011305 }
11306 }
11307 }
11308 }
11309
Marat Dukhande06f492020-04-09 00:19:31 -070011310 TEST(F32_GEMM_MINMAX_4X8S4__NEON, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011311 TEST_REQUIRES_ARM_NEON;
11312 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011313 for (uint32_t n = 1; n <= 8; n++) {
11314 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011315 GemmMicrokernelTester()
11316 .mr(4)
11317 .nr(8)
11318 .kr(1)
11319 .sr(4)
11320 .m(m)
11321 .n(n)
11322 .k(k)
11323 .cm_stride(11)
11324 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011325 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011326 }
11327 }
11328 }
11329 }
11330
Marat Dukhande06f492020-04-09 00:19:31 -070011331 TEST(F32_GEMM_MINMAX_4X8S4__NEON, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011332 TEST_REQUIRES_ARM_NEON;
11333 GemmMicrokernelTester()
11334 .mr(4)
11335 .nr(8)
11336 .kr(1)
11337 .sr(4)
11338 .m(4)
11339 .n(8)
11340 .k(4)
11341 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011342 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011343 }
11344
Marat Dukhande06f492020-04-09 00:19:31 -070011345 TEST(F32_GEMM_MINMAX_4X8S4__NEON, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011346 TEST_REQUIRES_ARM_NEON;
11347 GemmMicrokernelTester()
11348 .mr(4)
11349 .nr(8)
11350 .kr(1)
11351 .sr(4)
11352 .m(4)
11353 .n(8)
11354 .k(4)
11355 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011356 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011357 }
11358
Marat Dukhande06f492020-04-09 00:19:31 -070011359 TEST(F32_GEMM_MINMAX_4X8S4__NEON, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011360 TEST_REQUIRES_ARM_NEON;
11361 GemmMicrokernelTester()
11362 .mr(4)
11363 .nr(8)
11364 .kr(1)
11365 .sr(4)
11366 .m(4)
11367 .n(8)
11368 .k(4)
11369 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011370 .Test(xnn_f32_gemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011371 }
11372#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11373
11374
11375#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070011376 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011377 TEST_REQUIRES_ARM_NEON_FMA;
11378 GemmMicrokernelTester()
11379 .mr(1)
11380 .nr(8)
11381 .kr(1)
11382 .sr(4)
11383 .m(1)
11384 .n(8)
11385 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011386 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011387 }
11388
Marat Dukhande06f492020-04-09 00:19:31 -070011389 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011390 TEST_REQUIRES_ARM_NEON_FMA;
11391 GemmMicrokernelTester()
11392 .mr(1)
11393 .nr(8)
11394 .kr(1)
11395 .sr(4)
11396 .m(1)
11397 .n(8)
11398 .k(4)
11399 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011400 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011401 }
11402
Marat Dukhande06f492020-04-09 00:19:31 -070011403 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011404 TEST_REQUIRES_ARM_NEON_FMA;
11405 GemmMicrokernelTester()
11406 .mr(1)
11407 .nr(8)
11408 .kr(1)
11409 .sr(4)
11410 .m(1)
11411 .n(8)
11412 .k(4)
11413 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011414 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011415 }
11416
Marat Dukhande06f492020-04-09 00:19:31 -070011417 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011418 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011419 for (uint32_t n = 1; n <= 8; n++) {
11420 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011421 GemmMicrokernelTester()
11422 .mr(1)
11423 .nr(8)
11424 .kr(1)
11425 .sr(4)
11426 .m(m)
11427 .n(n)
11428 .k(4)
11429 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011430 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011431 }
11432 }
11433 }
11434
Marat Dukhande06f492020-04-09 00:19:31 -070011435 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011436 TEST_REQUIRES_ARM_NEON_FMA;
11437 for (uint32_t m = 1; m <= 1; m++) {
11438 GemmMicrokernelTester()
11439 .mr(1)
11440 .nr(8)
11441 .kr(1)
11442 .sr(4)
11443 .m(m)
11444 .n(8)
11445 .k(4)
11446 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011447 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011448 }
11449 }
11450
Marat Dukhande06f492020-04-09 00:19:31 -070011451 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011452 TEST_REQUIRES_ARM_NEON_FMA;
11453 for (uint32_t n = 1; n <= 8; n++) {
11454 GemmMicrokernelTester()
11455 .mr(1)
11456 .nr(8)
11457 .kr(1)
11458 .sr(4)
11459 .m(1)
11460 .n(n)
11461 .k(4)
11462 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011463 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011464 }
11465 }
11466
Marat Dukhande06f492020-04-09 00:19:31 -070011467 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011468 TEST_REQUIRES_ARM_NEON_FMA;
11469 for (size_t k = 1; k < 4; k++) {
11470 GemmMicrokernelTester()
11471 .mr(1)
11472 .nr(8)
11473 .kr(1)
11474 .sr(4)
11475 .m(1)
11476 .n(8)
11477 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011478 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011479 }
11480 }
11481
Marat Dukhande06f492020-04-09 00:19:31 -070011482 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011483 TEST_REQUIRES_ARM_NEON_FMA;
11484 for (size_t k = 1; k < 4; k++) {
11485 GemmMicrokernelTester()
11486 .mr(1)
11487 .nr(8)
11488 .kr(1)
11489 .sr(4)
11490 .m(1)
11491 .n(8)
11492 .k(k)
11493 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011494 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011495 }
11496 }
11497
Marat Dukhande06f492020-04-09 00:19:31 -070011498 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011499 TEST_REQUIRES_ARM_NEON_FMA;
11500 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011501 for (uint32_t n = 1; n <= 8; n++) {
11502 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011503 GemmMicrokernelTester()
11504 .mr(1)
11505 .nr(8)
11506 .kr(1)
11507 .sr(4)
11508 .m(m)
11509 .n(n)
11510 .k(k)
11511 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011512 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011513 }
11514 }
11515 }
11516 }
11517
Marat Dukhande06f492020-04-09 00:19:31 -070011518 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011519 TEST_REQUIRES_ARM_NEON_FMA;
11520 for (size_t k = 5; k < 8; k++) {
11521 GemmMicrokernelTester()
11522 .mr(1)
11523 .nr(8)
11524 .kr(1)
11525 .sr(4)
11526 .m(1)
11527 .n(8)
11528 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011529 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011530 }
11531 }
11532
Marat Dukhande06f492020-04-09 00:19:31 -070011533 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011534 TEST_REQUIRES_ARM_NEON_FMA;
11535 for (size_t k = 5; k < 8; k++) {
11536 GemmMicrokernelTester()
11537 .mr(1)
11538 .nr(8)
11539 .kr(1)
11540 .sr(4)
11541 .m(1)
11542 .n(8)
11543 .k(k)
11544 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011545 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011546 }
11547 }
11548
Marat Dukhande06f492020-04-09 00:19:31 -070011549 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011550 TEST_REQUIRES_ARM_NEON_FMA;
11551 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011552 for (uint32_t n = 1; n <= 8; n++) {
11553 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011554 GemmMicrokernelTester()
11555 .mr(1)
11556 .nr(8)
11557 .kr(1)
11558 .sr(4)
11559 .m(m)
11560 .n(n)
11561 .k(k)
11562 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011563 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011564 }
11565 }
11566 }
11567 }
11568
Marat Dukhande06f492020-04-09 00:19:31 -070011569 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011570 TEST_REQUIRES_ARM_NEON_FMA;
11571 for (size_t k = 8; k <= 40; k += 4) {
11572 GemmMicrokernelTester()
11573 .mr(1)
11574 .nr(8)
11575 .kr(1)
11576 .sr(4)
11577 .m(1)
11578 .n(8)
11579 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011580 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011581 }
11582 }
11583
Marat Dukhande06f492020-04-09 00:19:31 -070011584 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011585 TEST_REQUIRES_ARM_NEON_FMA;
11586 for (size_t k = 8; k <= 40; k += 4) {
11587 GemmMicrokernelTester()
11588 .mr(1)
11589 .nr(8)
11590 .kr(1)
11591 .sr(4)
11592 .m(1)
11593 .n(8)
11594 .k(k)
11595 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011596 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011597 }
11598 }
11599
Marat Dukhande06f492020-04-09 00:19:31 -070011600 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011601 TEST_REQUIRES_ARM_NEON_FMA;
11602 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011603 for (uint32_t n = 1; n <= 8; n++) {
11604 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011605 GemmMicrokernelTester()
11606 .mr(1)
11607 .nr(8)
11608 .kr(1)
11609 .sr(4)
11610 .m(m)
11611 .n(n)
11612 .k(k)
11613 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011614 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011615 }
11616 }
11617 }
11618 }
11619
Marat Dukhande06f492020-04-09 00:19:31 -070011620 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011621 TEST_REQUIRES_ARM_NEON_FMA;
11622 for (uint32_t n = 9; n < 16; n++) {
11623 for (size_t k = 1; k <= 20; k += 5) {
11624 GemmMicrokernelTester()
11625 .mr(1)
11626 .nr(8)
11627 .kr(1)
11628 .sr(4)
11629 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011630 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011631 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011632 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011633 }
11634 }
11635 }
11636
Marat Dukhande06f492020-04-09 00:19:31 -070011637 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011638 TEST_REQUIRES_ARM_NEON_FMA;
11639 for (uint32_t n = 9; n < 16; n++) {
11640 for (size_t k = 1; k <= 20; k += 5) {
11641 GemmMicrokernelTester()
11642 .mr(1)
11643 .nr(8)
11644 .kr(1)
11645 .sr(4)
11646 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011647 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011648 .k(k)
11649 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011650 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011651 }
11652 }
11653 }
11654
Marat Dukhande06f492020-04-09 00:19:31 -070011655 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011656 TEST_REQUIRES_ARM_NEON_FMA;
11657 for (uint32_t n = 9; n < 16; n++) {
11658 for (size_t k = 1; k <= 20; k += 5) {
11659 GemmMicrokernelTester()
11660 .mr(1)
11661 .nr(8)
11662 .kr(1)
11663 .sr(4)
11664 .m(1)
11665 .n(n)
11666 .k(k)
11667 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011668 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011669 }
11670 }
11671 }
11672
Marat Dukhande06f492020-04-09 00:19:31 -070011673 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011674 TEST_REQUIRES_ARM_NEON_FMA;
11675 for (uint32_t n = 9; n < 16; n++) {
11676 for (size_t k = 1; k <= 20; k += 5) {
11677 for (uint32_t m = 1; m <= 1; m++) {
11678 GemmMicrokernelTester()
11679 .mr(1)
11680 .nr(8)
11681 .kr(1)
11682 .sr(4)
11683 .m(m)
11684 .n(n)
11685 .k(k)
11686 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011687 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011688 }
11689 }
11690 }
11691 }
11692
Marat Dukhande06f492020-04-09 00:19:31 -070011693 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011694 TEST_REQUIRES_ARM_NEON_FMA;
11695 for (uint32_t n = 16; n <= 24; n += 8) {
11696 for (size_t k = 1; k <= 20; k += 5) {
11697 GemmMicrokernelTester()
11698 .mr(1)
11699 .nr(8)
11700 .kr(1)
11701 .sr(4)
11702 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011703 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011704 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011705 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011706 }
11707 }
11708 }
11709
Marat Dukhande06f492020-04-09 00:19:31 -070011710 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011711 TEST_REQUIRES_ARM_NEON_FMA;
11712 for (uint32_t n = 16; n <= 24; n += 8) {
11713 for (size_t k = 1; k <= 20; k += 5) {
11714 GemmMicrokernelTester()
11715 .mr(1)
11716 .nr(8)
11717 .kr(1)
11718 .sr(4)
11719 .m(1)
11720 .n(n)
11721 .k(k)
11722 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011723 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011724 }
11725 }
11726 }
11727
Marat Dukhande06f492020-04-09 00:19:31 -070011728 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011729 TEST_REQUIRES_ARM_NEON_FMA;
11730 for (uint32_t n = 16; n <= 24; n += 8) {
11731 for (size_t k = 1; k <= 20; k += 5) {
11732 GemmMicrokernelTester()
11733 .mr(1)
11734 .nr(8)
11735 .kr(1)
11736 .sr(4)
11737 .m(1)
11738 .n(n)
11739 .k(k)
11740 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011741 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011742 }
11743 }
11744 }
11745
Marat Dukhande06f492020-04-09 00:19:31 -070011746 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011747 TEST_REQUIRES_ARM_NEON_FMA;
11748 for (uint32_t n = 16; n <= 24; n += 8) {
11749 for (size_t k = 1; k <= 20; k += 5) {
11750 for (uint32_t m = 1; m <= 1; m++) {
11751 GemmMicrokernelTester()
11752 .mr(1)
11753 .nr(8)
11754 .kr(1)
11755 .sr(4)
11756 .m(m)
11757 .n(n)
11758 .k(k)
11759 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011760 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011761 }
11762 }
11763 }
11764 }
11765
Marat Dukhande06f492020-04-09 00:19:31 -070011766 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011767 TEST_REQUIRES_ARM_NEON_FMA;
11768 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011769 for (uint32_t n = 1; n <= 8; n++) {
11770 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011771 GemmMicrokernelTester()
11772 .mr(1)
11773 .nr(8)
11774 .kr(1)
11775 .sr(4)
11776 .m(m)
11777 .n(n)
11778 .k(k)
11779 .cm_stride(11)
11780 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011781 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011782 }
11783 }
11784 }
11785 }
11786
Marat Dukhande06f492020-04-09 00:19:31 -070011787 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011788 TEST_REQUIRES_ARM_NEON_FMA;
11789 GemmMicrokernelTester()
11790 .mr(1)
11791 .nr(8)
11792 .kr(1)
11793 .sr(4)
11794 .m(1)
11795 .n(8)
11796 .k(4)
11797 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011798 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011799 }
11800
Marat Dukhande06f492020-04-09 00:19:31 -070011801 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011802 TEST_REQUIRES_ARM_NEON_FMA;
11803 GemmMicrokernelTester()
11804 .mr(1)
11805 .nr(8)
11806 .kr(1)
11807 .sr(4)
11808 .m(1)
11809 .n(8)
11810 .k(4)
11811 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011812 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011813 }
11814
Marat Dukhande06f492020-04-09 00:19:31 -070011815 TEST(F32_GEMM_MINMAX_1X8S4__NEONFMA, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011816 TEST_REQUIRES_ARM_NEON_FMA;
11817 GemmMicrokernelTester()
11818 .mr(1)
11819 .nr(8)
11820 .kr(1)
11821 .sr(4)
11822 .m(1)
11823 .n(8)
11824 .k(4)
11825 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011826 .Test(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011827 }
11828#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11829
11830
11831#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070011832 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011833 TEST_REQUIRES_ARM_NEON_FMA;
11834 GemmMicrokernelTester()
11835 .mr(6)
11836 .nr(8)
11837 .kr(1)
11838 .sr(4)
11839 .m(6)
11840 .n(8)
11841 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011842 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011843 }
11844
Marat Dukhande06f492020-04-09 00:19:31 -070011845 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011846 TEST_REQUIRES_ARM_NEON_FMA;
11847 GemmMicrokernelTester()
11848 .mr(6)
11849 .nr(8)
11850 .kr(1)
11851 .sr(4)
11852 .m(6)
11853 .n(8)
11854 .k(4)
11855 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011856 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011857 }
11858
Marat Dukhande06f492020-04-09 00:19:31 -070011859 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011860 TEST_REQUIRES_ARM_NEON_FMA;
11861 GemmMicrokernelTester()
11862 .mr(6)
11863 .nr(8)
11864 .kr(1)
11865 .sr(4)
11866 .m(6)
11867 .n(8)
11868 .k(4)
11869 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011870 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011871 }
11872
Marat Dukhande06f492020-04-09 00:19:31 -070011873 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011874 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011875 for (uint32_t n = 1; n <= 8; n++) {
11876 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011877 GemmMicrokernelTester()
11878 .mr(6)
11879 .nr(8)
11880 .kr(1)
11881 .sr(4)
11882 .m(m)
11883 .n(n)
11884 .k(4)
11885 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011886 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011887 }
11888 }
11889 }
11890
Marat Dukhande06f492020-04-09 00:19:31 -070011891 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011892 TEST_REQUIRES_ARM_NEON_FMA;
11893 for (uint32_t m = 1; m <= 6; m++) {
11894 GemmMicrokernelTester()
11895 .mr(6)
11896 .nr(8)
11897 .kr(1)
11898 .sr(4)
11899 .m(m)
11900 .n(8)
11901 .k(4)
11902 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011903 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011904 }
11905 }
11906
Marat Dukhande06f492020-04-09 00:19:31 -070011907 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011908 TEST_REQUIRES_ARM_NEON_FMA;
11909 for (uint32_t n = 1; n <= 8; n++) {
11910 GemmMicrokernelTester()
11911 .mr(6)
11912 .nr(8)
11913 .kr(1)
11914 .sr(4)
11915 .m(6)
11916 .n(n)
11917 .k(4)
11918 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011919 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011920 }
11921 }
11922
Marat Dukhande06f492020-04-09 00:19:31 -070011923 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011924 TEST_REQUIRES_ARM_NEON_FMA;
11925 for (size_t k = 1; k < 4; k++) {
11926 GemmMicrokernelTester()
11927 .mr(6)
11928 .nr(8)
11929 .kr(1)
11930 .sr(4)
11931 .m(6)
11932 .n(8)
11933 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011934 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011935 }
11936 }
11937
Marat Dukhande06f492020-04-09 00:19:31 -070011938 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011939 TEST_REQUIRES_ARM_NEON_FMA;
11940 for (size_t k = 1; k < 4; k++) {
11941 GemmMicrokernelTester()
11942 .mr(6)
11943 .nr(8)
11944 .kr(1)
11945 .sr(4)
11946 .m(6)
11947 .n(8)
11948 .k(k)
11949 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011950 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011951 }
11952 }
11953
Marat Dukhande06f492020-04-09 00:19:31 -070011954 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011955 TEST_REQUIRES_ARM_NEON_FMA;
11956 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011957 for (uint32_t n = 1; n <= 8; n++) {
11958 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011959 GemmMicrokernelTester()
11960 .mr(6)
11961 .nr(8)
11962 .kr(1)
11963 .sr(4)
11964 .m(m)
11965 .n(n)
11966 .k(k)
11967 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011968 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011969 }
11970 }
11971 }
11972 }
11973
Marat Dukhande06f492020-04-09 00:19:31 -070011974 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011975 TEST_REQUIRES_ARM_NEON_FMA;
11976 for (size_t k = 5; k < 8; k++) {
11977 GemmMicrokernelTester()
11978 .mr(6)
11979 .nr(8)
11980 .kr(1)
11981 .sr(4)
11982 .m(6)
11983 .n(8)
11984 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011985 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011986 }
11987 }
11988
Marat Dukhande06f492020-04-09 00:19:31 -070011989 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011990 TEST_REQUIRES_ARM_NEON_FMA;
11991 for (size_t k = 5; k < 8; k++) {
11992 GemmMicrokernelTester()
11993 .mr(6)
11994 .nr(8)
11995 .kr(1)
11996 .sr(4)
11997 .m(6)
11998 .n(8)
11999 .k(k)
12000 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012001 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012002 }
12003 }
12004
Marat Dukhande06f492020-04-09 00:19:31 -070012005 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012006 TEST_REQUIRES_ARM_NEON_FMA;
12007 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012008 for (uint32_t n = 1; n <= 8; n++) {
12009 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012010 GemmMicrokernelTester()
12011 .mr(6)
12012 .nr(8)
12013 .kr(1)
12014 .sr(4)
12015 .m(m)
12016 .n(n)
12017 .k(k)
12018 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012019 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012020 }
12021 }
12022 }
12023 }
12024
Marat Dukhande06f492020-04-09 00:19:31 -070012025 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012026 TEST_REQUIRES_ARM_NEON_FMA;
12027 for (size_t k = 8; k <= 40; k += 4) {
12028 GemmMicrokernelTester()
12029 .mr(6)
12030 .nr(8)
12031 .kr(1)
12032 .sr(4)
12033 .m(6)
12034 .n(8)
12035 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012036 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012037 }
12038 }
12039
Marat Dukhande06f492020-04-09 00:19:31 -070012040 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012041 TEST_REQUIRES_ARM_NEON_FMA;
12042 for (size_t k = 8; k <= 40; k += 4) {
12043 GemmMicrokernelTester()
12044 .mr(6)
12045 .nr(8)
12046 .kr(1)
12047 .sr(4)
12048 .m(6)
12049 .n(8)
12050 .k(k)
12051 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012052 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012053 }
12054 }
12055
Marat Dukhande06f492020-04-09 00:19:31 -070012056 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012057 TEST_REQUIRES_ARM_NEON_FMA;
12058 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012059 for (uint32_t n = 1; n <= 8; n++) {
12060 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012061 GemmMicrokernelTester()
12062 .mr(6)
12063 .nr(8)
12064 .kr(1)
12065 .sr(4)
12066 .m(m)
12067 .n(n)
12068 .k(k)
12069 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012070 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012071 }
12072 }
12073 }
12074 }
12075
Marat Dukhande06f492020-04-09 00:19:31 -070012076 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012077 TEST_REQUIRES_ARM_NEON_FMA;
12078 for (uint32_t n = 9; n < 16; n++) {
12079 for (size_t k = 1; k <= 20; k += 5) {
12080 GemmMicrokernelTester()
12081 .mr(6)
12082 .nr(8)
12083 .kr(1)
12084 .sr(4)
12085 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012086 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012087 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012088 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012089 }
12090 }
12091 }
12092
Marat Dukhande06f492020-04-09 00:19:31 -070012093 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012094 TEST_REQUIRES_ARM_NEON_FMA;
12095 for (uint32_t n = 9; n < 16; n++) {
12096 for (size_t k = 1; k <= 20; k += 5) {
12097 GemmMicrokernelTester()
12098 .mr(6)
12099 .nr(8)
12100 .kr(1)
12101 .sr(4)
12102 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012103 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012104 .k(k)
12105 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012106 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012107 }
12108 }
12109 }
12110
Marat Dukhande06f492020-04-09 00:19:31 -070012111 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012112 TEST_REQUIRES_ARM_NEON_FMA;
12113 for (uint32_t n = 9; n < 16; n++) {
12114 for (size_t k = 1; k <= 20; k += 5) {
12115 GemmMicrokernelTester()
12116 .mr(6)
12117 .nr(8)
12118 .kr(1)
12119 .sr(4)
12120 .m(6)
12121 .n(n)
12122 .k(k)
12123 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012124 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012125 }
12126 }
12127 }
12128
Marat Dukhande06f492020-04-09 00:19:31 -070012129 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012130 TEST_REQUIRES_ARM_NEON_FMA;
12131 for (uint32_t n = 9; n < 16; n++) {
12132 for (size_t k = 1; k <= 20; k += 5) {
12133 for (uint32_t m = 1; m <= 6; m++) {
12134 GemmMicrokernelTester()
12135 .mr(6)
12136 .nr(8)
12137 .kr(1)
12138 .sr(4)
12139 .m(m)
12140 .n(n)
12141 .k(k)
12142 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012143 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012144 }
12145 }
12146 }
12147 }
12148
Marat Dukhande06f492020-04-09 00:19:31 -070012149 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012150 TEST_REQUIRES_ARM_NEON_FMA;
12151 for (uint32_t n = 16; n <= 24; n += 8) {
12152 for (size_t k = 1; k <= 20; k += 5) {
12153 GemmMicrokernelTester()
12154 .mr(6)
12155 .nr(8)
12156 .kr(1)
12157 .sr(4)
12158 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012159 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012160 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012161 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012162 }
12163 }
12164 }
12165
Marat Dukhande06f492020-04-09 00:19:31 -070012166 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012167 TEST_REQUIRES_ARM_NEON_FMA;
12168 for (uint32_t n = 16; n <= 24; n += 8) {
12169 for (size_t k = 1; k <= 20; k += 5) {
12170 GemmMicrokernelTester()
12171 .mr(6)
12172 .nr(8)
12173 .kr(1)
12174 .sr(4)
12175 .m(6)
12176 .n(n)
12177 .k(k)
12178 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012179 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012180 }
12181 }
12182 }
12183
Marat Dukhande06f492020-04-09 00:19:31 -070012184 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012185 TEST_REQUIRES_ARM_NEON_FMA;
12186 for (uint32_t n = 16; n <= 24; n += 8) {
12187 for (size_t k = 1; k <= 20; k += 5) {
12188 GemmMicrokernelTester()
12189 .mr(6)
12190 .nr(8)
12191 .kr(1)
12192 .sr(4)
12193 .m(6)
12194 .n(n)
12195 .k(k)
12196 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012197 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012198 }
12199 }
12200 }
12201
Marat Dukhande06f492020-04-09 00:19:31 -070012202 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012203 TEST_REQUIRES_ARM_NEON_FMA;
12204 for (uint32_t n = 16; n <= 24; n += 8) {
12205 for (size_t k = 1; k <= 20; k += 5) {
12206 for (uint32_t m = 1; m <= 6; m++) {
12207 GemmMicrokernelTester()
12208 .mr(6)
12209 .nr(8)
12210 .kr(1)
12211 .sr(4)
12212 .m(m)
12213 .n(n)
12214 .k(k)
12215 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012216 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012217 }
12218 }
12219 }
12220 }
12221
Marat Dukhande06f492020-04-09 00:19:31 -070012222 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012223 TEST_REQUIRES_ARM_NEON_FMA;
12224 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012225 for (uint32_t n = 1; n <= 8; n++) {
12226 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012227 GemmMicrokernelTester()
12228 .mr(6)
12229 .nr(8)
12230 .kr(1)
12231 .sr(4)
12232 .m(m)
12233 .n(n)
12234 .k(k)
12235 .cm_stride(11)
12236 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012237 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012238 }
12239 }
12240 }
12241 }
12242
Marat Dukhande06f492020-04-09 00:19:31 -070012243 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012244 TEST_REQUIRES_ARM_NEON_FMA;
12245 GemmMicrokernelTester()
12246 .mr(6)
12247 .nr(8)
12248 .kr(1)
12249 .sr(4)
12250 .m(6)
12251 .n(8)
12252 .k(4)
12253 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012254 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012255 }
12256
Marat Dukhande06f492020-04-09 00:19:31 -070012257 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012258 TEST_REQUIRES_ARM_NEON_FMA;
12259 GemmMicrokernelTester()
12260 .mr(6)
12261 .nr(8)
12262 .kr(1)
12263 .sr(4)
12264 .m(6)
12265 .n(8)
12266 .k(4)
12267 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012268 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012269 }
12270
Marat Dukhande06f492020-04-09 00:19:31 -070012271 TEST(F32_GEMM_MINMAX_6X8S4__NEONFMA, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012272 TEST_REQUIRES_ARM_NEON_FMA;
12273 GemmMicrokernelTester()
12274 .mr(6)
12275 .nr(8)
12276 .kr(1)
12277 .sr(4)
12278 .m(6)
12279 .n(8)
12280 .k(4)
12281 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012282 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012283 }
12284#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12285
12286
12287#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070012288 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012289 TEST_REQUIRES_ARM_NEON_FMA;
12290 GemmMicrokernelTester()
12291 .mr(8)
12292 .nr(8)
12293 .kr(1)
12294 .sr(4)
12295 .m(8)
12296 .n(8)
12297 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012298 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012299 }
12300
Marat Dukhande06f492020-04-09 00:19:31 -070012301 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012302 TEST_REQUIRES_ARM_NEON_FMA;
12303 GemmMicrokernelTester()
12304 .mr(8)
12305 .nr(8)
12306 .kr(1)
12307 .sr(4)
12308 .m(8)
12309 .n(8)
12310 .k(4)
12311 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012312 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012313 }
12314
Marat Dukhande06f492020-04-09 00:19:31 -070012315 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012316 TEST_REQUIRES_ARM_NEON_FMA;
12317 GemmMicrokernelTester()
12318 .mr(8)
12319 .nr(8)
12320 .kr(1)
12321 .sr(4)
12322 .m(8)
12323 .n(8)
12324 .k(4)
12325 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012326 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012327 }
12328
Marat Dukhande06f492020-04-09 00:19:31 -070012329 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012330 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012331 for (uint32_t n = 1; n <= 8; n++) {
12332 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012333 GemmMicrokernelTester()
12334 .mr(8)
12335 .nr(8)
12336 .kr(1)
12337 .sr(4)
12338 .m(m)
12339 .n(n)
12340 .k(4)
12341 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012342 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012343 }
12344 }
12345 }
12346
Marat Dukhande06f492020-04-09 00:19:31 -070012347 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012348 TEST_REQUIRES_ARM_NEON_FMA;
12349 for (uint32_t m = 1; m <= 8; m++) {
12350 GemmMicrokernelTester()
12351 .mr(8)
12352 .nr(8)
12353 .kr(1)
12354 .sr(4)
12355 .m(m)
12356 .n(8)
12357 .k(4)
12358 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012359 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012360 }
12361 }
12362
Marat Dukhande06f492020-04-09 00:19:31 -070012363 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012364 TEST_REQUIRES_ARM_NEON_FMA;
12365 for (uint32_t n = 1; n <= 8; n++) {
12366 GemmMicrokernelTester()
12367 .mr(8)
12368 .nr(8)
12369 .kr(1)
12370 .sr(4)
12371 .m(8)
12372 .n(n)
12373 .k(4)
12374 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012375 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012376 }
12377 }
12378
Marat Dukhande06f492020-04-09 00:19:31 -070012379 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012380 TEST_REQUIRES_ARM_NEON_FMA;
12381 for (size_t k = 1; k < 4; k++) {
12382 GemmMicrokernelTester()
12383 .mr(8)
12384 .nr(8)
12385 .kr(1)
12386 .sr(4)
12387 .m(8)
12388 .n(8)
12389 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012390 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012391 }
12392 }
12393
Marat Dukhande06f492020-04-09 00:19:31 -070012394 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012395 TEST_REQUIRES_ARM_NEON_FMA;
12396 for (size_t k = 1; k < 4; k++) {
12397 GemmMicrokernelTester()
12398 .mr(8)
12399 .nr(8)
12400 .kr(1)
12401 .sr(4)
12402 .m(8)
12403 .n(8)
12404 .k(k)
12405 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012406 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012407 }
12408 }
12409
Marat Dukhande06f492020-04-09 00:19:31 -070012410 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012411 TEST_REQUIRES_ARM_NEON_FMA;
12412 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012413 for (uint32_t n = 1; n <= 8; n++) {
12414 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012415 GemmMicrokernelTester()
12416 .mr(8)
12417 .nr(8)
12418 .kr(1)
12419 .sr(4)
12420 .m(m)
12421 .n(n)
12422 .k(k)
12423 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012424 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012425 }
12426 }
12427 }
12428 }
12429
Marat Dukhande06f492020-04-09 00:19:31 -070012430 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012431 TEST_REQUIRES_ARM_NEON_FMA;
12432 for (size_t k = 5; k < 8; k++) {
12433 GemmMicrokernelTester()
12434 .mr(8)
12435 .nr(8)
12436 .kr(1)
12437 .sr(4)
12438 .m(8)
12439 .n(8)
12440 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012441 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012442 }
12443 }
12444
Marat Dukhande06f492020-04-09 00:19:31 -070012445 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012446 TEST_REQUIRES_ARM_NEON_FMA;
12447 for (size_t k = 5; k < 8; k++) {
12448 GemmMicrokernelTester()
12449 .mr(8)
12450 .nr(8)
12451 .kr(1)
12452 .sr(4)
12453 .m(8)
12454 .n(8)
12455 .k(k)
12456 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012457 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012458 }
12459 }
12460
Marat Dukhande06f492020-04-09 00:19:31 -070012461 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012462 TEST_REQUIRES_ARM_NEON_FMA;
12463 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012464 for (uint32_t n = 1; n <= 8; n++) {
12465 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012466 GemmMicrokernelTester()
12467 .mr(8)
12468 .nr(8)
12469 .kr(1)
12470 .sr(4)
12471 .m(m)
12472 .n(n)
12473 .k(k)
12474 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012475 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012476 }
12477 }
12478 }
12479 }
12480
Marat Dukhande06f492020-04-09 00:19:31 -070012481 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012482 TEST_REQUIRES_ARM_NEON_FMA;
12483 for (size_t k = 8; k <= 40; k += 4) {
12484 GemmMicrokernelTester()
12485 .mr(8)
12486 .nr(8)
12487 .kr(1)
12488 .sr(4)
12489 .m(8)
12490 .n(8)
12491 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012492 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012493 }
12494 }
12495
Marat Dukhande06f492020-04-09 00:19:31 -070012496 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012497 TEST_REQUIRES_ARM_NEON_FMA;
12498 for (size_t k = 8; k <= 40; k += 4) {
12499 GemmMicrokernelTester()
12500 .mr(8)
12501 .nr(8)
12502 .kr(1)
12503 .sr(4)
12504 .m(8)
12505 .n(8)
12506 .k(k)
12507 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012508 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012509 }
12510 }
12511
Marat Dukhande06f492020-04-09 00:19:31 -070012512 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012513 TEST_REQUIRES_ARM_NEON_FMA;
12514 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012515 for (uint32_t n = 1; n <= 8; n++) {
12516 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012517 GemmMicrokernelTester()
12518 .mr(8)
12519 .nr(8)
12520 .kr(1)
12521 .sr(4)
12522 .m(m)
12523 .n(n)
12524 .k(k)
12525 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012526 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012527 }
12528 }
12529 }
12530 }
12531
Marat Dukhande06f492020-04-09 00:19:31 -070012532 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012533 TEST_REQUIRES_ARM_NEON_FMA;
12534 for (uint32_t n = 9; n < 16; n++) {
12535 for (size_t k = 1; k <= 20; k += 5) {
12536 GemmMicrokernelTester()
12537 .mr(8)
12538 .nr(8)
12539 .kr(1)
12540 .sr(4)
12541 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012542 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012543 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012544 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012545 }
12546 }
12547 }
12548
Marat Dukhande06f492020-04-09 00:19:31 -070012549 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012550 TEST_REQUIRES_ARM_NEON_FMA;
12551 for (uint32_t n = 9; n < 16; n++) {
12552 for (size_t k = 1; k <= 20; k += 5) {
12553 GemmMicrokernelTester()
12554 .mr(8)
12555 .nr(8)
12556 .kr(1)
12557 .sr(4)
12558 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012559 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012560 .k(k)
12561 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012562 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012563 }
12564 }
12565 }
12566
Marat Dukhande06f492020-04-09 00:19:31 -070012567 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012568 TEST_REQUIRES_ARM_NEON_FMA;
12569 for (uint32_t n = 9; n < 16; n++) {
12570 for (size_t k = 1; k <= 20; k += 5) {
12571 GemmMicrokernelTester()
12572 .mr(8)
12573 .nr(8)
12574 .kr(1)
12575 .sr(4)
12576 .m(8)
12577 .n(n)
12578 .k(k)
12579 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012580 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012581 }
12582 }
12583 }
12584
Marat Dukhande06f492020-04-09 00:19:31 -070012585 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012586 TEST_REQUIRES_ARM_NEON_FMA;
12587 for (uint32_t n = 9; n < 16; n++) {
12588 for (size_t k = 1; k <= 20; k += 5) {
12589 for (uint32_t m = 1; m <= 8; m++) {
12590 GemmMicrokernelTester()
12591 .mr(8)
12592 .nr(8)
12593 .kr(1)
12594 .sr(4)
12595 .m(m)
12596 .n(n)
12597 .k(k)
12598 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012599 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012600 }
12601 }
12602 }
12603 }
12604
Marat Dukhande06f492020-04-09 00:19:31 -070012605 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012606 TEST_REQUIRES_ARM_NEON_FMA;
12607 for (uint32_t n = 16; n <= 24; n += 8) {
12608 for (size_t k = 1; k <= 20; k += 5) {
12609 GemmMicrokernelTester()
12610 .mr(8)
12611 .nr(8)
12612 .kr(1)
12613 .sr(4)
12614 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012615 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012616 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012617 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012618 }
12619 }
12620 }
12621
Marat Dukhande06f492020-04-09 00:19:31 -070012622 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012623 TEST_REQUIRES_ARM_NEON_FMA;
12624 for (uint32_t n = 16; n <= 24; n += 8) {
12625 for (size_t k = 1; k <= 20; k += 5) {
12626 GemmMicrokernelTester()
12627 .mr(8)
12628 .nr(8)
12629 .kr(1)
12630 .sr(4)
12631 .m(8)
12632 .n(n)
12633 .k(k)
12634 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012635 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012636 }
12637 }
12638 }
12639
Marat Dukhande06f492020-04-09 00:19:31 -070012640 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012641 TEST_REQUIRES_ARM_NEON_FMA;
12642 for (uint32_t n = 16; n <= 24; n += 8) {
12643 for (size_t k = 1; k <= 20; k += 5) {
12644 GemmMicrokernelTester()
12645 .mr(8)
12646 .nr(8)
12647 .kr(1)
12648 .sr(4)
12649 .m(8)
12650 .n(n)
12651 .k(k)
12652 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012653 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012654 }
12655 }
12656 }
12657
Marat Dukhande06f492020-04-09 00:19:31 -070012658 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012659 TEST_REQUIRES_ARM_NEON_FMA;
12660 for (uint32_t n = 16; n <= 24; n += 8) {
12661 for (size_t k = 1; k <= 20; k += 5) {
12662 for (uint32_t m = 1; m <= 8; m++) {
12663 GemmMicrokernelTester()
12664 .mr(8)
12665 .nr(8)
12666 .kr(1)
12667 .sr(4)
12668 .m(m)
12669 .n(n)
12670 .k(k)
12671 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012672 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012673 }
12674 }
12675 }
12676 }
12677
Marat Dukhande06f492020-04-09 00:19:31 -070012678 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012679 TEST_REQUIRES_ARM_NEON_FMA;
12680 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012681 for (uint32_t n = 1; n <= 8; n++) {
12682 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012683 GemmMicrokernelTester()
12684 .mr(8)
12685 .nr(8)
12686 .kr(1)
12687 .sr(4)
12688 .m(m)
12689 .n(n)
12690 .k(k)
12691 .cm_stride(11)
12692 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012693 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012694 }
12695 }
12696 }
12697 }
12698
Marat Dukhande06f492020-04-09 00:19:31 -070012699 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012700 TEST_REQUIRES_ARM_NEON_FMA;
12701 GemmMicrokernelTester()
12702 .mr(8)
12703 .nr(8)
12704 .kr(1)
12705 .sr(4)
12706 .m(8)
12707 .n(8)
12708 .k(4)
12709 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012710 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012711 }
12712
Marat Dukhande06f492020-04-09 00:19:31 -070012713 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012714 TEST_REQUIRES_ARM_NEON_FMA;
12715 GemmMicrokernelTester()
12716 .mr(8)
12717 .nr(8)
12718 .kr(1)
12719 .sr(4)
12720 .m(8)
12721 .n(8)
12722 .k(4)
12723 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012724 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012725 }
12726
Marat Dukhande06f492020-04-09 00:19:31 -070012727 TEST(F32_GEMM_MINMAX_8X8S4__NEONFMA, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012728 TEST_REQUIRES_ARM_NEON_FMA;
12729 GemmMicrokernelTester()
12730 .mr(8)
12731 .nr(8)
12732 .kr(1)
12733 .sr(4)
12734 .m(8)
12735 .n(8)
12736 .k(4)
12737 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012738 .Test(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012739 }
12740#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12741
12742
12743#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080012744 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, k_eq_1) {
12745 TEST_REQUIRES_X86_SSE;
12746 GemmMicrokernelTester()
12747 .mr(3)
12748 .nr(8)
12749 .kr(1)
12750 .sr(1)
12751 .m(3)
12752 .n(8)
12753 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012754 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012755 }
12756
12757 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, strided_cn) {
12758 TEST_REQUIRES_X86_SSE;
12759 GemmMicrokernelTester()
12760 .mr(3)
12761 .nr(8)
12762 .kr(1)
12763 .sr(1)
12764 .m(3)
12765 .n(8)
12766 .k(1)
12767 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012768 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012769 }
12770
12771 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, k_eq_1_strided_a) {
12772 TEST_REQUIRES_X86_SSE;
12773 GemmMicrokernelTester()
12774 .mr(3)
12775 .nr(8)
12776 .kr(1)
12777 .sr(1)
12778 .m(3)
12779 .n(8)
12780 .k(1)
12781 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012782 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012783 }
12784
12785 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile) {
12786 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012787 for (uint32_t n = 1; n <= 8; n++) {
12788 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080012789 GemmMicrokernelTester()
12790 .mr(3)
12791 .nr(8)
12792 .kr(1)
12793 .sr(1)
12794 .m(m)
12795 .n(n)
12796 .k(1)
12797 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012798 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012799 }
12800 }
12801 }
12802
12803 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile_m) {
12804 TEST_REQUIRES_X86_SSE;
12805 for (uint32_t m = 1; m <= 3; m++) {
12806 GemmMicrokernelTester()
12807 .mr(3)
12808 .nr(8)
12809 .kr(1)
12810 .sr(1)
12811 .m(m)
12812 .n(8)
12813 .k(1)
12814 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012815 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012816 }
12817 }
12818
12819 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile_n) {
12820 TEST_REQUIRES_X86_SSE;
12821 for (uint32_t n = 1; n <= 8; n++) {
12822 GemmMicrokernelTester()
12823 .mr(3)
12824 .nr(8)
12825 .kr(1)
12826 .sr(1)
12827 .m(3)
12828 .n(n)
12829 .k(1)
12830 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012831 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012832 }
12833 }
12834
12835 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, k_gt_1) {
12836 TEST_REQUIRES_X86_SSE;
12837 for (size_t k = 2; k < 10; k++) {
12838 GemmMicrokernelTester()
12839 .mr(3)
12840 .nr(8)
12841 .kr(1)
12842 .sr(1)
12843 .m(3)
12844 .n(8)
12845 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012846 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012847 }
12848 }
12849
12850 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, k_gt_1_strided_a) {
12851 TEST_REQUIRES_X86_SSE;
12852 for (size_t k = 2; k < 10; k++) {
12853 GemmMicrokernelTester()
12854 .mr(3)
12855 .nr(8)
12856 .kr(1)
12857 .sr(1)
12858 .m(3)
12859 .n(8)
12860 .k(k)
12861 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012862 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012863 }
12864 }
12865
12866 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, k_gt_1_subtile) {
12867 TEST_REQUIRES_X86_SSE;
12868 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012869 for (uint32_t n = 1; n <= 8; n++) {
12870 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080012871 GemmMicrokernelTester()
12872 .mr(3)
12873 .nr(8)
12874 .kr(1)
12875 .sr(1)
12876 .m(m)
12877 .n(n)
12878 .k(k)
12879 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012880 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012881 }
12882 }
12883 }
12884 }
12885
12886 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, n_gt_8) {
12887 TEST_REQUIRES_X86_SSE;
12888 for (uint32_t n = 9; n < 16; n++) {
12889 for (size_t k = 1; k <= 5; k += 2) {
12890 GemmMicrokernelTester()
12891 .mr(3)
12892 .nr(8)
12893 .kr(1)
12894 .sr(1)
12895 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012896 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080012897 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012898 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012899 }
12900 }
12901 }
12902
12903 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, n_gt_8_strided_cn) {
12904 TEST_REQUIRES_X86_SSE;
12905 for (uint32_t n = 9; n < 16; n++) {
12906 for (size_t k = 1; k <= 5; k += 2) {
12907 GemmMicrokernelTester()
12908 .mr(3)
12909 .nr(8)
12910 .kr(1)
12911 .sr(1)
12912 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012913 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080012914 .k(k)
12915 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012916 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012917 }
12918 }
12919 }
12920
12921 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, n_gt_8_strided_a) {
12922 TEST_REQUIRES_X86_SSE;
12923 for (uint32_t n = 9; n < 16; n++) {
12924 for (size_t k = 1; k <= 5; k += 2) {
12925 GemmMicrokernelTester()
12926 .mr(3)
12927 .nr(8)
12928 .kr(1)
12929 .sr(1)
12930 .m(3)
12931 .n(n)
12932 .k(k)
12933 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012934 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012935 }
12936 }
12937 }
12938
12939 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, n_gt_8_subtile) {
12940 TEST_REQUIRES_X86_SSE;
12941 for (uint32_t n = 9; n < 16; n++) {
12942 for (size_t k = 1; k <= 5; k += 2) {
12943 for (uint32_t m = 1; m <= 3; m++) {
12944 GemmMicrokernelTester()
12945 .mr(3)
12946 .nr(8)
12947 .kr(1)
12948 .sr(1)
12949 .m(m)
12950 .n(n)
12951 .k(k)
12952 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012953 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012954 }
12955 }
12956 }
12957 }
12958
12959 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, n_div_8) {
12960 TEST_REQUIRES_X86_SSE;
12961 for (uint32_t n = 16; n <= 24; n += 8) {
12962 for (size_t k = 1; k <= 5; k += 2) {
12963 GemmMicrokernelTester()
12964 .mr(3)
12965 .nr(8)
12966 .kr(1)
12967 .sr(1)
12968 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012969 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080012970 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012971 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012972 }
12973 }
12974 }
12975
12976 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, n_div_8_strided_cn) {
12977 TEST_REQUIRES_X86_SSE;
12978 for (uint32_t n = 16; n <= 24; n += 8) {
12979 for (size_t k = 1; k <= 5; k += 2) {
12980 GemmMicrokernelTester()
12981 .mr(3)
12982 .nr(8)
12983 .kr(1)
12984 .sr(1)
12985 .m(3)
12986 .n(n)
12987 .k(k)
12988 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012989 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080012990 }
12991 }
12992 }
12993
12994 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, n_div_8_strided_a) {
12995 TEST_REQUIRES_X86_SSE;
12996 for (uint32_t n = 16; n <= 24; n += 8) {
12997 for (size_t k = 1; k <= 5; k += 2) {
12998 GemmMicrokernelTester()
12999 .mr(3)
13000 .nr(8)
13001 .kr(1)
13002 .sr(1)
13003 .m(3)
13004 .n(n)
13005 .k(k)
13006 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013007 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013008 }
13009 }
13010 }
13011
13012 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, n_div_8_subtile) {
13013 TEST_REQUIRES_X86_SSE;
13014 for (uint32_t n = 16; n <= 24; n += 8) {
13015 for (size_t k = 1; k <= 5; k += 2) {
13016 for (uint32_t m = 1; m <= 3; m++) {
13017 GemmMicrokernelTester()
13018 .mr(3)
13019 .nr(8)
13020 .kr(1)
13021 .sr(1)
13022 .m(m)
13023 .n(n)
13024 .k(k)
13025 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013026 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013027 }
13028 }
13029 }
13030 }
13031
13032 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, strided_cm_subtile) {
13033 TEST_REQUIRES_X86_SSE;
13034 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013035 for (uint32_t n = 1; n <= 8; n++) {
13036 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013037 GemmMicrokernelTester()
13038 .mr(3)
13039 .nr(8)
13040 .kr(1)
13041 .sr(1)
13042 .m(m)
13043 .n(n)
13044 .k(k)
13045 .cm_stride(11)
13046 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013047 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013048 }
13049 }
13050 }
13051 }
13052
13053 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, qmin) {
13054 TEST_REQUIRES_X86_SSE;
13055 GemmMicrokernelTester()
13056 .mr(3)
13057 .nr(8)
13058 .kr(1)
13059 .sr(1)
13060 .m(3)
13061 .n(8)
13062 .k(1)
13063 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013064 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013065 }
13066
13067 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, qmax) {
13068 TEST_REQUIRES_X86_SSE;
13069 GemmMicrokernelTester()
13070 .mr(3)
13071 .nr(8)
13072 .kr(1)
13073 .sr(1)
13074 .m(3)
13075 .n(8)
13076 .k(1)
13077 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013078 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013079 }
13080
13081 TEST(F32_GEMM_MINMAX_3X8__SSE_LOAD1, strided_cm) {
13082 TEST_REQUIRES_X86_SSE;
13083 GemmMicrokernelTester()
13084 .mr(3)
13085 .nr(8)
13086 .kr(1)
13087 .sr(1)
13088 .m(3)
13089 .n(8)
13090 .k(1)
13091 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013092 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013093 }
13094#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13095
13096
13097#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070013098 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013099 TEST_REQUIRES_X86_SSE;
13100 GemmMicrokernelTester()
13101 .mr(4)
13102 .nr(8)
13103 .kr(1)
13104 .sr(1)
13105 .m(4)
13106 .n(8)
13107 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013108 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013109 }
13110
Marat Dukhande06f492020-04-09 00:19:31 -070013111 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013112 TEST_REQUIRES_X86_SSE;
13113 GemmMicrokernelTester()
13114 .mr(4)
13115 .nr(8)
13116 .kr(1)
13117 .sr(1)
13118 .m(4)
13119 .n(8)
13120 .k(1)
13121 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013122 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013123 }
13124
Marat Dukhande06f492020-04-09 00:19:31 -070013125 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013126 TEST_REQUIRES_X86_SSE;
13127 GemmMicrokernelTester()
13128 .mr(4)
13129 .nr(8)
13130 .kr(1)
13131 .sr(1)
13132 .m(4)
13133 .n(8)
13134 .k(1)
13135 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013136 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013137 }
13138
Marat Dukhande06f492020-04-09 00:19:31 -070013139 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013140 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013141 for (uint32_t n = 1; n <= 8; n++) {
13142 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013143 GemmMicrokernelTester()
13144 .mr(4)
13145 .nr(8)
13146 .kr(1)
13147 .sr(1)
13148 .m(m)
13149 .n(n)
13150 .k(1)
13151 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013152 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013153 }
13154 }
13155 }
13156
Marat Dukhande06f492020-04-09 00:19:31 -070013157 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013158 TEST_REQUIRES_X86_SSE;
13159 for (uint32_t m = 1; m <= 4; m++) {
13160 GemmMicrokernelTester()
13161 .mr(4)
13162 .nr(8)
13163 .kr(1)
13164 .sr(1)
13165 .m(m)
13166 .n(8)
13167 .k(1)
13168 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013169 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013170 }
13171 }
13172
Marat Dukhande06f492020-04-09 00:19:31 -070013173 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013174 TEST_REQUIRES_X86_SSE;
13175 for (uint32_t n = 1; n <= 8; n++) {
13176 GemmMicrokernelTester()
13177 .mr(4)
13178 .nr(8)
13179 .kr(1)
13180 .sr(1)
13181 .m(4)
13182 .n(n)
13183 .k(1)
13184 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013185 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013186 }
13187 }
13188
Marat Dukhande06f492020-04-09 00:19:31 -070013189 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013190 TEST_REQUIRES_X86_SSE;
13191 for (size_t k = 2; k < 10; k++) {
13192 GemmMicrokernelTester()
13193 .mr(4)
13194 .nr(8)
13195 .kr(1)
13196 .sr(1)
13197 .m(4)
13198 .n(8)
13199 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013200 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013201 }
13202 }
13203
Marat Dukhande06f492020-04-09 00:19:31 -070013204 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013205 TEST_REQUIRES_X86_SSE;
13206 for (size_t k = 2; k < 10; k++) {
13207 GemmMicrokernelTester()
13208 .mr(4)
13209 .nr(8)
13210 .kr(1)
13211 .sr(1)
13212 .m(4)
13213 .n(8)
13214 .k(k)
13215 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013216 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013217 }
13218 }
13219
Marat Dukhande06f492020-04-09 00:19:31 -070013220 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013221 TEST_REQUIRES_X86_SSE;
13222 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013223 for (uint32_t n = 1; n <= 8; n++) {
13224 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013225 GemmMicrokernelTester()
13226 .mr(4)
13227 .nr(8)
13228 .kr(1)
13229 .sr(1)
13230 .m(m)
13231 .n(n)
13232 .k(k)
13233 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013234 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013235 }
13236 }
13237 }
13238 }
13239
Marat Dukhande06f492020-04-09 00:19:31 -070013240 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013241 TEST_REQUIRES_X86_SSE;
13242 for (uint32_t n = 9; n < 16; n++) {
13243 for (size_t k = 1; k <= 5; k += 2) {
13244 GemmMicrokernelTester()
13245 .mr(4)
13246 .nr(8)
13247 .kr(1)
13248 .sr(1)
13249 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013250 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070013251 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013252 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013253 }
13254 }
13255 }
13256
Marat Dukhande06f492020-04-09 00:19:31 -070013257 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013258 TEST_REQUIRES_X86_SSE;
13259 for (uint32_t n = 9; n < 16; n++) {
13260 for (size_t k = 1; k <= 5; k += 2) {
13261 GemmMicrokernelTester()
13262 .mr(4)
13263 .nr(8)
13264 .kr(1)
13265 .sr(1)
13266 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013267 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070013268 .k(k)
13269 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013270 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013271 }
13272 }
13273 }
13274
Marat Dukhande06f492020-04-09 00:19:31 -070013275 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013276 TEST_REQUIRES_X86_SSE;
13277 for (uint32_t n = 9; n < 16; n++) {
13278 for (size_t k = 1; k <= 5; k += 2) {
13279 GemmMicrokernelTester()
13280 .mr(4)
13281 .nr(8)
13282 .kr(1)
13283 .sr(1)
13284 .m(4)
13285 .n(n)
13286 .k(k)
13287 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013288 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013289 }
13290 }
13291 }
13292
Marat Dukhande06f492020-04-09 00:19:31 -070013293 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013294 TEST_REQUIRES_X86_SSE;
13295 for (uint32_t n = 9; n < 16; n++) {
13296 for (size_t k = 1; k <= 5; k += 2) {
13297 for (uint32_t m = 1; m <= 4; m++) {
13298 GemmMicrokernelTester()
13299 .mr(4)
13300 .nr(8)
13301 .kr(1)
13302 .sr(1)
13303 .m(m)
13304 .n(n)
13305 .k(k)
13306 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013307 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013308 }
13309 }
13310 }
13311 }
13312
Marat Dukhande06f492020-04-09 00:19:31 -070013313 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013314 TEST_REQUIRES_X86_SSE;
13315 for (uint32_t n = 16; n <= 24; n += 8) {
13316 for (size_t k = 1; k <= 5; k += 2) {
13317 GemmMicrokernelTester()
13318 .mr(4)
13319 .nr(8)
13320 .kr(1)
13321 .sr(1)
13322 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013323 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070013324 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013325 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013326 }
13327 }
13328 }
13329
Marat Dukhande06f492020-04-09 00:19:31 -070013330 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013331 TEST_REQUIRES_X86_SSE;
13332 for (uint32_t n = 16; n <= 24; n += 8) {
13333 for (size_t k = 1; k <= 5; k += 2) {
13334 GemmMicrokernelTester()
13335 .mr(4)
13336 .nr(8)
13337 .kr(1)
13338 .sr(1)
13339 .m(4)
13340 .n(n)
13341 .k(k)
13342 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013343 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013344 }
13345 }
13346 }
13347
Marat Dukhande06f492020-04-09 00:19:31 -070013348 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013349 TEST_REQUIRES_X86_SSE;
13350 for (uint32_t n = 16; n <= 24; n += 8) {
13351 for (size_t k = 1; k <= 5; k += 2) {
13352 GemmMicrokernelTester()
13353 .mr(4)
13354 .nr(8)
13355 .kr(1)
13356 .sr(1)
13357 .m(4)
13358 .n(n)
13359 .k(k)
13360 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013361 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013362 }
13363 }
13364 }
13365
Marat Dukhande06f492020-04-09 00:19:31 -070013366 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013367 TEST_REQUIRES_X86_SSE;
13368 for (uint32_t n = 16; n <= 24; n += 8) {
13369 for (size_t k = 1; k <= 5; k += 2) {
13370 for (uint32_t m = 1; m <= 4; m++) {
13371 GemmMicrokernelTester()
13372 .mr(4)
13373 .nr(8)
13374 .kr(1)
13375 .sr(1)
13376 .m(m)
13377 .n(n)
13378 .k(k)
13379 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013380 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013381 }
13382 }
13383 }
13384 }
13385
Marat Dukhande06f492020-04-09 00:19:31 -070013386 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013387 TEST_REQUIRES_X86_SSE;
13388 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013389 for (uint32_t n = 1; n <= 8; n++) {
13390 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013391 GemmMicrokernelTester()
13392 .mr(4)
13393 .nr(8)
13394 .kr(1)
13395 .sr(1)
13396 .m(m)
13397 .n(n)
13398 .k(k)
13399 .cm_stride(11)
13400 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013401 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013402 }
13403 }
13404 }
13405 }
13406
Marat Dukhande06f492020-04-09 00:19:31 -070013407 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013408 TEST_REQUIRES_X86_SSE;
13409 GemmMicrokernelTester()
13410 .mr(4)
13411 .nr(8)
13412 .kr(1)
13413 .sr(1)
13414 .m(4)
13415 .n(8)
13416 .k(1)
13417 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013418 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013419 }
13420
Marat Dukhande06f492020-04-09 00:19:31 -070013421 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013422 TEST_REQUIRES_X86_SSE;
13423 GemmMicrokernelTester()
13424 .mr(4)
13425 .nr(8)
13426 .kr(1)
13427 .sr(1)
13428 .m(4)
13429 .n(8)
13430 .k(1)
13431 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013432 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013433 }
13434
Marat Dukhande06f492020-04-09 00:19:31 -070013435 TEST(F32_GEMM_MINMAX_4X8__SSE_LOAD1, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013436 TEST_REQUIRES_X86_SSE;
13437 GemmMicrokernelTester()
13438 .mr(4)
13439 .nr(8)
13440 .kr(1)
13441 .sr(1)
13442 .m(4)
13443 .n(8)
13444 .k(1)
13445 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013446 .Test(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013447 }
13448#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13449
13450
13451#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080013452 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, k_eq_1) {
13453 TEST_REQUIRES_X86_SSE;
13454 GemmMicrokernelTester()
13455 .mr(5)
13456 .nr(8)
13457 .kr(1)
13458 .sr(1)
13459 .m(5)
13460 .n(8)
13461 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013462 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013463 }
13464
13465 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, strided_cn) {
13466 TEST_REQUIRES_X86_SSE;
13467 GemmMicrokernelTester()
13468 .mr(5)
13469 .nr(8)
13470 .kr(1)
13471 .sr(1)
13472 .m(5)
13473 .n(8)
13474 .k(1)
13475 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013476 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013477 }
13478
13479 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, k_eq_1_strided_a) {
13480 TEST_REQUIRES_X86_SSE;
13481 GemmMicrokernelTester()
13482 .mr(5)
13483 .nr(8)
13484 .kr(1)
13485 .sr(1)
13486 .m(5)
13487 .n(8)
13488 .k(1)
13489 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013490 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013491 }
13492
13493 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile) {
13494 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013495 for (uint32_t n = 1; n <= 8; n++) {
13496 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013497 GemmMicrokernelTester()
13498 .mr(5)
13499 .nr(8)
13500 .kr(1)
13501 .sr(1)
13502 .m(m)
13503 .n(n)
13504 .k(1)
13505 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013506 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013507 }
13508 }
13509 }
13510
13511 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile_m) {
13512 TEST_REQUIRES_X86_SSE;
13513 for (uint32_t m = 1; m <= 5; m++) {
13514 GemmMicrokernelTester()
13515 .mr(5)
13516 .nr(8)
13517 .kr(1)
13518 .sr(1)
13519 .m(m)
13520 .n(8)
13521 .k(1)
13522 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013523 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013524 }
13525 }
13526
13527 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile_n) {
13528 TEST_REQUIRES_X86_SSE;
13529 for (uint32_t n = 1; n <= 8; n++) {
13530 GemmMicrokernelTester()
13531 .mr(5)
13532 .nr(8)
13533 .kr(1)
13534 .sr(1)
13535 .m(5)
13536 .n(n)
13537 .k(1)
13538 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013539 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013540 }
13541 }
13542
13543 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, k_gt_1) {
13544 TEST_REQUIRES_X86_SSE;
13545 for (size_t k = 2; k < 10; k++) {
13546 GemmMicrokernelTester()
13547 .mr(5)
13548 .nr(8)
13549 .kr(1)
13550 .sr(1)
13551 .m(5)
13552 .n(8)
13553 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013554 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013555 }
13556 }
13557
13558 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, k_gt_1_strided_a) {
13559 TEST_REQUIRES_X86_SSE;
13560 for (size_t k = 2; k < 10; k++) {
13561 GemmMicrokernelTester()
13562 .mr(5)
13563 .nr(8)
13564 .kr(1)
13565 .sr(1)
13566 .m(5)
13567 .n(8)
13568 .k(k)
13569 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013570 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013571 }
13572 }
13573
13574 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, k_gt_1_subtile) {
13575 TEST_REQUIRES_X86_SSE;
13576 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013577 for (uint32_t n = 1; n <= 8; n++) {
13578 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013579 GemmMicrokernelTester()
13580 .mr(5)
13581 .nr(8)
13582 .kr(1)
13583 .sr(1)
13584 .m(m)
13585 .n(n)
13586 .k(k)
13587 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013588 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013589 }
13590 }
13591 }
13592 }
13593
13594 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, n_gt_8) {
13595 TEST_REQUIRES_X86_SSE;
13596 for (uint32_t n = 9; n < 16; n++) {
13597 for (size_t k = 1; k <= 5; k += 2) {
13598 GemmMicrokernelTester()
13599 .mr(5)
13600 .nr(8)
13601 .kr(1)
13602 .sr(1)
13603 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013604 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080013605 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013606 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013607 }
13608 }
13609 }
13610
13611 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, n_gt_8_strided_cn) {
13612 TEST_REQUIRES_X86_SSE;
13613 for (uint32_t n = 9; n < 16; n++) {
13614 for (size_t k = 1; k <= 5; k += 2) {
13615 GemmMicrokernelTester()
13616 .mr(5)
13617 .nr(8)
13618 .kr(1)
13619 .sr(1)
13620 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013621 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080013622 .k(k)
13623 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013624 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013625 }
13626 }
13627 }
13628
13629 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, n_gt_8_strided_a) {
13630 TEST_REQUIRES_X86_SSE;
13631 for (uint32_t n = 9; n < 16; n++) {
13632 for (size_t k = 1; k <= 5; k += 2) {
13633 GemmMicrokernelTester()
13634 .mr(5)
13635 .nr(8)
13636 .kr(1)
13637 .sr(1)
13638 .m(5)
13639 .n(n)
13640 .k(k)
13641 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013642 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013643 }
13644 }
13645 }
13646
13647 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, n_gt_8_subtile) {
13648 TEST_REQUIRES_X86_SSE;
13649 for (uint32_t n = 9; n < 16; n++) {
13650 for (size_t k = 1; k <= 5; k += 2) {
13651 for (uint32_t m = 1; m <= 5; m++) {
13652 GemmMicrokernelTester()
13653 .mr(5)
13654 .nr(8)
13655 .kr(1)
13656 .sr(1)
13657 .m(m)
13658 .n(n)
13659 .k(k)
13660 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013661 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013662 }
13663 }
13664 }
13665 }
13666
13667 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, n_div_8) {
13668 TEST_REQUIRES_X86_SSE;
13669 for (uint32_t n = 16; n <= 24; n += 8) {
13670 for (size_t k = 1; k <= 5; k += 2) {
13671 GemmMicrokernelTester()
13672 .mr(5)
13673 .nr(8)
13674 .kr(1)
13675 .sr(1)
13676 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013677 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080013678 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013679 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013680 }
13681 }
13682 }
13683
13684 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, n_div_8_strided_cn) {
13685 TEST_REQUIRES_X86_SSE;
13686 for (uint32_t n = 16; n <= 24; n += 8) {
13687 for (size_t k = 1; k <= 5; k += 2) {
13688 GemmMicrokernelTester()
13689 .mr(5)
13690 .nr(8)
13691 .kr(1)
13692 .sr(1)
13693 .m(5)
13694 .n(n)
13695 .k(k)
13696 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013697 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013698 }
13699 }
13700 }
13701
13702 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, n_div_8_strided_a) {
13703 TEST_REQUIRES_X86_SSE;
13704 for (uint32_t n = 16; n <= 24; n += 8) {
13705 for (size_t k = 1; k <= 5; k += 2) {
13706 GemmMicrokernelTester()
13707 .mr(5)
13708 .nr(8)
13709 .kr(1)
13710 .sr(1)
13711 .m(5)
13712 .n(n)
13713 .k(k)
13714 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013715 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013716 }
13717 }
13718 }
13719
13720 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, n_div_8_subtile) {
13721 TEST_REQUIRES_X86_SSE;
13722 for (uint32_t n = 16; n <= 24; n += 8) {
13723 for (size_t k = 1; k <= 5; k += 2) {
13724 for (uint32_t m = 1; m <= 5; m++) {
13725 GemmMicrokernelTester()
13726 .mr(5)
13727 .nr(8)
13728 .kr(1)
13729 .sr(1)
13730 .m(m)
13731 .n(n)
13732 .k(k)
13733 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013734 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013735 }
13736 }
13737 }
13738 }
13739
13740 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, strided_cm_subtile) {
13741 TEST_REQUIRES_X86_SSE;
13742 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013743 for (uint32_t n = 1; n <= 8; n++) {
13744 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013745 GemmMicrokernelTester()
13746 .mr(5)
13747 .nr(8)
13748 .kr(1)
13749 .sr(1)
13750 .m(m)
13751 .n(n)
13752 .k(k)
13753 .cm_stride(11)
13754 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013755 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013756 }
13757 }
13758 }
13759 }
13760
13761 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, qmin) {
13762 TEST_REQUIRES_X86_SSE;
13763 GemmMicrokernelTester()
13764 .mr(5)
13765 .nr(8)
13766 .kr(1)
13767 .sr(1)
13768 .m(5)
13769 .n(8)
13770 .k(1)
13771 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013772 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013773 }
13774
13775 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, qmax) {
13776 TEST_REQUIRES_X86_SSE;
13777 GemmMicrokernelTester()
13778 .mr(5)
13779 .nr(8)
13780 .kr(1)
13781 .sr(1)
13782 .m(5)
13783 .n(8)
13784 .k(1)
13785 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013786 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013787 }
13788
13789 TEST(F32_GEMM_MINMAX_5X8__SSE_LOAD1, strided_cm) {
13790 TEST_REQUIRES_X86_SSE;
13791 GemmMicrokernelTester()
13792 .mr(5)
13793 .nr(8)
13794 .kr(1)
13795 .sr(1)
13796 .m(5)
13797 .n(8)
13798 .k(1)
13799 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013800 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013801 }
13802#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13803
13804
13805#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080013806 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_eq_4) {
13807 TEST_REQUIRES_X86_SSE;
13808 GemmMicrokernelTester()
13809 .mr(3)
13810 .nr(8)
13811 .kr(1)
13812 .sr(1)
13813 .m(3)
13814 .n(8)
13815 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013816 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013817 }
13818
13819 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, strided_cn) {
13820 TEST_REQUIRES_X86_SSE;
13821 GemmMicrokernelTester()
13822 .mr(3)
13823 .nr(8)
13824 .kr(1)
13825 .sr(1)
13826 .m(3)
13827 .n(8)
13828 .k(4)
13829 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013830 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013831 }
13832
13833 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_eq_4_strided_a) {
13834 TEST_REQUIRES_X86_SSE;
13835 GemmMicrokernelTester()
13836 .mr(3)
13837 .nr(8)
13838 .kr(1)
13839 .sr(1)
13840 .m(3)
13841 .n(8)
13842 .k(4)
13843 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013844 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013845 }
13846
13847 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_eq_4_subtile) {
13848 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013849 for (uint32_t n = 1; n <= 8; n++) {
13850 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013851 GemmMicrokernelTester()
13852 .mr(3)
13853 .nr(8)
13854 .kr(1)
13855 .sr(1)
13856 .m(m)
13857 .n(n)
13858 .k(4)
13859 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013860 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013861 }
13862 }
13863 }
13864
13865 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_eq_4_subtile_m) {
13866 TEST_REQUIRES_X86_SSE;
13867 for (uint32_t m = 1; m <= 3; m++) {
13868 GemmMicrokernelTester()
13869 .mr(3)
13870 .nr(8)
13871 .kr(1)
13872 .sr(1)
13873 .m(m)
13874 .n(8)
13875 .k(4)
13876 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013877 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013878 }
13879 }
13880
13881 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_eq_4_subtile_n) {
13882 TEST_REQUIRES_X86_SSE;
13883 for (uint32_t n = 1; n <= 8; n++) {
13884 GemmMicrokernelTester()
13885 .mr(3)
13886 .nr(8)
13887 .kr(1)
13888 .sr(1)
13889 .m(3)
13890 .n(n)
13891 .k(4)
13892 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013893 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013894 }
13895 }
13896
13897 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_lt_4) {
13898 TEST_REQUIRES_X86_SSE;
13899 for (size_t k = 1; k < 4; k++) {
13900 GemmMicrokernelTester()
13901 .mr(3)
13902 .nr(8)
13903 .kr(1)
13904 .sr(1)
13905 .m(3)
13906 .n(8)
13907 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013908 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013909 }
13910 }
13911
13912 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_lt_4_strided_a) {
13913 TEST_REQUIRES_X86_SSE;
13914 for (size_t k = 1; k < 4; k++) {
13915 GemmMicrokernelTester()
13916 .mr(3)
13917 .nr(8)
13918 .kr(1)
13919 .sr(1)
13920 .m(3)
13921 .n(8)
13922 .k(k)
13923 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013924 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013925 }
13926 }
13927
13928 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_lt_4_subtile) {
13929 TEST_REQUIRES_X86_SSE;
13930 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013931 for (uint32_t n = 1; n <= 8; n++) {
13932 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013933 GemmMicrokernelTester()
13934 .mr(3)
13935 .nr(8)
13936 .kr(1)
13937 .sr(1)
13938 .m(m)
13939 .n(n)
13940 .k(k)
13941 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013942 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013943 }
13944 }
13945 }
13946 }
13947
13948 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_gt_4) {
13949 TEST_REQUIRES_X86_SSE;
13950 for (size_t k = 5; k < 8; k++) {
13951 GemmMicrokernelTester()
13952 .mr(3)
13953 .nr(8)
13954 .kr(1)
13955 .sr(1)
13956 .m(3)
13957 .n(8)
13958 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013959 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013960 }
13961 }
13962
13963 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_gt_4_strided_a) {
13964 TEST_REQUIRES_X86_SSE;
13965 for (size_t k = 5; k < 8; k++) {
13966 GemmMicrokernelTester()
13967 .mr(3)
13968 .nr(8)
13969 .kr(1)
13970 .sr(1)
13971 .m(3)
13972 .n(8)
13973 .k(k)
13974 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013975 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013976 }
13977 }
13978
13979 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_gt_4_subtile) {
13980 TEST_REQUIRES_X86_SSE;
13981 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013982 for (uint32_t n = 1; n <= 8; n++) {
13983 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013984 GemmMicrokernelTester()
13985 .mr(3)
13986 .nr(8)
13987 .kr(1)
13988 .sr(1)
13989 .m(m)
13990 .n(n)
13991 .k(k)
13992 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013993 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013994 }
13995 }
13996 }
13997 }
13998
13999 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_div_4) {
14000 TEST_REQUIRES_X86_SSE;
14001 for (size_t k = 8; k <= 40; k += 4) {
14002 GemmMicrokernelTester()
14003 .mr(3)
14004 .nr(8)
14005 .kr(1)
14006 .sr(1)
14007 .m(3)
14008 .n(8)
14009 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014010 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014011 }
14012 }
14013
14014 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_div_4_strided_a) {
14015 TEST_REQUIRES_X86_SSE;
14016 for (size_t k = 8; k <= 40; k += 4) {
14017 GemmMicrokernelTester()
14018 .mr(3)
14019 .nr(8)
14020 .kr(1)
14021 .sr(1)
14022 .m(3)
14023 .n(8)
14024 .k(k)
14025 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014026 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014027 }
14028 }
14029
14030 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, k_div_4_subtile) {
14031 TEST_REQUIRES_X86_SSE;
14032 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014033 for (uint32_t n = 1; n <= 8; n++) {
14034 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014035 GemmMicrokernelTester()
14036 .mr(3)
14037 .nr(8)
14038 .kr(1)
14039 .sr(1)
14040 .m(m)
14041 .n(n)
14042 .k(k)
14043 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014044 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014045 }
14046 }
14047 }
14048 }
14049
14050 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, n_gt_8) {
14051 TEST_REQUIRES_X86_SSE;
14052 for (uint32_t n = 9; n < 16; n++) {
14053 for (size_t k = 1; k <= 20; k += 5) {
14054 GemmMicrokernelTester()
14055 .mr(3)
14056 .nr(8)
14057 .kr(1)
14058 .sr(1)
14059 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014060 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014061 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014062 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014063 }
14064 }
14065 }
14066
14067 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, n_gt_8_strided_cn) {
14068 TEST_REQUIRES_X86_SSE;
14069 for (uint32_t n = 9; n < 16; n++) {
14070 for (size_t k = 1; k <= 20; k += 5) {
14071 GemmMicrokernelTester()
14072 .mr(3)
14073 .nr(8)
14074 .kr(1)
14075 .sr(1)
14076 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014077 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014078 .k(k)
14079 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014080 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014081 }
14082 }
14083 }
14084
14085 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, n_gt_8_strided_a) {
14086 TEST_REQUIRES_X86_SSE;
14087 for (uint32_t n = 9; n < 16; n++) {
14088 for (size_t k = 1; k <= 20; k += 5) {
14089 GemmMicrokernelTester()
14090 .mr(3)
14091 .nr(8)
14092 .kr(1)
14093 .sr(1)
14094 .m(3)
14095 .n(n)
14096 .k(k)
14097 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014098 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014099 }
14100 }
14101 }
14102
14103 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, n_gt_8_subtile) {
14104 TEST_REQUIRES_X86_SSE;
14105 for (uint32_t n = 9; n < 16; n++) {
14106 for (size_t k = 1; k <= 20; k += 5) {
14107 for (uint32_t m = 1; m <= 3; m++) {
14108 GemmMicrokernelTester()
14109 .mr(3)
14110 .nr(8)
14111 .kr(1)
14112 .sr(1)
14113 .m(m)
14114 .n(n)
14115 .k(k)
14116 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014117 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014118 }
14119 }
14120 }
14121 }
14122
14123 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, n_div_8) {
14124 TEST_REQUIRES_X86_SSE;
14125 for (uint32_t n = 16; n <= 24; n += 8) {
14126 for (size_t k = 1; k <= 20; k += 5) {
14127 GemmMicrokernelTester()
14128 .mr(3)
14129 .nr(8)
14130 .kr(1)
14131 .sr(1)
14132 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014133 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014134 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014135 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014136 }
14137 }
14138 }
14139
14140 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, n_div_8_strided_cn) {
14141 TEST_REQUIRES_X86_SSE;
14142 for (uint32_t n = 16; n <= 24; n += 8) {
14143 for (size_t k = 1; k <= 20; k += 5) {
14144 GemmMicrokernelTester()
14145 .mr(3)
14146 .nr(8)
14147 .kr(1)
14148 .sr(1)
14149 .m(3)
14150 .n(n)
14151 .k(k)
14152 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014153 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014154 }
14155 }
14156 }
14157
14158 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, n_div_8_strided_a) {
14159 TEST_REQUIRES_X86_SSE;
14160 for (uint32_t n = 16; n <= 24; n += 8) {
14161 for (size_t k = 1; k <= 20; k += 5) {
14162 GemmMicrokernelTester()
14163 .mr(3)
14164 .nr(8)
14165 .kr(1)
14166 .sr(1)
14167 .m(3)
14168 .n(n)
14169 .k(k)
14170 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014171 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014172 }
14173 }
14174 }
14175
14176 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, n_div_8_subtile) {
14177 TEST_REQUIRES_X86_SSE;
14178 for (uint32_t n = 16; n <= 24; n += 8) {
14179 for (size_t k = 1; k <= 20; k += 5) {
14180 for (uint32_t m = 1; m <= 3; m++) {
14181 GemmMicrokernelTester()
14182 .mr(3)
14183 .nr(8)
14184 .kr(1)
14185 .sr(1)
14186 .m(m)
14187 .n(n)
14188 .k(k)
14189 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014190 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014191 }
14192 }
14193 }
14194 }
14195
14196 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, strided_cm_subtile) {
14197 TEST_REQUIRES_X86_SSE;
14198 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014199 for (uint32_t n = 1; n <= 8; n++) {
14200 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014201 GemmMicrokernelTester()
14202 .mr(3)
14203 .nr(8)
14204 .kr(1)
14205 .sr(1)
14206 .m(m)
14207 .n(n)
14208 .k(k)
14209 .cm_stride(11)
14210 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014211 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014212 }
14213 }
14214 }
14215 }
14216
14217 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, qmin) {
14218 TEST_REQUIRES_X86_SSE;
14219 GemmMicrokernelTester()
14220 .mr(3)
14221 .nr(8)
14222 .kr(1)
14223 .sr(1)
14224 .m(3)
14225 .n(8)
14226 .k(4)
14227 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014228 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014229 }
14230
14231 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, qmax) {
14232 TEST_REQUIRES_X86_SSE;
14233 GemmMicrokernelTester()
14234 .mr(3)
14235 .nr(8)
14236 .kr(1)
14237 .sr(1)
14238 .m(3)
14239 .n(8)
14240 .k(4)
14241 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014242 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014243 }
14244
14245 TEST(F32_GEMM_MINMAX_3X8__SSE_DUP, strided_cm) {
14246 TEST_REQUIRES_X86_SSE;
14247 GemmMicrokernelTester()
14248 .mr(3)
14249 .nr(8)
14250 .kr(1)
14251 .sr(1)
14252 .m(3)
14253 .n(8)
14254 .k(4)
14255 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014256 .Test(xnn_f32_gemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014257 }
14258#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14259
14260
14261#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080014262 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_eq_4) {
14263 TEST_REQUIRES_X86_SSE;
14264 GemmMicrokernelTester()
14265 .mr(5)
14266 .nr(8)
14267 .kr(1)
14268 .sr(1)
14269 .m(5)
14270 .n(8)
14271 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014272 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014273 }
14274
14275 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, strided_cn) {
14276 TEST_REQUIRES_X86_SSE;
14277 GemmMicrokernelTester()
14278 .mr(5)
14279 .nr(8)
14280 .kr(1)
14281 .sr(1)
14282 .m(5)
14283 .n(8)
14284 .k(4)
14285 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014286 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014287 }
14288
14289 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_eq_4_strided_a) {
14290 TEST_REQUIRES_X86_SSE;
14291 GemmMicrokernelTester()
14292 .mr(5)
14293 .nr(8)
14294 .kr(1)
14295 .sr(1)
14296 .m(5)
14297 .n(8)
14298 .k(4)
14299 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014300 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014301 }
14302
14303 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_eq_4_subtile) {
14304 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014305 for (uint32_t n = 1; n <= 8; n++) {
14306 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014307 GemmMicrokernelTester()
14308 .mr(5)
14309 .nr(8)
14310 .kr(1)
14311 .sr(1)
14312 .m(m)
14313 .n(n)
14314 .k(4)
14315 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014316 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014317 }
14318 }
14319 }
14320
14321 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_eq_4_subtile_m) {
14322 TEST_REQUIRES_X86_SSE;
14323 for (uint32_t m = 1; m <= 5; m++) {
14324 GemmMicrokernelTester()
14325 .mr(5)
14326 .nr(8)
14327 .kr(1)
14328 .sr(1)
14329 .m(m)
14330 .n(8)
14331 .k(4)
14332 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014333 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014334 }
14335 }
14336
14337 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_eq_4_subtile_n) {
14338 TEST_REQUIRES_X86_SSE;
14339 for (uint32_t n = 1; n <= 8; n++) {
14340 GemmMicrokernelTester()
14341 .mr(5)
14342 .nr(8)
14343 .kr(1)
14344 .sr(1)
14345 .m(5)
14346 .n(n)
14347 .k(4)
14348 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014349 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014350 }
14351 }
14352
14353 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_lt_4) {
14354 TEST_REQUIRES_X86_SSE;
14355 for (size_t k = 1; k < 4; k++) {
14356 GemmMicrokernelTester()
14357 .mr(5)
14358 .nr(8)
14359 .kr(1)
14360 .sr(1)
14361 .m(5)
14362 .n(8)
14363 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014364 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014365 }
14366 }
14367
14368 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_lt_4_strided_a) {
14369 TEST_REQUIRES_X86_SSE;
14370 for (size_t k = 1; k < 4; k++) {
14371 GemmMicrokernelTester()
14372 .mr(5)
14373 .nr(8)
14374 .kr(1)
14375 .sr(1)
14376 .m(5)
14377 .n(8)
14378 .k(k)
14379 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014380 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014381 }
14382 }
14383
14384 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_lt_4_subtile) {
14385 TEST_REQUIRES_X86_SSE;
14386 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014387 for (uint32_t n = 1; n <= 8; n++) {
14388 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014389 GemmMicrokernelTester()
14390 .mr(5)
14391 .nr(8)
14392 .kr(1)
14393 .sr(1)
14394 .m(m)
14395 .n(n)
14396 .k(k)
14397 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014398 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014399 }
14400 }
14401 }
14402 }
14403
14404 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_gt_4) {
14405 TEST_REQUIRES_X86_SSE;
14406 for (size_t k = 5; k < 8; k++) {
14407 GemmMicrokernelTester()
14408 .mr(5)
14409 .nr(8)
14410 .kr(1)
14411 .sr(1)
14412 .m(5)
14413 .n(8)
14414 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014415 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014416 }
14417 }
14418
14419 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_gt_4_strided_a) {
14420 TEST_REQUIRES_X86_SSE;
14421 for (size_t k = 5; k < 8; k++) {
14422 GemmMicrokernelTester()
14423 .mr(5)
14424 .nr(8)
14425 .kr(1)
14426 .sr(1)
14427 .m(5)
14428 .n(8)
14429 .k(k)
14430 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014431 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014432 }
14433 }
14434
14435 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_gt_4_subtile) {
14436 TEST_REQUIRES_X86_SSE;
14437 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014438 for (uint32_t n = 1; n <= 8; n++) {
14439 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014440 GemmMicrokernelTester()
14441 .mr(5)
14442 .nr(8)
14443 .kr(1)
14444 .sr(1)
14445 .m(m)
14446 .n(n)
14447 .k(k)
14448 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014449 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014450 }
14451 }
14452 }
14453 }
14454
14455 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_div_4) {
14456 TEST_REQUIRES_X86_SSE;
14457 for (size_t k = 8; k <= 40; k += 4) {
14458 GemmMicrokernelTester()
14459 .mr(5)
14460 .nr(8)
14461 .kr(1)
14462 .sr(1)
14463 .m(5)
14464 .n(8)
14465 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014466 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014467 }
14468 }
14469
14470 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_div_4_strided_a) {
14471 TEST_REQUIRES_X86_SSE;
14472 for (size_t k = 8; k <= 40; k += 4) {
14473 GemmMicrokernelTester()
14474 .mr(5)
14475 .nr(8)
14476 .kr(1)
14477 .sr(1)
14478 .m(5)
14479 .n(8)
14480 .k(k)
14481 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014482 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014483 }
14484 }
14485
14486 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, k_div_4_subtile) {
14487 TEST_REQUIRES_X86_SSE;
14488 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014489 for (uint32_t n = 1; n <= 8; n++) {
14490 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014491 GemmMicrokernelTester()
14492 .mr(5)
14493 .nr(8)
14494 .kr(1)
14495 .sr(1)
14496 .m(m)
14497 .n(n)
14498 .k(k)
14499 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014500 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014501 }
14502 }
14503 }
14504 }
14505
14506 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, n_gt_8) {
14507 TEST_REQUIRES_X86_SSE;
14508 for (uint32_t n = 9; n < 16; n++) {
14509 for (size_t k = 1; k <= 20; k += 5) {
14510 GemmMicrokernelTester()
14511 .mr(5)
14512 .nr(8)
14513 .kr(1)
14514 .sr(1)
14515 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014516 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014517 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014518 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014519 }
14520 }
14521 }
14522
14523 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, n_gt_8_strided_cn) {
14524 TEST_REQUIRES_X86_SSE;
14525 for (uint32_t n = 9; n < 16; n++) {
14526 for (size_t k = 1; k <= 20; k += 5) {
14527 GemmMicrokernelTester()
14528 .mr(5)
14529 .nr(8)
14530 .kr(1)
14531 .sr(1)
14532 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014533 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014534 .k(k)
14535 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014536 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014537 }
14538 }
14539 }
14540
14541 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, n_gt_8_strided_a) {
14542 TEST_REQUIRES_X86_SSE;
14543 for (uint32_t n = 9; n < 16; n++) {
14544 for (size_t k = 1; k <= 20; k += 5) {
14545 GemmMicrokernelTester()
14546 .mr(5)
14547 .nr(8)
14548 .kr(1)
14549 .sr(1)
14550 .m(5)
14551 .n(n)
14552 .k(k)
14553 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014554 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014555 }
14556 }
14557 }
14558
14559 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, n_gt_8_subtile) {
14560 TEST_REQUIRES_X86_SSE;
14561 for (uint32_t n = 9; n < 16; n++) {
14562 for (size_t k = 1; k <= 20; k += 5) {
14563 for (uint32_t m = 1; m <= 5; m++) {
14564 GemmMicrokernelTester()
14565 .mr(5)
14566 .nr(8)
14567 .kr(1)
14568 .sr(1)
14569 .m(m)
14570 .n(n)
14571 .k(k)
14572 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014573 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014574 }
14575 }
14576 }
14577 }
14578
14579 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, n_div_8) {
14580 TEST_REQUIRES_X86_SSE;
14581 for (uint32_t n = 16; n <= 24; n += 8) {
14582 for (size_t k = 1; k <= 20; k += 5) {
14583 GemmMicrokernelTester()
14584 .mr(5)
14585 .nr(8)
14586 .kr(1)
14587 .sr(1)
14588 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014589 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014590 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014591 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014592 }
14593 }
14594 }
14595
14596 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, n_div_8_strided_cn) {
14597 TEST_REQUIRES_X86_SSE;
14598 for (uint32_t n = 16; n <= 24; n += 8) {
14599 for (size_t k = 1; k <= 20; k += 5) {
14600 GemmMicrokernelTester()
14601 .mr(5)
14602 .nr(8)
14603 .kr(1)
14604 .sr(1)
14605 .m(5)
14606 .n(n)
14607 .k(k)
14608 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014609 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014610 }
14611 }
14612 }
14613
14614 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, n_div_8_strided_a) {
14615 TEST_REQUIRES_X86_SSE;
14616 for (uint32_t n = 16; n <= 24; n += 8) {
14617 for (size_t k = 1; k <= 20; k += 5) {
14618 GemmMicrokernelTester()
14619 .mr(5)
14620 .nr(8)
14621 .kr(1)
14622 .sr(1)
14623 .m(5)
14624 .n(n)
14625 .k(k)
14626 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014627 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014628 }
14629 }
14630 }
14631
14632 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, n_div_8_subtile) {
14633 TEST_REQUIRES_X86_SSE;
14634 for (uint32_t n = 16; n <= 24; n += 8) {
14635 for (size_t k = 1; k <= 20; k += 5) {
14636 for (uint32_t m = 1; m <= 5; m++) {
14637 GemmMicrokernelTester()
14638 .mr(5)
14639 .nr(8)
14640 .kr(1)
14641 .sr(1)
14642 .m(m)
14643 .n(n)
14644 .k(k)
14645 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014646 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014647 }
14648 }
14649 }
14650 }
14651
14652 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, strided_cm_subtile) {
14653 TEST_REQUIRES_X86_SSE;
14654 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014655 for (uint32_t n = 1; n <= 8; n++) {
14656 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014657 GemmMicrokernelTester()
14658 .mr(5)
14659 .nr(8)
14660 .kr(1)
14661 .sr(1)
14662 .m(m)
14663 .n(n)
14664 .k(k)
14665 .cm_stride(11)
14666 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014667 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014668 }
14669 }
14670 }
14671 }
14672
14673 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, qmin) {
14674 TEST_REQUIRES_X86_SSE;
14675 GemmMicrokernelTester()
14676 .mr(5)
14677 .nr(8)
14678 .kr(1)
14679 .sr(1)
14680 .m(5)
14681 .n(8)
14682 .k(4)
14683 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014684 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014685 }
14686
14687 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, qmax) {
14688 TEST_REQUIRES_X86_SSE;
14689 GemmMicrokernelTester()
14690 .mr(5)
14691 .nr(8)
14692 .kr(1)
14693 .sr(1)
14694 .m(5)
14695 .n(8)
14696 .k(4)
14697 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014698 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014699 }
14700
14701 TEST(F32_GEMM_MINMAX_5X8__SSE_DUP, strided_cm) {
14702 TEST_REQUIRES_X86_SSE;
14703 GemmMicrokernelTester()
14704 .mr(5)
14705 .nr(8)
14706 .kr(1)
14707 .sr(1)
14708 .m(5)
14709 .n(8)
14710 .k(4)
14711 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014712 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014713 }
14714#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14715
14716
14717#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080014718 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_eq_4) {
14719 TEST_REQUIRES_X86_SSE;
14720 GemmMicrokernelTester()
14721 .mr(3)
14722 .nr(8)
14723 .kr(1)
14724 .sr(4)
14725 .m(3)
14726 .n(8)
14727 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014728 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014729 }
14730
14731 TEST(F32_GEMM_MINMAX_3X8S4__SSE, strided_cn) {
14732 TEST_REQUIRES_X86_SSE;
14733 GemmMicrokernelTester()
14734 .mr(3)
14735 .nr(8)
14736 .kr(1)
14737 .sr(4)
14738 .m(3)
14739 .n(8)
14740 .k(4)
14741 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014742 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014743 }
14744
14745 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_eq_4_strided_a) {
14746 TEST_REQUIRES_X86_SSE;
14747 GemmMicrokernelTester()
14748 .mr(3)
14749 .nr(8)
14750 .kr(1)
14751 .sr(4)
14752 .m(3)
14753 .n(8)
14754 .k(4)
14755 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014756 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014757 }
14758
14759 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_eq_4_subtile) {
14760 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014761 for (uint32_t n = 1; n <= 8; n++) {
14762 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014763 GemmMicrokernelTester()
14764 .mr(3)
14765 .nr(8)
14766 .kr(1)
14767 .sr(4)
14768 .m(m)
14769 .n(n)
14770 .k(4)
14771 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014772 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014773 }
14774 }
14775 }
14776
14777 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_eq_4_subtile_m) {
14778 TEST_REQUIRES_X86_SSE;
14779 for (uint32_t m = 1; m <= 3; m++) {
14780 GemmMicrokernelTester()
14781 .mr(3)
14782 .nr(8)
14783 .kr(1)
14784 .sr(4)
14785 .m(m)
14786 .n(8)
14787 .k(4)
14788 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014789 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014790 }
14791 }
14792
14793 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_eq_4_subtile_n) {
14794 TEST_REQUIRES_X86_SSE;
14795 for (uint32_t n = 1; n <= 8; n++) {
14796 GemmMicrokernelTester()
14797 .mr(3)
14798 .nr(8)
14799 .kr(1)
14800 .sr(4)
14801 .m(3)
14802 .n(n)
14803 .k(4)
14804 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014805 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014806 }
14807 }
14808
14809 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_lt_4) {
14810 TEST_REQUIRES_X86_SSE;
14811 for (size_t k = 1; k < 4; k++) {
14812 GemmMicrokernelTester()
14813 .mr(3)
14814 .nr(8)
14815 .kr(1)
14816 .sr(4)
14817 .m(3)
14818 .n(8)
14819 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014820 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014821 }
14822 }
14823
14824 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_lt_4_strided_a) {
14825 TEST_REQUIRES_X86_SSE;
14826 for (size_t k = 1; k < 4; k++) {
14827 GemmMicrokernelTester()
14828 .mr(3)
14829 .nr(8)
14830 .kr(1)
14831 .sr(4)
14832 .m(3)
14833 .n(8)
14834 .k(k)
14835 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014836 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014837 }
14838 }
14839
14840 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_lt_4_subtile) {
14841 TEST_REQUIRES_X86_SSE;
14842 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014843 for (uint32_t n = 1; n <= 8; n++) {
14844 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014845 GemmMicrokernelTester()
14846 .mr(3)
14847 .nr(8)
14848 .kr(1)
14849 .sr(4)
14850 .m(m)
14851 .n(n)
14852 .k(k)
14853 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014854 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014855 }
14856 }
14857 }
14858 }
14859
14860 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_gt_4) {
14861 TEST_REQUIRES_X86_SSE;
14862 for (size_t k = 5; k < 8; k++) {
14863 GemmMicrokernelTester()
14864 .mr(3)
14865 .nr(8)
14866 .kr(1)
14867 .sr(4)
14868 .m(3)
14869 .n(8)
14870 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014871 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014872 }
14873 }
14874
14875 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_gt_4_strided_a) {
14876 TEST_REQUIRES_X86_SSE;
14877 for (size_t k = 5; k < 8; k++) {
14878 GemmMicrokernelTester()
14879 .mr(3)
14880 .nr(8)
14881 .kr(1)
14882 .sr(4)
14883 .m(3)
14884 .n(8)
14885 .k(k)
14886 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014887 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014888 }
14889 }
14890
14891 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_gt_4_subtile) {
14892 TEST_REQUIRES_X86_SSE;
14893 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014894 for (uint32_t n = 1; n <= 8; n++) {
14895 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014896 GemmMicrokernelTester()
14897 .mr(3)
14898 .nr(8)
14899 .kr(1)
14900 .sr(4)
14901 .m(m)
14902 .n(n)
14903 .k(k)
14904 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014905 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014906 }
14907 }
14908 }
14909 }
14910
14911 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_div_4) {
14912 TEST_REQUIRES_X86_SSE;
14913 for (size_t k = 8; k <= 40; k += 4) {
14914 GemmMicrokernelTester()
14915 .mr(3)
14916 .nr(8)
14917 .kr(1)
14918 .sr(4)
14919 .m(3)
14920 .n(8)
14921 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014922 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014923 }
14924 }
14925
14926 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_div_4_strided_a) {
14927 TEST_REQUIRES_X86_SSE;
14928 for (size_t k = 8; k <= 40; k += 4) {
14929 GemmMicrokernelTester()
14930 .mr(3)
14931 .nr(8)
14932 .kr(1)
14933 .sr(4)
14934 .m(3)
14935 .n(8)
14936 .k(k)
14937 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014938 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014939 }
14940 }
14941
14942 TEST(F32_GEMM_MINMAX_3X8S4__SSE, k_div_4_subtile) {
14943 TEST_REQUIRES_X86_SSE;
14944 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014945 for (uint32_t n = 1; n <= 8; n++) {
14946 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014947 GemmMicrokernelTester()
14948 .mr(3)
14949 .nr(8)
14950 .kr(1)
14951 .sr(4)
14952 .m(m)
14953 .n(n)
14954 .k(k)
14955 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014956 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014957 }
14958 }
14959 }
14960 }
14961
14962 TEST(F32_GEMM_MINMAX_3X8S4__SSE, n_gt_8) {
14963 TEST_REQUIRES_X86_SSE;
14964 for (uint32_t n = 9; n < 16; n++) {
14965 for (size_t k = 1; k <= 20; k += 5) {
14966 GemmMicrokernelTester()
14967 .mr(3)
14968 .nr(8)
14969 .kr(1)
14970 .sr(4)
14971 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014972 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014973 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014974 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014975 }
14976 }
14977 }
14978
14979 TEST(F32_GEMM_MINMAX_3X8S4__SSE, n_gt_8_strided_cn) {
14980 TEST_REQUIRES_X86_SSE;
14981 for (uint32_t n = 9; n < 16; n++) {
14982 for (size_t k = 1; k <= 20; k += 5) {
14983 GemmMicrokernelTester()
14984 .mr(3)
14985 .nr(8)
14986 .kr(1)
14987 .sr(4)
14988 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014989 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014990 .k(k)
14991 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014992 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014993 }
14994 }
14995 }
14996
14997 TEST(F32_GEMM_MINMAX_3X8S4__SSE, n_gt_8_strided_a) {
14998 TEST_REQUIRES_X86_SSE;
14999 for (uint32_t n = 9; n < 16; n++) {
15000 for (size_t k = 1; k <= 20; k += 5) {
15001 GemmMicrokernelTester()
15002 .mr(3)
15003 .nr(8)
15004 .kr(1)
15005 .sr(4)
15006 .m(3)
15007 .n(n)
15008 .k(k)
15009 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015010 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015011 }
15012 }
15013 }
15014
15015 TEST(F32_GEMM_MINMAX_3X8S4__SSE, n_gt_8_subtile) {
15016 TEST_REQUIRES_X86_SSE;
15017 for (uint32_t n = 9; n < 16; n++) {
15018 for (size_t k = 1; k <= 20; k += 5) {
15019 for (uint32_t m = 1; m <= 3; m++) {
15020 GemmMicrokernelTester()
15021 .mr(3)
15022 .nr(8)
15023 .kr(1)
15024 .sr(4)
15025 .m(m)
15026 .n(n)
15027 .k(k)
15028 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015029 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015030 }
15031 }
15032 }
15033 }
15034
15035 TEST(F32_GEMM_MINMAX_3X8S4__SSE, n_div_8) {
15036 TEST_REQUIRES_X86_SSE;
15037 for (uint32_t n = 16; n <= 24; n += 8) {
15038 for (size_t k = 1; k <= 20; k += 5) {
15039 GemmMicrokernelTester()
15040 .mr(3)
15041 .nr(8)
15042 .kr(1)
15043 .sr(4)
15044 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015045 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015046 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015047 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015048 }
15049 }
15050 }
15051
15052 TEST(F32_GEMM_MINMAX_3X8S4__SSE, n_div_8_strided_cn) {
15053 TEST_REQUIRES_X86_SSE;
15054 for (uint32_t n = 16; n <= 24; n += 8) {
15055 for (size_t k = 1; k <= 20; k += 5) {
15056 GemmMicrokernelTester()
15057 .mr(3)
15058 .nr(8)
15059 .kr(1)
15060 .sr(4)
15061 .m(3)
15062 .n(n)
15063 .k(k)
15064 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015065 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015066 }
15067 }
15068 }
15069
15070 TEST(F32_GEMM_MINMAX_3X8S4__SSE, n_div_8_strided_a) {
15071 TEST_REQUIRES_X86_SSE;
15072 for (uint32_t n = 16; n <= 24; n += 8) {
15073 for (size_t k = 1; k <= 20; k += 5) {
15074 GemmMicrokernelTester()
15075 .mr(3)
15076 .nr(8)
15077 .kr(1)
15078 .sr(4)
15079 .m(3)
15080 .n(n)
15081 .k(k)
15082 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015083 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015084 }
15085 }
15086 }
15087
15088 TEST(F32_GEMM_MINMAX_3X8S4__SSE, n_div_8_subtile) {
15089 TEST_REQUIRES_X86_SSE;
15090 for (uint32_t n = 16; n <= 24; n += 8) {
15091 for (size_t k = 1; k <= 20; k += 5) {
15092 for (uint32_t m = 1; m <= 3; m++) {
15093 GemmMicrokernelTester()
15094 .mr(3)
15095 .nr(8)
15096 .kr(1)
15097 .sr(4)
15098 .m(m)
15099 .n(n)
15100 .k(k)
15101 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015102 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015103 }
15104 }
15105 }
15106 }
15107
15108 TEST(F32_GEMM_MINMAX_3X8S4__SSE, strided_cm_subtile) {
15109 TEST_REQUIRES_X86_SSE;
15110 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015111 for (uint32_t n = 1; n <= 8; n++) {
15112 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015113 GemmMicrokernelTester()
15114 .mr(3)
15115 .nr(8)
15116 .kr(1)
15117 .sr(4)
15118 .m(m)
15119 .n(n)
15120 .k(k)
15121 .cm_stride(11)
15122 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015123 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015124 }
15125 }
15126 }
15127 }
15128
15129 TEST(F32_GEMM_MINMAX_3X8S4__SSE, qmin) {
15130 TEST_REQUIRES_X86_SSE;
15131 GemmMicrokernelTester()
15132 .mr(3)
15133 .nr(8)
15134 .kr(1)
15135 .sr(4)
15136 .m(3)
15137 .n(8)
15138 .k(4)
15139 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015140 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015141 }
15142
15143 TEST(F32_GEMM_MINMAX_3X8S4__SSE, qmax) {
15144 TEST_REQUIRES_X86_SSE;
15145 GemmMicrokernelTester()
15146 .mr(3)
15147 .nr(8)
15148 .kr(1)
15149 .sr(4)
15150 .m(3)
15151 .n(8)
15152 .k(4)
15153 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015154 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015155 }
15156
15157 TEST(F32_GEMM_MINMAX_3X8S4__SSE, strided_cm) {
15158 TEST_REQUIRES_X86_SSE;
15159 GemmMicrokernelTester()
15160 .mr(3)
15161 .nr(8)
15162 .kr(1)
15163 .sr(4)
15164 .m(3)
15165 .n(8)
15166 .k(4)
15167 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015168 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015169 }
15170#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15171
15172
15173#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080015174 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_eq_4) {
15175 TEST_REQUIRES_X86_SSE2;
15176 GemmMicrokernelTester()
15177 .mr(1)
15178 .nr(8)
15179 .kr(1)
15180 .sr(1)
15181 .m(1)
15182 .n(8)
15183 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015184 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015185 }
15186
15187 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, strided_cn) {
15188 TEST_REQUIRES_X86_SSE2;
15189 GemmMicrokernelTester()
15190 .mr(1)
15191 .nr(8)
15192 .kr(1)
15193 .sr(1)
15194 .m(1)
15195 .n(8)
15196 .k(4)
15197 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015198 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015199 }
15200
15201 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_eq_4_strided_a) {
15202 TEST_REQUIRES_X86_SSE2;
15203 GemmMicrokernelTester()
15204 .mr(1)
15205 .nr(8)
15206 .kr(1)
15207 .sr(1)
15208 .m(1)
15209 .n(8)
15210 .k(4)
15211 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015212 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015213 }
15214
15215 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile) {
15216 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015217 for (uint32_t n = 1; n <= 8; n++) {
15218 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015219 GemmMicrokernelTester()
15220 .mr(1)
15221 .nr(8)
15222 .kr(1)
15223 .sr(1)
15224 .m(m)
15225 .n(n)
15226 .k(4)
15227 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015228 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015229 }
15230 }
15231 }
15232
15233 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile_m) {
15234 TEST_REQUIRES_X86_SSE2;
15235 for (uint32_t m = 1; m <= 1; m++) {
15236 GemmMicrokernelTester()
15237 .mr(1)
15238 .nr(8)
15239 .kr(1)
15240 .sr(1)
15241 .m(m)
15242 .n(8)
15243 .k(4)
15244 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015245 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015246 }
15247 }
15248
15249 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile_n) {
15250 TEST_REQUIRES_X86_SSE2;
15251 for (uint32_t n = 1; n <= 8; n++) {
15252 GemmMicrokernelTester()
15253 .mr(1)
15254 .nr(8)
15255 .kr(1)
15256 .sr(1)
15257 .m(1)
15258 .n(n)
15259 .k(4)
15260 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015261 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015262 }
15263 }
15264
15265 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_lt_4) {
15266 TEST_REQUIRES_X86_SSE2;
15267 for (size_t k = 1; k < 4; k++) {
15268 GemmMicrokernelTester()
15269 .mr(1)
15270 .nr(8)
15271 .kr(1)
15272 .sr(1)
15273 .m(1)
15274 .n(8)
15275 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015276 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015277 }
15278 }
15279
15280 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_lt_4_strided_a) {
15281 TEST_REQUIRES_X86_SSE2;
15282 for (size_t k = 1; k < 4; k++) {
15283 GemmMicrokernelTester()
15284 .mr(1)
15285 .nr(8)
15286 .kr(1)
15287 .sr(1)
15288 .m(1)
15289 .n(8)
15290 .k(k)
15291 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015292 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015293 }
15294 }
15295
15296 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_lt_4_subtile) {
15297 TEST_REQUIRES_X86_SSE2;
15298 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015299 for (uint32_t n = 1; n <= 8; n++) {
15300 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015301 GemmMicrokernelTester()
15302 .mr(1)
15303 .nr(8)
15304 .kr(1)
15305 .sr(1)
15306 .m(m)
15307 .n(n)
15308 .k(k)
15309 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015310 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015311 }
15312 }
15313 }
15314 }
15315
15316 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_gt_4) {
15317 TEST_REQUIRES_X86_SSE2;
15318 for (size_t k = 5; k < 8; k++) {
15319 GemmMicrokernelTester()
15320 .mr(1)
15321 .nr(8)
15322 .kr(1)
15323 .sr(1)
15324 .m(1)
15325 .n(8)
15326 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015327 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015328 }
15329 }
15330
15331 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_gt_4_strided_a) {
15332 TEST_REQUIRES_X86_SSE2;
15333 for (size_t k = 5; k < 8; k++) {
15334 GemmMicrokernelTester()
15335 .mr(1)
15336 .nr(8)
15337 .kr(1)
15338 .sr(1)
15339 .m(1)
15340 .n(8)
15341 .k(k)
15342 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015343 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015344 }
15345 }
15346
15347 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_gt_4_subtile) {
15348 TEST_REQUIRES_X86_SSE2;
15349 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015350 for (uint32_t n = 1; n <= 8; n++) {
15351 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015352 GemmMicrokernelTester()
15353 .mr(1)
15354 .nr(8)
15355 .kr(1)
15356 .sr(1)
15357 .m(m)
15358 .n(n)
15359 .k(k)
15360 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015361 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015362 }
15363 }
15364 }
15365 }
15366
15367 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_div_4) {
15368 TEST_REQUIRES_X86_SSE2;
15369 for (size_t k = 8; k <= 40; k += 4) {
15370 GemmMicrokernelTester()
15371 .mr(1)
15372 .nr(8)
15373 .kr(1)
15374 .sr(1)
15375 .m(1)
15376 .n(8)
15377 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015378 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015379 }
15380 }
15381
15382 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_div_4_strided_a) {
15383 TEST_REQUIRES_X86_SSE2;
15384 for (size_t k = 8; k <= 40; k += 4) {
15385 GemmMicrokernelTester()
15386 .mr(1)
15387 .nr(8)
15388 .kr(1)
15389 .sr(1)
15390 .m(1)
15391 .n(8)
15392 .k(k)
15393 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015394 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015395 }
15396 }
15397
15398 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, k_div_4_subtile) {
15399 TEST_REQUIRES_X86_SSE2;
15400 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015401 for (uint32_t n = 1; n <= 8; n++) {
15402 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015403 GemmMicrokernelTester()
15404 .mr(1)
15405 .nr(8)
15406 .kr(1)
15407 .sr(1)
15408 .m(m)
15409 .n(n)
15410 .k(k)
15411 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015412 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015413 }
15414 }
15415 }
15416 }
15417
15418 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, n_gt_8) {
15419 TEST_REQUIRES_X86_SSE2;
15420 for (uint32_t n = 9; n < 16; n++) {
15421 for (size_t k = 1; k <= 20; k += 5) {
15422 GemmMicrokernelTester()
15423 .mr(1)
15424 .nr(8)
15425 .kr(1)
15426 .sr(1)
15427 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015428 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015429 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015430 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015431 }
15432 }
15433 }
15434
15435 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, n_gt_8_strided_cn) {
15436 TEST_REQUIRES_X86_SSE2;
15437 for (uint32_t n = 9; n < 16; n++) {
15438 for (size_t k = 1; k <= 20; k += 5) {
15439 GemmMicrokernelTester()
15440 .mr(1)
15441 .nr(8)
15442 .kr(1)
15443 .sr(1)
15444 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015445 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015446 .k(k)
15447 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015448 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015449 }
15450 }
15451 }
15452
15453 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, n_gt_8_strided_a) {
15454 TEST_REQUIRES_X86_SSE2;
15455 for (uint32_t n = 9; n < 16; n++) {
15456 for (size_t k = 1; k <= 20; k += 5) {
15457 GemmMicrokernelTester()
15458 .mr(1)
15459 .nr(8)
15460 .kr(1)
15461 .sr(1)
15462 .m(1)
15463 .n(n)
15464 .k(k)
15465 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015466 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015467 }
15468 }
15469 }
15470
15471 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, n_gt_8_subtile) {
15472 TEST_REQUIRES_X86_SSE2;
15473 for (uint32_t n = 9; n < 16; n++) {
15474 for (size_t k = 1; k <= 20; k += 5) {
15475 for (uint32_t m = 1; m <= 1; m++) {
15476 GemmMicrokernelTester()
15477 .mr(1)
15478 .nr(8)
15479 .kr(1)
15480 .sr(1)
15481 .m(m)
15482 .n(n)
15483 .k(k)
15484 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015485 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015486 }
15487 }
15488 }
15489 }
15490
15491 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, n_div_8) {
15492 TEST_REQUIRES_X86_SSE2;
15493 for (uint32_t n = 16; n <= 24; n += 8) {
15494 for (size_t k = 1; k <= 20; k += 5) {
15495 GemmMicrokernelTester()
15496 .mr(1)
15497 .nr(8)
15498 .kr(1)
15499 .sr(1)
15500 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015501 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015502 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015503 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015504 }
15505 }
15506 }
15507
15508 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, n_div_8_strided_cn) {
15509 TEST_REQUIRES_X86_SSE2;
15510 for (uint32_t n = 16; n <= 24; n += 8) {
15511 for (size_t k = 1; k <= 20; k += 5) {
15512 GemmMicrokernelTester()
15513 .mr(1)
15514 .nr(8)
15515 .kr(1)
15516 .sr(1)
15517 .m(1)
15518 .n(n)
15519 .k(k)
15520 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015521 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015522 }
15523 }
15524 }
15525
15526 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, n_div_8_strided_a) {
15527 TEST_REQUIRES_X86_SSE2;
15528 for (uint32_t n = 16; n <= 24; n += 8) {
15529 for (size_t k = 1; k <= 20; k += 5) {
15530 GemmMicrokernelTester()
15531 .mr(1)
15532 .nr(8)
15533 .kr(1)
15534 .sr(1)
15535 .m(1)
15536 .n(n)
15537 .k(k)
15538 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015539 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015540 }
15541 }
15542 }
15543
15544 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, n_div_8_subtile) {
15545 TEST_REQUIRES_X86_SSE2;
15546 for (uint32_t n = 16; n <= 24; n += 8) {
15547 for (size_t k = 1; k <= 20; k += 5) {
15548 for (uint32_t m = 1; m <= 1; m++) {
15549 GemmMicrokernelTester()
15550 .mr(1)
15551 .nr(8)
15552 .kr(1)
15553 .sr(1)
15554 .m(m)
15555 .n(n)
15556 .k(k)
15557 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015558 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015559 }
15560 }
15561 }
15562 }
15563
15564 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, strided_cm_subtile) {
15565 TEST_REQUIRES_X86_SSE2;
15566 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015567 for (uint32_t n = 1; n <= 8; n++) {
15568 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015569 GemmMicrokernelTester()
15570 .mr(1)
15571 .nr(8)
15572 .kr(1)
15573 .sr(1)
15574 .m(m)
15575 .n(n)
15576 .k(k)
15577 .cm_stride(11)
15578 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015579 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015580 }
15581 }
15582 }
15583 }
15584
15585 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, qmin) {
15586 TEST_REQUIRES_X86_SSE2;
15587 GemmMicrokernelTester()
15588 .mr(1)
15589 .nr(8)
15590 .kr(1)
15591 .sr(1)
15592 .m(1)
15593 .n(8)
15594 .k(4)
15595 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015596 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015597 }
15598
15599 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, qmax) {
15600 TEST_REQUIRES_X86_SSE2;
15601 GemmMicrokernelTester()
15602 .mr(1)
15603 .nr(8)
15604 .kr(1)
15605 .sr(1)
15606 .m(1)
15607 .n(8)
15608 .k(4)
15609 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015610 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015611 }
15612
15613 TEST(F32_GEMM_MINMAX_1X8__SSE2_DUP, strided_cm) {
15614 TEST_REQUIRES_X86_SSE2;
15615 GemmMicrokernelTester()
15616 .mr(1)
15617 .nr(8)
15618 .kr(1)
15619 .sr(1)
15620 .m(1)
15621 .n(8)
15622 .k(4)
15623 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015624 .Test(xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015625 }
15626#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15627
15628
15629#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080015630 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_eq_4) {
15631 TEST_REQUIRES_X86_SSE2;
15632 GemmMicrokernelTester()
15633 .mr(5)
15634 .nr(8)
15635 .kr(1)
15636 .sr(1)
15637 .m(5)
15638 .n(8)
15639 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015640 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015641 }
15642
15643 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, strided_cn) {
15644 TEST_REQUIRES_X86_SSE2;
15645 GemmMicrokernelTester()
15646 .mr(5)
15647 .nr(8)
15648 .kr(1)
15649 .sr(1)
15650 .m(5)
15651 .n(8)
15652 .k(4)
15653 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015654 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015655 }
15656
15657 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_eq_4_strided_a) {
15658 TEST_REQUIRES_X86_SSE2;
15659 GemmMicrokernelTester()
15660 .mr(5)
15661 .nr(8)
15662 .kr(1)
15663 .sr(1)
15664 .m(5)
15665 .n(8)
15666 .k(4)
15667 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015668 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015669 }
15670
15671 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile) {
15672 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015673 for (uint32_t n = 1; n <= 8; n++) {
15674 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015675 GemmMicrokernelTester()
15676 .mr(5)
15677 .nr(8)
15678 .kr(1)
15679 .sr(1)
15680 .m(m)
15681 .n(n)
15682 .k(4)
15683 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015684 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015685 }
15686 }
15687 }
15688
15689 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile_m) {
15690 TEST_REQUIRES_X86_SSE2;
15691 for (uint32_t m = 1; m <= 5; m++) {
15692 GemmMicrokernelTester()
15693 .mr(5)
15694 .nr(8)
15695 .kr(1)
15696 .sr(1)
15697 .m(m)
15698 .n(8)
15699 .k(4)
15700 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015701 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015702 }
15703 }
15704
15705 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile_n) {
15706 TEST_REQUIRES_X86_SSE2;
15707 for (uint32_t n = 1; n <= 8; n++) {
15708 GemmMicrokernelTester()
15709 .mr(5)
15710 .nr(8)
15711 .kr(1)
15712 .sr(1)
15713 .m(5)
15714 .n(n)
15715 .k(4)
15716 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015717 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015718 }
15719 }
15720
15721 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_lt_4) {
15722 TEST_REQUIRES_X86_SSE2;
15723 for (size_t k = 1; k < 4; k++) {
15724 GemmMicrokernelTester()
15725 .mr(5)
15726 .nr(8)
15727 .kr(1)
15728 .sr(1)
15729 .m(5)
15730 .n(8)
15731 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015732 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015733 }
15734 }
15735
15736 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_lt_4_strided_a) {
15737 TEST_REQUIRES_X86_SSE2;
15738 for (size_t k = 1; k < 4; k++) {
15739 GemmMicrokernelTester()
15740 .mr(5)
15741 .nr(8)
15742 .kr(1)
15743 .sr(1)
15744 .m(5)
15745 .n(8)
15746 .k(k)
15747 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015748 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015749 }
15750 }
15751
15752 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_lt_4_subtile) {
15753 TEST_REQUIRES_X86_SSE2;
15754 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015755 for (uint32_t n = 1; n <= 8; n++) {
15756 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015757 GemmMicrokernelTester()
15758 .mr(5)
15759 .nr(8)
15760 .kr(1)
15761 .sr(1)
15762 .m(m)
15763 .n(n)
15764 .k(k)
15765 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015766 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015767 }
15768 }
15769 }
15770 }
15771
15772 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_gt_4) {
15773 TEST_REQUIRES_X86_SSE2;
15774 for (size_t k = 5; k < 8; k++) {
15775 GemmMicrokernelTester()
15776 .mr(5)
15777 .nr(8)
15778 .kr(1)
15779 .sr(1)
15780 .m(5)
15781 .n(8)
15782 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015783 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015784 }
15785 }
15786
15787 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_gt_4_strided_a) {
15788 TEST_REQUIRES_X86_SSE2;
15789 for (size_t k = 5; k < 8; k++) {
15790 GemmMicrokernelTester()
15791 .mr(5)
15792 .nr(8)
15793 .kr(1)
15794 .sr(1)
15795 .m(5)
15796 .n(8)
15797 .k(k)
15798 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015799 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015800 }
15801 }
15802
15803 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_gt_4_subtile) {
15804 TEST_REQUIRES_X86_SSE2;
15805 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015806 for (uint32_t n = 1; n <= 8; n++) {
15807 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015808 GemmMicrokernelTester()
15809 .mr(5)
15810 .nr(8)
15811 .kr(1)
15812 .sr(1)
15813 .m(m)
15814 .n(n)
15815 .k(k)
15816 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015817 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015818 }
15819 }
15820 }
15821 }
15822
15823 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_div_4) {
15824 TEST_REQUIRES_X86_SSE2;
15825 for (size_t k = 8; k <= 40; k += 4) {
15826 GemmMicrokernelTester()
15827 .mr(5)
15828 .nr(8)
15829 .kr(1)
15830 .sr(1)
15831 .m(5)
15832 .n(8)
15833 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015834 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015835 }
15836 }
15837
15838 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_div_4_strided_a) {
15839 TEST_REQUIRES_X86_SSE2;
15840 for (size_t k = 8; k <= 40; k += 4) {
15841 GemmMicrokernelTester()
15842 .mr(5)
15843 .nr(8)
15844 .kr(1)
15845 .sr(1)
15846 .m(5)
15847 .n(8)
15848 .k(k)
15849 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015850 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015851 }
15852 }
15853
15854 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, k_div_4_subtile) {
15855 TEST_REQUIRES_X86_SSE2;
15856 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015857 for (uint32_t n = 1; n <= 8; n++) {
15858 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015859 GemmMicrokernelTester()
15860 .mr(5)
15861 .nr(8)
15862 .kr(1)
15863 .sr(1)
15864 .m(m)
15865 .n(n)
15866 .k(k)
15867 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015868 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015869 }
15870 }
15871 }
15872 }
15873
15874 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, n_gt_8) {
15875 TEST_REQUIRES_X86_SSE2;
15876 for (uint32_t n = 9; n < 16; n++) {
15877 for (size_t k = 1; k <= 20; k += 5) {
15878 GemmMicrokernelTester()
15879 .mr(5)
15880 .nr(8)
15881 .kr(1)
15882 .sr(1)
15883 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015884 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015885 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015886 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015887 }
15888 }
15889 }
15890
15891 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, n_gt_8_strided_cn) {
15892 TEST_REQUIRES_X86_SSE2;
15893 for (uint32_t n = 9; n < 16; n++) {
15894 for (size_t k = 1; k <= 20; k += 5) {
15895 GemmMicrokernelTester()
15896 .mr(5)
15897 .nr(8)
15898 .kr(1)
15899 .sr(1)
15900 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015901 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015902 .k(k)
15903 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015904 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015905 }
15906 }
15907 }
15908
15909 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, n_gt_8_strided_a) {
15910 TEST_REQUIRES_X86_SSE2;
15911 for (uint32_t n = 9; n < 16; n++) {
15912 for (size_t k = 1; k <= 20; k += 5) {
15913 GemmMicrokernelTester()
15914 .mr(5)
15915 .nr(8)
15916 .kr(1)
15917 .sr(1)
15918 .m(5)
15919 .n(n)
15920 .k(k)
15921 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015922 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015923 }
15924 }
15925 }
15926
15927 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, n_gt_8_subtile) {
15928 TEST_REQUIRES_X86_SSE2;
15929 for (uint32_t n = 9; n < 16; n++) {
15930 for (size_t k = 1; k <= 20; k += 5) {
15931 for (uint32_t m = 1; m <= 5; m++) {
15932 GemmMicrokernelTester()
15933 .mr(5)
15934 .nr(8)
15935 .kr(1)
15936 .sr(1)
15937 .m(m)
15938 .n(n)
15939 .k(k)
15940 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015941 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015942 }
15943 }
15944 }
15945 }
15946
15947 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, n_div_8) {
15948 TEST_REQUIRES_X86_SSE2;
15949 for (uint32_t n = 16; n <= 24; n += 8) {
15950 for (size_t k = 1; k <= 20; k += 5) {
15951 GemmMicrokernelTester()
15952 .mr(5)
15953 .nr(8)
15954 .kr(1)
15955 .sr(1)
15956 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015957 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015958 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015959 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015960 }
15961 }
15962 }
15963
15964 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, n_div_8_strided_cn) {
15965 TEST_REQUIRES_X86_SSE2;
15966 for (uint32_t n = 16; n <= 24; n += 8) {
15967 for (size_t k = 1; k <= 20; k += 5) {
15968 GemmMicrokernelTester()
15969 .mr(5)
15970 .nr(8)
15971 .kr(1)
15972 .sr(1)
15973 .m(5)
15974 .n(n)
15975 .k(k)
15976 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015977 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015978 }
15979 }
15980 }
15981
15982 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, n_div_8_strided_a) {
15983 TEST_REQUIRES_X86_SSE2;
15984 for (uint32_t n = 16; n <= 24; n += 8) {
15985 for (size_t k = 1; k <= 20; k += 5) {
15986 GemmMicrokernelTester()
15987 .mr(5)
15988 .nr(8)
15989 .kr(1)
15990 .sr(1)
15991 .m(5)
15992 .n(n)
15993 .k(k)
15994 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015995 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015996 }
15997 }
15998 }
15999
16000 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, n_div_8_subtile) {
16001 TEST_REQUIRES_X86_SSE2;
16002 for (uint32_t n = 16; n <= 24; n += 8) {
16003 for (size_t k = 1; k <= 20; k += 5) {
16004 for (uint32_t m = 1; m <= 5; m++) {
16005 GemmMicrokernelTester()
16006 .mr(5)
16007 .nr(8)
16008 .kr(1)
16009 .sr(1)
16010 .m(m)
16011 .n(n)
16012 .k(k)
16013 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016014 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016015 }
16016 }
16017 }
16018 }
16019
16020 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, strided_cm_subtile) {
16021 TEST_REQUIRES_X86_SSE2;
16022 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016023 for (uint32_t n = 1; n <= 8; n++) {
16024 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016025 GemmMicrokernelTester()
16026 .mr(5)
16027 .nr(8)
16028 .kr(1)
16029 .sr(1)
16030 .m(m)
16031 .n(n)
16032 .k(k)
16033 .cm_stride(11)
16034 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016035 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016036 }
16037 }
16038 }
16039 }
16040
16041 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, qmin) {
16042 TEST_REQUIRES_X86_SSE2;
16043 GemmMicrokernelTester()
16044 .mr(5)
16045 .nr(8)
16046 .kr(1)
16047 .sr(1)
16048 .m(5)
16049 .n(8)
16050 .k(4)
16051 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016052 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016053 }
16054
16055 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, qmax) {
16056 TEST_REQUIRES_X86_SSE2;
16057 GemmMicrokernelTester()
16058 .mr(5)
16059 .nr(8)
16060 .kr(1)
16061 .sr(1)
16062 .m(5)
16063 .n(8)
16064 .k(4)
16065 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016066 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016067 }
16068
16069 TEST(F32_GEMM_MINMAX_5X8__SSE2_DUP, strided_cm) {
16070 TEST_REQUIRES_X86_SSE2;
16071 GemmMicrokernelTester()
16072 .mr(5)
16073 .nr(8)
16074 .kr(1)
16075 .sr(1)
16076 .m(5)
16077 .n(8)
16078 .k(4)
16079 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016080 .Test(xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016081 }
16082#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16083
16084
16085#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070016086 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016087 TEST_REQUIRES_X86_AVX;
16088 GemmMicrokernelTester()
16089 .mr(1)
16090 .nr(8)
16091 .kr(1)
16092 .sr(1)
16093 .m(1)
16094 .n(8)
16095 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016096 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016097 }
16098
Marat Dukhande06f492020-04-09 00:19:31 -070016099 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016100 TEST_REQUIRES_X86_AVX;
16101 GemmMicrokernelTester()
16102 .mr(1)
16103 .nr(8)
16104 .kr(1)
16105 .sr(1)
16106 .m(1)
16107 .n(8)
16108 .k(1)
16109 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016110 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016111 }
16112
Marat Dukhande06f492020-04-09 00:19:31 -070016113 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016114 TEST_REQUIRES_X86_AVX;
16115 GemmMicrokernelTester()
16116 .mr(1)
16117 .nr(8)
16118 .kr(1)
16119 .sr(1)
16120 .m(1)
16121 .n(8)
16122 .k(1)
16123 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016124 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016125 }
16126
Marat Dukhande06f492020-04-09 00:19:31 -070016127 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016128 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016129 for (uint32_t n = 1; n <= 8; n++) {
16130 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016131 GemmMicrokernelTester()
16132 .mr(1)
16133 .nr(8)
16134 .kr(1)
16135 .sr(1)
16136 .m(m)
16137 .n(n)
16138 .k(1)
16139 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016140 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016141 }
16142 }
16143 }
16144
Marat Dukhande06f492020-04-09 00:19:31 -070016145 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016146 TEST_REQUIRES_X86_AVX;
16147 for (uint32_t m = 1; m <= 1; m++) {
16148 GemmMicrokernelTester()
16149 .mr(1)
16150 .nr(8)
16151 .kr(1)
16152 .sr(1)
16153 .m(m)
16154 .n(8)
16155 .k(1)
16156 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016157 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016158 }
16159 }
16160
Marat Dukhande06f492020-04-09 00:19:31 -070016161 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016162 TEST_REQUIRES_X86_AVX;
16163 for (uint32_t n = 1; n <= 8; n++) {
16164 GemmMicrokernelTester()
16165 .mr(1)
16166 .nr(8)
16167 .kr(1)
16168 .sr(1)
16169 .m(1)
16170 .n(n)
16171 .k(1)
16172 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016173 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016174 }
16175 }
16176
Marat Dukhande06f492020-04-09 00:19:31 -070016177 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016178 TEST_REQUIRES_X86_AVX;
16179 for (size_t k = 2; k < 10; k++) {
16180 GemmMicrokernelTester()
16181 .mr(1)
16182 .nr(8)
16183 .kr(1)
16184 .sr(1)
16185 .m(1)
16186 .n(8)
16187 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016188 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016189 }
16190 }
16191
Marat Dukhande06f492020-04-09 00:19:31 -070016192 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016193 TEST_REQUIRES_X86_AVX;
16194 for (size_t k = 2; k < 10; k++) {
16195 GemmMicrokernelTester()
16196 .mr(1)
16197 .nr(8)
16198 .kr(1)
16199 .sr(1)
16200 .m(1)
16201 .n(8)
16202 .k(k)
16203 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016204 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016205 }
16206 }
16207
Marat Dukhande06f492020-04-09 00:19:31 -070016208 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016209 TEST_REQUIRES_X86_AVX;
16210 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016211 for (uint32_t n = 1; n <= 8; n++) {
16212 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016213 GemmMicrokernelTester()
16214 .mr(1)
16215 .nr(8)
16216 .kr(1)
16217 .sr(1)
16218 .m(m)
16219 .n(n)
16220 .k(k)
16221 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016222 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016223 }
16224 }
16225 }
16226 }
16227
Marat Dukhande06f492020-04-09 00:19:31 -070016228 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016229 TEST_REQUIRES_X86_AVX;
16230 for (uint32_t n = 9; n < 16; n++) {
16231 for (size_t k = 1; k <= 5; k += 2) {
16232 GemmMicrokernelTester()
16233 .mr(1)
16234 .nr(8)
16235 .kr(1)
16236 .sr(1)
16237 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016238 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016239 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016240 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016241 }
16242 }
16243 }
16244
Marat Dukhande06f492020-04-09 00:19:31 -070016245 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016246 TEST_REQUIRES_X86_AVX;
16247 for (uint32_t n = 9; n < 16; n++) {
16248 for (size_t k = 1; k <= 5; k += 2) {
16249 GemmMicrokernelTester()
16250 .mr(1)
16251 .nr(8)
16252 .kr(1)
16253 .sr(1)
16254 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016255 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016256 .k(k)
16257 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016258 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016259 }
16260 }
16261 }
16262
Marat Dukhande06f492020-04-09 00:19:31 -070016263 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016264 TEST_REQUIRES_X86_AVX;
16265 for (uint32_t n = 9; n < 16; n++) {
16266 for (size_t k = 1; k <= 5; k += 2) {
16267 GemmMicrokernelTester()
16268 .mr(1)
16269 .nr(8)
16270 .kr(1)
16271 .sr(1)
16272 .m(1)
16273 .n(n)
16274 .k(k)
16275 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016276 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016277 }
16278 }
16279 }
16280
Marat Dukhande06f492020-04-09 00:19:31 -070016281 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016282 TEST_REQUIRES_X86_AVX;
16283 for (uint32_t n = 9; n < 16; n++) {
16284 for (size_t k = 1; k <= 5; k += 2) {
16285 for (uint32_t m = 1; m <= 1; m++) {
16286 GemmMicrokernelTester()
16287 .mr(1)
16288 .nr(8)
16289 .kr(1)
16290 .sr(1)
16291 .m(m)
16292 .n(n)
16293 .k(k)
16294 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016295 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016296 }
16297 }
16298 }
16299 }
16300
Marat Dukhande06f492020-04-09 00:19:31 -070016301 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016302 TEST_REQUIRES_X86_AVX;
16303 for (uint32_t n = 16; n <= 24; n += 8) {
16304 for (size_t k = 1; k <= 5; k += 2) {
16305 GemmMicrokernelTester()
16306 .mr(1)
16307 .nr(8)
16308 .kr(1)
16309 .sr(1)
16310 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016311 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016312 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016313 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016314 }
16315 }
16316 }
16317
Marat Dukhande06f492020-04-09 00:19:31 -070016318 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016319 TEST_REQUIRES_X86_AVX;
16320 for (uint32_t n = 16; n <= 24; n += 8) {
16321 for (size_t k = 1; k <= 5; k += 2) {
16322 GemmMicrokernelTester()
16323 .mr(1)
16324 .nr(8)
16325 .kr(1)
16326 .sr(1)
16327 .m(1)
16328 .n(n)
16329 .k(k)
16330 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016331 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016332 }
16333 }
16334 }
16335
Marat Dukhande06f492020-04-09 00:19:31 -070016336 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016337 TEST_REQUIRES_X86_AVX;
16338 for (uint32_t n = 16; n <= 24; n += 8) {
16339 for (size_t k = 1; k <= 5; k += 2) {
16340 GemmMicrokernelTester()
16341 .mr(1)
16342 .nr(8)
16343 .kr(1)
16344 .sr(1)
16345 .m(1)
16346 .n(n)
16347 .k(k)
16348 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016349 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016350 }
16351 }
16352 }
16353
Marat Dukhande06f492020-04-09 00:19:31 -070016354 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016355 TEST_REQUIRES_X86_AVX;
16356 for (uint32_t n = 16; n <= 24; n += 8) {
16357 for (size_t k = 1; k <= 5; k += 2) {
16358 for (uint32_t m = 1; m <= 1; m++) {
16359 GemmMicrokernelTester()
16360 .mr(1)
16361 .nr(8)
16362 .kr(1)
16363 .sr(1)
16364 .m(m)
16365 .n(n)
16366 .k(k)
16367 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016368 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016369 }
16370 }
16371 }
16372 }
16373
Marat Dukhande06f492020-04-09 00:19:31 -070016374 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016375 TEST_REQUIRES_X86_AVX;
16376 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016377 for (uint32_t n = 1; n <= 8; n++) {
16378 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016379 GemmMicrokernelTester()
16380 .mr(1)
16381 .nr(8)
16382 .kr(1)
16383 .sr(1)
16384 .m(m)
16385 .n(n)
16386 .k(k)
16387 .cm_stride(11)
16388 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016389 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016390 }
16391 }
16392 }
16393 }
16394
Marat Dukhande06f492020-04-09 00:19:31 -070016395 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016396 TEST_REQUIRES_X86_AVX;
16397 GemmMicrokernelTester()
16398 .mr(1)
16399 .nr(8)
16400 .kr(1)
16401 .sr(1)
16402 .m(1)
16403 .n(8)
16404 .k(1)
16405 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016406 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016407 }
16408
Marat Dukhande06f492020-04-09 00:19:31 -070016409 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016410 TEST_REQUIRES_X86_AVX;
16411 GemmMicrokernelTester()
16412 .mr(1)
16413 .nr(8)
16414 .kr(1)
16415 .sr(1)
16416 .m(1)
16417 .n(8)
16418 .k(1)
16419 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016420 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016421 }
16422
Marat Dukhande06f492020-04-09 00:19:31 -070016423 TEST(F32_GEMM_MINMAX_1X8__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016424 TEST_REQUIRES_X86_AVX;
16425 GemmMicrokernelTester()
16426 .mr(1)
16427 .nr(8)
16428 .kr(1)
16429 .sr(1)
16430 .m(1)
16431 .n(8)
16432 .k(1)
16433 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016434 .Test(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016435 }
16436#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16437
16438
16439#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070016440 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016441 TEST_REQUIRES_X86_AVX;
16442 GemmMicrokernelTester()
16443 .mr(5)
16444 .nr(8)
16445 .kr(1)
16446 .sr(1)
16447 .m(5)
16448 .n(8)
16449 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016450 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016451 }
16452
Marat Dukhande06f492020-04-09 00:19:31 -070016453 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016454 TEST_REQUIRES_X86_AVX;
16455 GemmMicrokernelTester()
16456 .mr(5)
16457 .nr(8)
16458 .kr(1)
16459 .sr(1)
16460 .m(5)
16461 .n(8)
16462 .k(1)
16463 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016464 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016465 }
16466
Marat Dukhande06f492020-04-09 00:19:31 -070016467 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016468 TEST_REQUIRES_X86_AVX;
16469 GemmMicrokernelTester()
16470 .mr(5)
16471 .nr(8)
16472 .kr(1)
16473 .sr(1)
16474 .m(5)
16475 .n(8)
16476 .k(1)
16477 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016478 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016479 }
16480
Marat Dukhande06f492020-04-09 00:19:31 -070016481 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016482 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016483 for (uint32_t n = 1; n <= 8; n++) {
16484 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016485 GemmMicrokernelTester()
16486 .mr(5)
16487 .nr(8)
16488 .kr(1)
16489 .sr(1)
16490 .m(m)
16491 .n(n)
16492 .k(1)
16493 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016494 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016495 }
16496 }
16497 }
16498
Marat Dukhande06f492020-04-09 00:19:31 -070016499 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016500 TEST_REQUIRES_X86_AVX;
16501 for (uint32_t m = 1; m <= 5; m++) {
16502 GemmMicrokernelTester()
16503 .mr(5)
16504 .nr(8)
16505 .kr(1)
16506 .sr(1)
16507 .m(m)
16508 .n(8)
16509 .k(1)
16510 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016511 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016512 }
16513 }
16514
Marat Dukhande06f492020-04-09 00:19:31 -070016515 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016516 TEST_REQUIRES_X86_AVX;
16517 for (uint32_t n = 1; n <= 8; n++) {
16518 GemmMicrokernelTester()
16519 .mr(5)
16520 .nr(8)
16521 .kr(1)
16522 .sr(1)
16523 .m(5)
16524 .n(n)
16525 .k(1)
16526 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016527 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016528 }
16529 }
16530
Marat Dukhande06f492020-04-09 00:19:31 -070016531 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016532 TEST_REQUIRES_X86_AVX;
16533 for (size_t k = 2; k < 10; k++) {
16534 GemmMicrokernelTester()
16535 .mr(5)
16536 .nr(8)
16537 .kr(1)
16538 .sr(1)
16539 .m(5)
16540 .n(8)
16541 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016542 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016543 }
16544 }
16545
Marat Dukhande06f492020-04-09 00:19:31 -070016546 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016547 TEST_REQUIRES_X86_AVX;
16548 for (size_t k = 2; k < 10; k++) {
16549 GemmMicrokernelTester()
16550 .mr(5)
16551 .nr(8)
16552 .kr(1)
16553 .sr(1)
16554 .m(5)
16555 .n(8)
16556 .k(k)
16557 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016558 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016559 }
16560 }
16561
Marat Dukhande06f492020-04-09 00:19:31 -070016562 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016563 TEST_REQUIRES_X86_AVX;
16564 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016565 for (uint32_t n = 1; n <= 8; n++) {
16566 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016567 GemmMicrokernelTester()
16568 .mr(5)
16569 .nr(8)
16570 .kr(1)
16571 .sr(1)
16572 .m(m)
16573 .n(n)
16574 .k(k)
16575 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016576 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016577 }
16578 }
16579 }
16580 }
16581
Marat Dukhande06f492020-04-09 00:19:31 -070016582 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016583 TEST_REQUIRES_X86_AVX;
16584 for (uint32_t n = 9; n < 16; n++) {
16585 for (size_t k = 1; k <= 5; k += 2) {
16586 GemmMicrokernelTester()
16587 .mr(5)
16588 .nr(8)
16589 .kr(1)
16590 .sr(1)
16591 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016592 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016593 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016594 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016595 }
16596 }
16597 }
16598
Marat Dukhande06f492020-04-09 00:19:31 -070016599 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016600 TEST_REQUIRES_X86_AVX;
16601 for (uint32_t n = 9; n < 16; n++) {
16602 for (size_t k = 1; k <= 5; k += 2) {
16603 GemmMicrokernelTester()
16604 .mr(5)
16605 .nr(8)
16606 .kr(1)
16607 .sr(1)
16608 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016609 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016610 .k(k)
16611 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016612 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016613 }
16614 }
16615 }
16616
Marat Dukhande06f492020-04-09 00:19:31 -070016617 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016618 TEST_REQUIRES_X86_AVX;
16619 for (uint32_t n = 9; n < 16; n++) {
16620 for (size_t k = 1; k <= 5; k += 2) {
16621 GemmMicrokernelTester()
16622 .mr(5)
16623 .nr(8)
16624 .kr(1)
16625 .sr(1)
16626 .m(5)
16627 .n(n)
16628 .k(k)
16629 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016630 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016631 }
16632 }
16633 }
16634
Marat Dukhande06f492020-04-09 00:19:31 -070016635 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016636 TEST_REQUIRES_X86_AVX;
16637 for (uint32_t n = 9; n < 16; n++) {
16638 for (size_t k = 1; k <= 5; k += 2) {
16639 for (uint32_t m = 1; m <= 5; m++) {
16640 GemmMicrokernelTester()
16641 .mr(5)
16642 .nr(8)
16643 .kr(1)
16644 .sr(1)
16645 .m(m)
16646 .n(n)
16647 .k(k)
16648 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016649 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016650 }
16651 }
16652 }
16653 }
16654
Marat Dukhande06f492020-04-09 00:19:31 -070016655 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016656 TEST_REQUIRES_X86_AVX;
16657 for (uint32_t n = 16; n <= 24; n += 8) {
16658 for (size_t k = 1; k <= 5; k += 2) {
16659 GemmMicrokernelTester()
16660 .mr(5)
16661 .nr(8)
16662 .kr(1)
16663 .sr(1)
16664 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016665 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016666 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016667 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016668 }
16669 }
16670 }
16671
Marat Dukhande06f492020-04-09 00:19:31 -070016672 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016673 TEST_REQUIRES_X86_AVX;
16674 for (uint32_t n = 16; n <= 24; n += 8) {
16675 for (size_t k = 1; k <= 5; k += 2) {
16676 GemmMicrokernelTester()
16677 .mr(5)
16678 .nr(8)
16679 .kr(1)
16680 .sr(1)
16681 .m(5)
16682 .n(n)
16683 .k(k)
16684 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016685 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016686 }
16687 }
16688 }
16689
Marat Dukhande06f492020-04-09 00:19:31 -070016690 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016691 TEST_REQUIRES_X86_AVX;
16692 for (uint32_t n = 16; n <= 24; n += 8) {
16693 for (size_t k = 1; k <= 5; k += 2) {
16694 GemmMicrokernelTester()
16695 .mr(5)
16696 .nr(8)
16697 .kr(1)
16698 .sr(1)
16699 .m(5)
16700 .n(n)
16701 .k(k)
16702 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016703 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016704 }
16705 }
16706 }
16707
Marat Dukhande06f492020-04-09 00:19:31 -070016708 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016709 TEST_REQUIRES_X86_AVX;
16710 for (uint32_t n = 16; n <= 24; n += 8) {
16711 for (size_t k = 1; k <= 5; k += 2) {
16712 for (uint32_t m = 1; m <= 5; m++) {
16713 GemmMicrokernelTester()
16714 .mr(5)
16715 .nr(8)
16716 .kr(1)
16717 .sr(1)
16718 .m(m)
16719 .n(n)
16720 .k(k)
16721 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016722 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016723 }
16724 }
16725 }
16726 }
16727
Marat Dukhande06f492020-04-09 00:19:31 -070016728 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016729 TEST_REQUIRES_X86_AVX;
16730 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016731 for (uint32_t n = 1; n <= 8; n++) {
16732 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016733 GemmMicrokernelTester()
16734 .mr(5)
16735 .nr(8)
16736 .kr(1)
16737 .sr(1)
16738 .m(m)
16739 .n(n)
16740 .k(k)
16741 .cm_stride(11)
16742 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016743 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016744 }
16745 }
16746 }
16747 }
16748
Marat Dukhande06f492020-04-09 00:19:31 -070016749 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016750 TEST_REQUIRES_X86_AVX;
16751 GemmMicrokernelTester()
16752 .mr(5)
16753 .nr(8)
16754 .kr(1)
16755 .sr(1)
16756 .m(5)
16757 .n(8)
16758 .k(1)
16759 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016760 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016761 }
16762
Marat Dukhande06f492020-04-09 00:19:31 -070016763 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016764 TEST_REQUIRES_X86_AVX;
16765 GemmMicrokernelTester()
16766 .mr(5)
16767 .nr(8)
16768 .kr(1)
16769 .sr(1)
16770 .m(5)
16771 .n(8)
16772 .k(1)
16773 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016774 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016775 }
16776
Marat Dukhande06f492020-04-09 00:19:31 -070016777 TEST(F32_GEMM_MINMAX_5X8__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016778 TEST_REQUIRES_X86_AVX;
16779 GemmMicrokernelTester()
16780 .mr(5)
16781 .nr(8)
16782 .kr(1)
16783 .sr(1)
16784 .m(5)
16785 .n(8)
16786 .k(1)
16787 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016788 .Test(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016789 }
16790#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16791
16792
16793#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070016794 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016795 TEST_REQUIRES_X86_AVX;
16796 GemmMicrokernelTester()
16797 .mr(6)
16798 .nr(8)
16799 .kr(1)
16800 .sr(1)
16801 .m(6)
16802 .n(8)
16803 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016804 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016805 }
16806
Marat Dukhande06f492020-04-09 00:19:31 -070016807 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016808 TEST_REQUIRES_X86_AVX;
16809 GemmMicrokernelTester()
16810 .mr(6)
16811 .nr(8)
16812 .kr(1)
16813 .sr(1)
16814 .m(6)
16815 .n(8)
16816 .k(1)
16817 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016818 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016819 }
16820
Marat Dukhande06f492020-04-09 00:19:31 -070016821 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016822 TEST_REQUIRES_X86_AVX;
16823 GemmMicrokernelTester()
16824 .mr(6)
16825 .nr(8)
16826 .kr(1)
16827 .sr(1)
16828 .m(6)
16829 .n(8)
16830 .k(1)
16831 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016832 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016833 }
16834
Marat Dukhande06f492020-04-09 00:19:31 -070016835 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016836 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016837 for (uint32_t n = 1; n <= 8; n++) {
16838 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016839 GemmMicrokernelTester()
16840 .mr(6)
16841 .nr(8)
16842 .kr(1)
16843 .sr(1)
16844 .m(m)
16845 .n(n)
16846 .k(1)
16847 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016848 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016849 }
16850 }
16851 }
16852
Marat Dukhande06f492020-04-09 00:19:31 -070016853 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016854 TEST_REQUIRES_X86_AVX;
16855 for (uint32_t m = 1; m <= 6; m++) {
16856 GemmMicrokernelTester()
16857 .mr(6)
16858 .nr(8)
16859 .kr(1)
16860 .sr(1)
16861 .m(m)
16862 .n(8)
16863 .k(1)
16864 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016865 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016866 }
16867 }
16868
Marat Dukhande06f492020-04-09 00:19:31 -070016869 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016870 TEST_REQUIRES_X86_AVX;
16871 for (uint32_t n = 1; n <= 8; n++) {
16872 GemmMicrokernelTester()
16873 .mr(6)
16874 .nr(8)
16875 .kr(1)
16876 .sr(1)
16877 .m(6)
16878 .n(n)
16879 .k(1)
16880 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016881 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016882 }
16883 }
16884
Marat Dukhande06f492020-04-09 00:19:31 -070016885 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016886 TEST_REQUIRES_X86_AVX;
16887 for (size_t k = 2; k < 10; k++) {
16888 GemmMicrokernelTester()
16889 .mr(6)
16890 .nr(8)
16891 .kr(1)
16892 .sr(1)
16893 .m(6)
16894 .n(8)
16895 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016896 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016897 }
16898 }
16899
Marat Dukhande06f492020-04-09 00:19:31 -070016900 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016901 TEST_REQUIRES_X86_AVX;
16902 for (size_t k = 2; k < 10; k++) {
16903 GemmMicrokernelTester()
16904 .mr(6)
16905 .nr(8)
16906 .kr(1)
16907 .sr(1)
16908 .m(6)
16909 .n(8)
16910 .k(k)
16911 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016912 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016913 }
16914 }
16915
Marat Dukhande06f492020-04-09 00:19:31 -070016916 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016917 TEST_REQUIRES_X86_AVX;
16918 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016919 for (uint32_t n = 1; n <= 8; n++) {
16920 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016921 GemmMicrokernelTester()
16922 .mr(6)
16923 .nr(8)
16924 .kr(1)
16925 .sr(1)
16926 .m(m)
16927 .n(n)
16928 .k(k)
16929 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016930 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016931 }
16932 }
16933 }
16934 }
16935
Marat Dukhande06f492020-04-09 00:19:31 -070016936 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016937 TEST_REQUIRES_X86_AVX;
16938 for (uint32_t n = 9; n < 16; n++) {
16939 for (size_t k = 1; k <= 5; k += 2) {
16940 GemmMicrokernelTester()
16941 .mr(6)
16942 .nr(8)
16943 .kr(1)
16944 .sr(1)
16945 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016946 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016947 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016948 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016949 }
16950 }
16951 }
16952
Marat Dukhande06f492020-04-09 00:19:31 -070016953 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016954 TEST_REQUIRES_X86_AVX;
16955 for (uint32_t n = 9; n < 16; n++) {
16956 for (size_t k = 1; k <= 5; k += 2) {
16957 GemmMicrokernelTester()
16958 .mr(6)
16959 .nr(8)
16960 .kr(1)
16961 .sr(1)
16962 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016963 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016964 .k(k)
16965 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016966 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016967 }
16968 }
16969 }
16970
Marat Dukhande06f492020-04-09 00:19:31 -070016971 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016972 TEST_REQUIRES_X86_AVX;
16973 for (uint32_t n = 9; n < 16; n++) {
16974 for (size_t k = 1; k <= 5; k += 2) {
16975 GemmMicrokernelTester()
16976 .mr(6)
16977 .nr(8)
16978 .kr(1)
16979 .sr(1)
16980 .m(6)
16981 .n(n)
16982 .k(k)
16983 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016984 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016985 }
16986 }
16987 }
16988
Marat Dukhande06f492020-04-09 00:19:31 -070016989 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016990 TEST_REQUIRES_X86_AVX;
16991 for (uint32_t n = 9; n < 16; n++) {
16992 for (size_t k = 1; k <= 5; k += 2) {
16993 for (uint32_t m = 1; m <= 6; m++) {
16994 GemmMicrokernelTester()
16995 .mr(6)
16996 .nr(8)
16997 .kr(1)
16998 .sr(1)
16999 .m(m)
17000 .n(n)
17001 .k(k)
17002 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017003 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017004 }
17005 }
17006 }
17007 }
17008
Marat Dukhande06f492020-04-09 00:19:31 -070017009 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017010 TEST_REQUIRES_X86_AVX;
17011 for (uint32_t n = 16; n <= 24; n += 8) {
17012 for (size_t k = 1; k <= 5; k += 2) {
17013 GemmMicrokernelTester()
17014 .mr(6)
17015 .nr(8)
17016 .kr(1)
17017 .sr(1)
17018 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017019 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017020 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017021 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017022 }
17023 }
17024 }
17025
Marat Dukhande06f492020-04-09 00:19:31 -070017026 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017027 TEST_REQUIRES_X86_AVX;
17028 for (uint32_t n = 16; n <= 24; n += 8) {
17029 for (size_t k = 1; k <= 5; k += 2) {
17030 GemmMicrokernelTester()
17031 .mr(6)
17032 .nr(8)
17033 .kr(1)
17034 .sr(1)
17035 .m(6)
17036 .n(n)
17037 .k(k)
17038 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017039 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017040 }
17041 }
17042 }
17043
Marat Dukhande06f492020-04-09 00:19:31 -070017044 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017045 TEST_REQUIRES_X86_AVX;
17046 for (uint32_t n = 16; n <= 24; n += 8) {
17047 for (size_t k = 1; k <= 5; k += 2) {
17048 GemmMicrokernelTester()
17049 .mr(6)
17050 .nr(8)
17051 .kr(1)
17052 .sr(1)
17053 .m(6)
17054 .n(n)
17055 .k(k)
17056 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017057 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017058 }
17059 }
17060 }
17061
Marat Dukhande06f492020-04-09 00:19:31 -070017062 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017063 TEST_REQUIRES_X86_AVX;
17064 for (uint32_t n = 16; n <= 24; n += 8) {
17065 for (size_t k = 1; k <= 5; k += 2) {
17066 for (uint32_t m = 1; m <= 6; m++) {
17067 GemmMicrokernelTester()
17068 .mr(6)
17069 .nr(8)
17070 .kr(1)
17071 .sr(1)
17072 .m(m)
17073 .n(n)
17074 .k(k)
17075 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017076 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017077 }
17078 }
17079 }
17080 }
17081
Marat Dukhande06f492020-04-09 00:19:31 -070017082 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017083 TEST_REQUIRES_X86_AVX;
17084 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017085 for (uint32_t n = 1; n <= 8; n++) {
17086 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017087 GemmMicrokernelTester()
17088 .mr(6)
17089 .nr(8)
17090 .kr(1)
17091 .sr(1)
17092 .m(m)
17093 .n(n)
17094 .k(k)
17095 .cm_stride(11)
17096 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017097 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017098 }
17099 }
17100 }
17101 }
17102
Marat Dukhande06f492020-04-09 00:19:31 -070017103 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017104 TEST_REQUIRES_X86_AVX;
17105 GemmMicrokernelTester()
17106 .mr(6)
17107 .nr(8)
17108 .kr(1)
17109 .sr(1)
17110 .m(6)
17111 .n(8)
17112 .k(1)
17113 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017114 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017115 }
17116
Marat Dukhande06f492020-04-09 00:19:31 -070017117 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017118 TEST_REQUIRES_X86_AVX;
17119 GemmMicrokernelTester()
17120 .mr(6)
17121 .nr(8)
17122 .kr(1)
17123 .sr(1)
17124 .m(6)
17125 .n(8)
17126 .k(1)
17127 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017128 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017129 }
17130
Marat Dukhande06f492020-04-09 00:19:31 -070017131 TEST(F32_GEMM_MINMAX_6X8__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017132 TEST_REQUIRES_X86_AVX;
17133 GemmMicrokernelTester()
17134 .mr(6)
17135 .nr(8)
17136 .kr(1)
17137 .sr(1)
17138 .m(6)
17139 .n(8)
17140 .k(1)
17141 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017142 .Test(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017143 }
17144#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17145
17146
17147#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070017148 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017149 TEST_REQUIRES_X86_AVX;
17150 GemmMicrokernelTester()
17151 .mr(3)
17152 .nr(16)
17153 .kr(1)
17154 .sr(1)
17155 .m(3)
17156 .n(16)
17157 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017158 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017159 }
17160
Marat Dukhande06f492020-04-09 00:19:31 -070017161 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017162 TEST_REQUIRES_X86_AVX;
17163 GemmMicrokernelTester()
17164 .mr(3)
17165 .nr(16)
17166 .kr(1)
17167 .sr(1)
17168 .m(3)
17169 .n(16)
17170 .k(1)
17171 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017172 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017173 }
17174
Marat Dukhande06f492020-04-09 00:19:31 -070017175 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017176 TEST_REQUIRES_X86_AVX;
17177 GemmMicrokernelTester()
17178 .mr(3)
17179 .nr(16)
17180 .kr(1)
17181 .sr(1)
17182 .m(3)
17183 .n(16)
17184 .k(1)
17185 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017186 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017187 }
17188
Marat Dukhande06f492020-04-09 00:19:31 -070017189 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017190 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017191 for (uint32_t n = 1; n <= 16; n++) {
17192 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017193 GemmMicrokernelTester()
17194 .mr(3)
17195 .nr(16)
17196 .kr(1)
17197 .sr(1)
17198 .m(m)
17199 .n(n)
17200 .k(1)
17201 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017202 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017203 }
17204 }
17205 }
17206
Marat Dukhande06f492020-04-09 00:19:31 -070017207 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017208 TEST_REQUIRES_X86_AVX;
17209 for (uint32_t m = 1; m <= 3; m++) {
17210 GemmMicrokernelTester()
17211 .mr(3)
17212 .nr(16)
17213 .kr(1)
17214 .sr(1)
17215 .m(m)
17216 .n(16)
17217 .k(1)
17218 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017219 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017220 }
17221 }
17222
Marat Dukhande06f492020-04-09 00:19:31 -070017223 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017224 TEST_REQUIRES_X86_AVX;
17225 for (uint32_t n = 1; n <= 16; n++) {
17226 GemmMicrokernelTester()
17227 .mr(3)
17228 .nr(16)
17229 .kr(1)
17230 .sr(1)
17231 .m(3)
17232 .n(n)
17233 .k(1)
17234 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017235 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017236 }
17237 }
17238
Marat Dukhande06f492020-04-09 00:19:31 -070017239 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017240 TEST_REQUIRES_X86_AVX;
17241 for (size_t k = 2; k < 10; k++) {
17242 GemmMicrokernelTester()
17243 .mr(3)
17244 .nr(16)
17245 .kr(1)
17246 .sr(1)
17247 .m(3)
17248 .n(16)
17249 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017250 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017251 }
17252 }
17253
Marat Dukhande06f492020-04-09 00:19:31 -070017254 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017255 TEST_REQUIRES_X86_AVX;
17256 for (size_t k = 2; k < 10; k++) {
17257 GemmMicrokernelTester()
17258 .mr(3)
17259 .nr(16)
17260 .kr(1)
17261 .sr(1)
17262 .m(3)
17263 .n(16)
17264 .k(k)
17265 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017266 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017267 }
17268 }
17269
Marat Dukhande06f492020-04-09 00:19:31 -070017270 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017271 TEST_REQUIRES_X86_AVX;
17272 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017273 for (uint32_t n = 1; n <= 16; n++) {
17274 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017275 GemmMicrokernelTester()
17276 .mr(3)
17277 .nr(16)
17278 .kr(1)
17279 .sr(1)
17280 .m(m)
17281 .n(n)
17282 .k(k)
17283 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017284 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017285 }
17286 }
17287 }
17288 }
17289
Marat Dukhande06f492020-04-09 00:19:31 -070017290 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017291 TEST_REQUIRES_X86_AVX;
17292 for (uint32_t n = 17; n < 32; n++) {
17293 for (size_t k = 1; k <= 5; k += 2) {
17294 GemmMicrokernelTester()
17295 .mr(3)
17296 .nr(16)
17297 .kr(1)
17298 .sr(1)
17299 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017300 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017301 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017302 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017303 }
17304 }
17305 }
17306
Marat Dukhande06f492020-04-09 00:19:31 -070017307 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017308 TEST_REQUIRES_X86_AVX;
17309 for (uint32_t n = 17; n < 32; n++) {
17310 for (size_t k = 1; k <= 5; k += 2) {
17311 GemmMicrokernelTester()
17312 .mr(3)
17313 .nr(16)
17314 .kr(1)
17315 .sr(1)
17316 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017317 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017318 .k(k)
17319 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017320 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017321 }
17322 }
17323 }
17324
Marat Dukhande06f492020-04-09 00:19:31 -070017325 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017326 TEST_REQUIRES_X86_AVX;
17327 for (uint32_t n = 17; n < 32; n++) {
17328 for (size_t k = 1; k <= 5; k += 2) {
17329 GemmMicrokernelTester()
17330 .mr(3)
17331 .nr(16)
17332 .kr(1)
17333 .sr(1)
17334 .m(3)
17335 .n(n)
17336 .k(k)
17337 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017338 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017339 }
17340 }
17341 }
17342
Marat Dukhande06f492020-04-09 00:19:31 -070017343 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017344 TEST_REQUIRES_X86_AVX;
17345 for (uint32_t n = 17; n < 32; n++) {
17346 for (size_t k = 1; k <= 5; k += 2) {
17347 for (uint32_t m = 1; m <= 3; m++) {
17348 GemmMicrokernelTester()
17349 .mr(3)
17350 .nr(16)
17351 .kr(1)
17352 .sr(1)
17353 .m(m)
17354 .n(n)
17355 .k(k)
17356 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017357 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017358 }
17359 }
17360 }
17361 }
17362
Marat Dukhande06f492020-04-09 00:19:31 -070017363 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017364 TEST_REQUIRES_X86_AVX;
17365 for (uint32_t n = 32; n <= 48; n += 16) {
17366 for (size_t k = 1; k <= 5; k += 2) {
17367 GemmMicrokernelTester()
17368 .mr(3)
17369 .nr(16)
17370 .kr(1)
17371 .sr(1)
17372 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017373 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017374 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017375 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017376 }
17377 }
17378 }
17379
Marat Dukhande06f492020-04-09 00:19:31 -070017380 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017381 TEST_REQUIRES_X86_AVX;
17382 for (uint32_t n = 32; n <= 48; n += 16) {
17383 for (size_t k = 1; k <= 5; k += 2) {
17384 GemmMicrokernelTester()
17385 .mr(3)
17386 .nr(16)
17387 .kr(1)
17388 .sr(1)
17389 .m(3)
17390 .n(n)
17391 .k(k)
17392 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017393 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017394 }
17395 }
17396 }
17397
Marat Dukhande06f492020-04-09 00:19:31 -070017398 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017399 TEST_REQUIRES_X86_AVX;
17400 for (uint32_t n = 32; n <= 48; n += 16) {
17401 for (size_t k = 1; k <= 5; k += 2) {
17402 GemmMicrokernelTester()
17403 .mr(3)
17404 .nr(16)
17405 .kr(1)
17406 .sr(1)
17407 .m(3)
17408 .n(n)
17409 .k(k)
17410 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017411 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017412 }
17413 }
17414 }
17415
Marat Dukhande06f492020-04-09 00:19:31 -070017416 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017417 TEST_REQUIRES_X86_AVX;
17418 for (uint32_t n = 32; n <= 48; n += 16) {
17419 for (size_t k = 1; k <= 5; k += 2) {
17420 for (uint32_t m = 1; m <= 3; m++) {
17421 GemmMicrokernelTester()
17422 .mr(3)
17423 .nr(16)
17424 .kr(1)
17425 .sr(1)
17426 .m(m)
17427 .n(n)
17428 .k(k)
17429 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017430 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017431 }
17432 }
17433 }
17434 }
17435
Marat Dukhande06f492020-04-09 00:19:31 -070017436 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017437 TEST_REQUIRES_X86_AVX;
17438 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017439 for (uint32_t n = 1; n <= 16; n++) {
17440 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017441 GemmMicrokernelTester()
17442 .mr(3)
17443 .nr(16)
17444 .kr(1)
17445 .sr(1)
17446 .m(m)
17447 .n(n)
17448 .k(k)
17449 .cm_stride(19)
17450 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017451 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017452 }
17453 }
17454 }
17455 }
17456
Marat Dukhande06f492020-04-09 00:19:31 -070017457 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017458 TEST_REQUIRES_X86_AVX;
17459 GemmMicrokernelTester()
17460 .mr(3)
17461 .nr(16)
17462 .kr(1)
17463 .sr(1)
17464 .m(3)
17465 .n(16)
17466 .k(1)
17467 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017468 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017469 }
17470
Marat Dukhande06f492020-04-09 00:19:31 -070017471 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017472 TEST_REQUIRES_X86_AVX;
17473 GemmMicrokernelTester()
17474 .mr(3)
17475 .nr(16)
17476 .kr(1)
17477 .sr(1)
17478 .m(3)
17479 .n(16)
17480 .k(1)
17481 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017482 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017483 }
17484
Marat Dukhande06f492020-04-09 00:19:31 -070017485 TEST(F32_GEMM_MINMAX_3X16__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017486 TEST_REQUIRES_X86_AVX;
17487 GemmMicrokernelTester()
17488 .mr(3)
17489 .nr(16)
17490 .kr(1)
17491 .sr(1)
17492 .m(3)
17493 .n(16)
17494 .k(1)
17495 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017496 .Test(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017497 }
17498#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17499
17500
17501#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070017502 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017503 TEST_REQUIRES_X86_FMA3;
17504 GemmMicrokernelTester()
17505 .mr(1)
17506 .nr(8)
17507 .kr(1)
17508 .sr(1)
17509 .m(1)
17510 .n(8)
17511 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017512 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017513 }
17514
Marat Dukhande06f492020-04-09 00:19:31 -070017515 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017516 TEST_REQUIRES_X86_FMA3;
17517 GemmMicrokernelTester()
17518 .mr(1)
17519 .nr(8)
17520 .kr(1)
17521 .sr(1)
17522 .m(1)
17523 .n(8)
17524 .k(1)
17525 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017526 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017527 }
17528
Marat Dukhande06f492020-04-09 00:19:31 -070017529 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017530 TEST_REQUIRES_X86_FMA3;
17531 GemmMicrokernelTester()
17532 .mr(1)
17533 .nr(8)
17534 .kr(1)
17535 .sr(1)
17536 .m(1)
17537 .n(8)
17538 .k(1)
17539 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017540 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017541 }
17542
Marat Dukhande06f492020-04-09 00:19:31 -070017543 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017544 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017545 for (uint32_t n = 1; n <= 8; n++) {
17546 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017547 GemmMicrokernelTester()
17548 .mr(1)
17549 .nr(8)
17550 .kr(1)
17551 .sr(1)
17552 .m(m)
17553 .n(n)
17554 .k(1)
17555 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017556 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017557 }
17558 }
17559 }
17560
Marat Dukhande06f492020-04-09 00:19:31 -070017561 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017562 TEST_REQUIRES_X86_FMA3;
17563 for (uint32_t m = 1; m <= 1; m++) {
17564 GemmMicrokernelTester()
17565 .mr(1)
17566 .nr(8)
17567 .kr(1)
17568 .sr(1)
17569 .m(m)
17570 .n(8)
17571 .k(1)
17572 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017573 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017574 }
17575 }
17576
Marat Dukhande06f492020-04-09 00:19:31 -070017577 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017578 TEST_REQUIRES_X86_FMA3;
17579 for (uint32_t n = 1; n <= 8; n++) {
17580 GemmMicrokernelTester()
17581 .mr(1)
17582 .nr(8)
17583 .kr(1)
17584 .sr(1)
17585 .m(1)
17586 .n(n)
17587 .k(1)
17588 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017589 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017590 }
17591 }
17592
Marat Dukhande06f492020-04-09 00:19:31 -070017593 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017594 TEST_REQUIRES_X86_FMA3;
17595 for (size_t k = 2; k < 10; k++) {
17596 GemmMicrokernelTester()
17597 .mr(1)
17598 .nr(8)
17599 .kr(1)
17600 .sr(1)
17601 .m(1)
17602 .n(8)
17603 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017604 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017605 }
17606 }
17607
Marat Dukhande06f492020-04-09 00:19:31 -070017608 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017609 TEST_REQUIRES_X86_FMA3;
17610 for (size_t k = 2; k < 10; k++) {
17611 GemmMicrokernelTester()
17612 .mr(1)
17613 .nr(8)
17614 .kr(1)
17615 .sr(1)
17616 .m(1)
17617 .n(8)
17618 .k(k)
17619 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017620 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017621 }
17622 }
17623
Marat Dukhande06f492020-04-09 00:19:31 -070017624 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017625 TEST_REQUIRES_X86_FMA3;
17626 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017627 for (uint32_t n = 1; n <= 8; n++) {
17628 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017629 GemmMicrokernelTester()
17630 .mr(1)
17631 .nr(8)
17632 .kr(1)
17633 .sr(1)
17634 .m(m)
17635 .n(n)
17636 .k(k)
17637 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017638 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017639 }
17640 }
17641 }
17642 }
17643
Marat Dukhande06f492020-04-09 00:19:31 -070017644 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017645 TEST_REQUIRES_X86_FMA3;
17646 for (uint32_t n = 9; n < 16; n++) {
17647 for (size_t k = 1; k <= 5; k += 2) {
17648 GemmMicrokernelTester()
17649 .mr(1)
17650 .nr(8)
17651 .kr(1)
17652 .sr(1)
17653 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017654 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017655 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017656 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017657 }
17658 }
17659 }
17660
Marat Dukhande06f492020-04-09 00:19:31 -070017661 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017662 TEST_REQUIRES_X86_FMA3;
17663 for (uint32_t n = 9; n < 16; n++) {
17664 for (size_t k = 1; k <= 5; k += 2) {
17665 GemmMicrokernelTester()
17666 .mr(1)
17667 .nr(8)
17668 .kr(1)
17669 .sr(1)
17670 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017671 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017672 .k(k)
17673 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017674 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017675 }
17676 }
17677 }
17678
Marat Dukhande06f492020-04-09 00:19:31 -070017679 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017680 TEST_REQUIRES_X86_FMA3;
17681 for (uint32_t n = 9; n < 16; n++) {
17682 for (size_t k = 1; k <= 5; k += 2) {
17683 GemmMicrokernelTester()
17684 .mr(1)
17685 .nr(8)
17686 .kr(1)
17687 .sr(1)
17688 .m(1)
17689 .n(n)
17690 .k(k)
17691 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017692 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017693 }
17694 }
17695 }
17696
Marat Dukhande06f492020-04-09 00:19:31 -070017697 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017698 TEST_REQUIRES_X86_FMA3;
17699 for (uint32_t n = 9; n < 16; n++) {
17700 for (size_t k = 1; k <= 5; k += 2) {
17701 for (uint32_t m = 1; m <= 1; m++) {
17702 GemmMicrokernelTester()
17703 .mr(1)
17704 .nr(8)
17705 .kr(1)
17706 .sr(1)
17707 .m(m)
17708 .n(n)
17709 .k(k)
17710 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017711 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017712 }
17713 }
17714 }
17715 }
17716
Marat Dukhande06f492020-04-09 00:19:31 -070017717 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017718 TEST_REQUIRES_X86_FMA3;
17719 for (uint32_t n = 16; n <= 24; n += 8) {
17720 for (size_t k = 1; k <= 5; k += 2) {
17721 GemmMicrokernelTester()
17722 .mr(1)
17723 .nr(8)
17724 .kr(1)
17725 .sr(1)
17726 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017727 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017728 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017729 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017730 }
17731 }
17732 }
17733
Marat Dukhande06f492020-04-09 00:19:31 -070017734 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017735 TEST_REQUIRES_X86_FMA3;
17736 for (uint32_t n = 16; n <= 24; n += 8) {
17737 for (size_t k = 1; k <= 5; k += 2) {
17738 GemmMicrokernelTester()
17739 .mr(1)
17740 .nr(8)
17741 .kr(1)
17742 .sr(1)
17743 .m(1)
17744 .n(n)
17745 .k(k)
17746 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017747 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017748 }
17749 }
17750 }
17751
Marat Dukhande06f492020-04-09 00:19:31 -070017752 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017753 TEST_REQUIRES_X86_FMA3;
17754 for (uint32_t n = 16; n <= 24; n += 8) {
17755 for (size_t k = 1; k <= 5; k += 2) {
17756 GemmMicrokernelTester()
17757 .mr(1)
17758 .nr(8)
17759 .kr(1)
17760 .sr(1)
17761 .m(1)
17762 .n(n)
17763 .k(k)
17764 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017765 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017766 }
17767 }
17768 }
17769
Marat Dukhande06f492020-04-09 00:19:31 -070017770 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017771 TEST_REQUIRES_X86_FMA3;
17772 for (uint32_t n = 16; n <= 24; n += 8) {
17773 for (size_t k = 1; k <= 5; k += 2) {
17774 for (uint32_t m = 1; m <= 1; m++) {
17775 GemmMicrokernelTester()
17776 .mr(1)
17777 .nr(8)
17778 .kr(1)
17779 .sr(1)
17780 .m(m)
17781 .n(n)
17782 .k(k)
17783 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017784 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017785 }
17786 }
17787 }
17788 }
17789
Marat Dukhande06f492020-04-09 00:19:31 -070017790 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017791 TEST_REQUIRES_X86_FMA3;
17792 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017793 for (uint32_t n = 1; n <= 8; n++) {
17794 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017795 GemmMicrokernelTester()
17796 .mr(1)
17797 .nr(8)
17798 .kr(1)
17799 .sr(1)
17800 .m(m)
17801 .n(n)
17802 .k(k)
17803 .cm_stride(11)
17804 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017805 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017806 }
17807 }
17808 }
17809 }
17810
Marat Dukhande06f492020-04-09 00:19:31 -070017811 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017812 TEST_REQUIRES_X86_FMA3;
17813 GemmMicrokernelTester()
17814 .mr(1)
17815 .nr(8)
17816 .kr(1)
17817 .sr(1)
17818 .m(1)
17819 .n(8)
17820 .k(1)
17821 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017822 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017823 }
17824
Marat Dukhande06f492020-04-09 00:19:31 -070017825 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017826 TEST_REQUIRES_X86_FMA3;
17827 GemmMicrokernelTester()
17828 .mr(1)
17829 .nr(8)
17830 .kr(1)
17831 .sr(1)
17832 .m(1)
17833 .n(8)
17834 .k(1)
17835 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017836 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017837 }
17838
Marat Dukhande06f492020-04-09 00:19:31 -070017839 TEST(F32_GEMM_MINMAX_1X8__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017840 TEST_REQUIRES_X86_FMA3;
17841 GemmMicrokernelTester()
17842 .mr(1)
17843 .nr(8)
17844 .kr(1)
17845 .sr(1)
17846 .m(1)
17847 .n(8)
17848 .k(1)
17849 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017850 .Test(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017851 }
17852#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17853
17854
17855#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070017856 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017857 TEST_REQUIRES_X86_FMA3;
17858 GemmMicrokernelTester()
17859 .mr(4)
17860 .nr(8)
17861 .kr(1)
17862 .sr(1)
17863 .m(4)
17864 .n(8)
17865 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017866 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017867 }
17868
Marat Dukhande06f492020-04-09 00:19:31 -070017869 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017870 TEST_REQUIRES_X86_FMA3;
17871 GemmMicrokernelTester()
17872 .mr(4)
17873 .nr(8)
17874 .kr(1)
17875 .sr(1)
17876 .m(4)
17877 .n(8)
17878 .k(1)
17879 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017880 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017881 }
17882
Marat Dukhande06f492020-04-09 00:19:31 -070017883 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017884 TEST_REQUIRES_X86_FMA3;
17885 GemmMicrokernelTester()
17886 .mr(4)
17887 .nr(8)
17888 .kr(1)
17889 .sr(1)
17890 .m(4)
17891 .n(8)
17892 .k(1)
17893 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017894 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017895 }
17896
Marat Dukhande06f492020-04-09 00:19:31 -070017897 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017898 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017899 for (uint32_t n = 1; n <= 8; n++) {
17900 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017901 GemmMicrokernelTester()
17902 .mr(4)
17903 .nr(8)
17904 .kr(1)
17905 .sr(1)
17906 .m(m)
17907 .n(n)
17908 .k(1)
17909 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017910 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017911 }
17912 }
17913 }
17914
Marat Dukhande06f492020-04-09 00:19:31 -070017915 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017916 TEST_REQUIRES_X86_FMA3;
17917 for (uint32_t m = 1; m <= 4; m++) {
17918 GemmMicrokernelTester()
17919 .mr(4)
17920 .nr(8)
17921 .kr(1)
17922 .sr(1)
17923 .m(m)
17924 .n(8)
17925 .k(1)
17926 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017927 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017928 }
17929 }
17930
Marat Dukhande06f492020-04-09 00:19:31 -070017931 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017932 TEST_REQUIRES_X86_FMA3;
17933 for (uint32_t n = 1; n <= 8; n++) {
17934 GemmMicrokernelTester()
17935 .mr(4)
17936 .nr(8)
17937 .kr(1)
17938 .sr(1)
17939 .m(4)
17940 .n(n)
17941 .k(1)
17942 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017943 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017944 }
17945 }
17946
Marat Dukhande06f492020-04-09 00:19:31 -070017947 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017948 TEST_REQUIRES_X86_FMA3;
17949 for (size_t k = 2; k < 10; k++) {
17950 GemmMicrokernelTester()
17951 .mr(4)
17952 .nr(8)
17953 .kr(1)
17954 .sr(1)
17955 .m(4)
17956 .n(8)
17957 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017958 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017959 }
17960 }
17961
Marat Dukhande06f492020-04-09 00:19:31 -070017962 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017963 TEST_REQUIRES_X86_FMA3;
17964 for (size_t k = 2; k < 10; k++) {
17965 GemmMicrokernelTester()
17966 .mr(4)
17967 .nr(8)
17968 .kr(1)
17969 .sr(1)
17970 .m(4)
17971 .n(8)
17972 .k(k)
17973 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017974 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017975 }
17976 }
17977
Marat Dukhande06f492020-04-09 00:19:31 -070017978 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017979 TEST_REQUIRES_X86_FMA3;
17980 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017981 for (uint32_t n = 1; n <= 8; n++) {
17982 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017983 GemmMicrokernelTester()
17984 .mr(4)
17985 .nr(8)
17986 .kr(1)
17987 .sr(1)
17988 .m(m)
17989 .n(n)
17990 .k(k)
17991 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017992 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017993 }
17994 }
17995 }
17996 }
17997
Marat Dukhande06f492020-04-09 00:19:31 -070017998 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017999 TEST_REQUIRES_X86_FMA3;
18000 for (uint32_t n = 9; n < 16; n++) {
18001 for (size_t k = 1; k <= 5; k += 2) {
18002 GemmMicrokernelTester()
18003 .mr(4)
18004 .nr(8)
18005 .kr(1)
18006 .sr(1)
18007 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018008 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018009 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018010 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018011 }
18012 }
18013 }
18014
Marat Dukhande06f492020-04-09 00:19:31 -070018015 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018016 TEST_REQUIRES_X86_FMA3;
18017 for (uint32_t n = 9; n < 16; n++) {
18018 for (size_t k = 1; k <= 5; k += 2) {
18019 GemmMicrokernelTester()
18020 .mr(4)
18021 .nr(8)
18022 .kr(1)
18023 .sr(1)
18024 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018025 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018026 .k(k)
18027 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018028 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018029 }
18030 }
18031 }
18032
Marat Dukhande06f492020-04-09 00:19:31 -070018033 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018034 TEST_REQUIRES_X86_FMA3;
18035 for (uint32_t n = 9; n < 16; n++) {
18036 for (size_t k = 1; k <= 5; k += 2) {
18037 GemmMicrokernelTester()
18038 .mr(4)
18039 .nr(8)
18040 .kr(1)
18041 .sr(1)
18042 .m(4)
18043 .n(n)
18044 .k(k)
18045 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018046 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018047 }
18048 }
18049 }
18050
Marat Dukhande06f492020-04-09 00:19:31 -070018051 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018052 TEST_REQUIRES_X86_FMA3;
18053 for (uint32_t n = 9; n < 16; n++) {
18054 for (size_t k = 1; k <= 5; k += 2) {
18055 for (uint32_t m = 1; m <= 4; m++) {
18056 GemmMicrokernelTester()
18057 .mr(4)
18058 .nr(8)
18059 .kr(1)
18060 .sr(1)
18061 .m(m)
18062 .n(n)
18063 .k(k)
18064 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018065 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018066 }
18067 }
18068 }
18069 }
18070
Marat Dukhande06f492020-04-09 00:19:31 -070018071 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018072 TEST_REQUIRES_X86_FMA3;
18073 for (uint32_t n = 16; n <= 24; n += 8) {
18074 for (size_t k = 1; k <= 5; k += 2) {
18075 GemmMicrokernelTester()
18076 .mr(4)
18077 .nr(8)
18078 .kr(1)
18079 .sr(1)
18080 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018081 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018082 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018083 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018084 }
18085 }
18086 }
18087
Marat Dukhande06f492020-04-09 00:19:31 -070018088 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018089 TEST_REQUIRES_X86_FMA3;
18090 for (uint32_t n = 16; n <= 24; n += 8) {
18091 for (size_t k = 1; k <= 5; k += 2) {
18092 GemmMicrokernelTester()
18093 .mr(4)
18094 .nr(8)
18095 .kr(1)
18096 .sr(1)
18097 .m(4)
18098 .n(n)
18099 .k(k)
18100 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018101 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018102 }
18103 }
18104 }
18105
Marat Dukhande06f492020-04-09 00:19:31 -070018106 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018107 TEST_REQUIRES_X86_FMA3;
18108 for (uint32_t n = 16; n <= 24; n += 8) {
18109 for (size_t k = 1; k <= 5; k += 2) {
18110 GemmMicrokernelTester()
18111 .mr(4)
18112 .nr(8)
18113 .kr(1)
18114 .sr(1)
18115 .m(4)
18116 .n(n)
18117 .k(k)
18118 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018119 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018120 }
18121 }
18122 }
18123
Marat Dukhande06f492020-04-09 00:19:31 -070018124 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018125 TEST_REQUIRES_X86_FMA3;
18126 for (uint32_t n = 16; n <= 24; n += 8) {
18127 for (size_t k = 1; k <= 5; k += 2) {
18128 for (uint32_t m = 1; m <= 4; m++) {
18129 GemmMicrokernelTester()
18130 .mr(4)
18131 .nr(8)
18132 .kr(1)
18133 .sr(1)
18134 .m(m)
18135 .n(n)
18136 .k(k)
18137 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018138 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018139 }
18140 }
18141 }
18142 }
18143
Marat Dukhande06f492020-04-09 00:19:31 -070018144 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018145 TEST_REQUIRES_X86_FMA3;
18146 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018147 for (uint32_t n = 1; n <= 8; n++) {
18148 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018149 GemmMicrokernelTester()
18150 .mr(4)
18151 .nr(8)
18152 .kr(1)
18153 .sr(1)
18154 .m(m)
18155 .n(n)
18156 .k(k)
18157 .cm_stride(11)
18158 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018159 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018160 }
18161 }
18162 }
18163 }
18164
Marat Dukhande06f492020-04-09 00:19:31 -070018165 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018166 TEST_REQUIRES_X86_FMA3;
18167 GemmMicrokernelTester()
18168 .mr(4)
18169 .nr(8)
18170 .kr(1)
18171 .sr(1)
18172 .m(4)
18173 .n(8)
18174 .k(1)
18175 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018176 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018177 }
18178
Marat Dukhande06f492020-04-09 00:19:31 -070018179 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018180 TEST_REQUIRES_X86_FMA3;
18181 GemmMicrokernelTester()
18182 .mr(4)
18183 .nr(8)
18184 .kr(1)
18185 .sr(1)
18186 .m(4)
18187 .n(8)
18188 .k(1)
18189 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018190 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018191 }
18192
Marat Dukhande06f492020-04-09 00:19:31 -070018193 TEST(F32_GEMM_MINMAX_4X8__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018194 TEST_REQUIRES_X86_FMA3;
18195 GemmMicrokernelTester()
18196 .mr(4)
18197 .nr(8)
18198 .kr(1)
18199 .sr(1)
18200 .m(4)
18201 .n(8)
18202 .k(1)
18203 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018204 .Test(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018205 }
18206#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18207
18208
18209#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070018210 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018211 TEST_REQUIRES_X86_FMA3;
18212 GemmMicrokernelTester()
18213 .mr(5)
18214 .nr(8)
18215 .kr(1)
18216 .sr(1)
18217 .m(5)
18218 .n(8)
18219 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018220 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018221 }
18222
Marat Dukhande06f492020-04-09 00:19:31 -070018223 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018224 TEST_REQUIRES_X86_FMA3;
18225 GemmMicrokernelTester()
18226 .mr(5)
18227 .nr(8)
18228 .kr(1)
18229 .sr(1)
18230 .m(5)
18231 .n(8)
18232 .k(1)
18233 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018234 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018235 }
18236
Marat Dukhande06f492020-04-09 00:19:31 -070018237 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018238 TEST_REQUIRES_X86_FMA3;
18239 GemmMicrokernelTester()
18240 .mr(5)
18241 .nr(8)
18242 .kr(1)
18243 .sr(1)
18244 .m(5)
18245 .n(8)
18246 .k(1)
18247 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018248 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018249 }
18250
Marat Dukhande06f492020-04-09 00:19:31 -070018251 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018252 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018253 for (uint32_t n = 1; n <= 8; n++) {
18254 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018255 GemmMicrokernelTester()
18256 .mr(5)
18257 .nr(8)
18258 .kr(1)
18259 .sr(1)
18260 .m(m)
18261 .n(n)
18262 .k(1)
18263 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018264 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018265 }
18266 }
18267 }
18268
Marat Dukhande06f492020-04-09 00:19:31 -070018269 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018270 TEST_REQUIRES_X86_FMA3;
18271 for (uint32_t m = 1; m <= 5; m++) {
18272 GemmMicrokernelTester()
18273 .mr(5)
18274 .nr(8)
18275 .kr(1)
18276 .sr(1)
18277 .m(m)
18278 .n(8)
18279 .k(1)
18280 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018281 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018282 }
18283 }
18284
Marat Dukhande06f492020-04-09 00:19:31 -070018285 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018286 TEST_REQUIRES_X86_FMA3;
18287 for (uint32_t n = 1; n <= 8; n++) {
18288 GemmMicrokernelTester()
18289 .mr(5)
18290 .nr(8)
18291 .kr(1)
18292 .sr(1)
18293 .m(5)
18294 .n(n)
18295 .k(1)
18296 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018297 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018298 }
18299 }
18300
Marat Dukhande06f492020-04-09 00:19:31 -070018301 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018302 TEST_REQUIRES_X86_FMA3;
18303 for (size_t k = 2; k < 10; k++) {
18304 GemmMicrokernelTester()
18305 .mr(5)
18306 .nr(8)
18307 .kr(1)
18308 .sr(1)
18309 .m(5)
18310 .n(8)
18311 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018312 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018313 }
18314 }
18315
Marat Dukhande06f492020-04-09 00:19:31 -070018316 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018317 TEST_REQUIRES_X86_FMA3;
18318 for (size_t k = 2; k < 10; k++) {
18319 GemmMicrokernelTester()
18320 .mr(5)
18321 .nr(8)
18322 .kr(1)
18323 .sr(1)
18324 .m(5)
18325 .n(8)
18326 .k(k)
18327 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018328 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018329 }
18330 }
18331
Marat Dukhande06f492020-04-09 00:19:31 -070018332 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018333 TEST_REQUIRES_X86_FMA3;
18334 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018335 for (uint32_t n = 1; n <= 8; n++) {
18336 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018337 GemmMicrokernelTester()
18338 .mr(5)
18339 .nr(8)
18340 .kr(1)
18341 .sr(1)
18342 .m(m)
18343 .n(n)
18344 .k(k)
18345 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018346 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018347 }
18348 }
18349 }
18350 }
18351
Marat Dukhande06f492020-04-09 00:19:31 -070018352 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018353 TEST_REQUIRES_X86_FMA3;
18354 for (uint32_t n = 9; n < 16; n++) {
18355 for (size_t k = 1; k <= 5; k += 2) {
18356 GemmMicrokernelTester()
18357 .mr(5)
18358 .nr(8)
18359 .kr(1)
18360 .sr(1)
18361 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018362 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018363 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018364 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018365 }
18366 }
18367 }
18368
Marat Dukhande06f492020-04-09 00:19:31 -070018369 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018370 TEST_REQUIRES_X86_FMA3;
18371 for (uint32_t n = 9; n < 16; n++) {
18372 for (size_t k = 1; k <= 5; k += 2) {
18373 GemmMicrokernelTester()
18374 .mr(5)
18375 .nr(8)
18376 .kr(1)
18377 .sr(1)
18378 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018379 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018380 .k(k)
18381 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018382 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018383 }
18384 }
18385 }
18386
Marat Dukhande06f492020-04-09 00:19:31 -070018387 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018388 TEST_REQUIRES_X86_FMA3;
18389 for (uint32_t n = 9; n < 16; n++) {
18390 for (size_t k = 1; k <= 5; k += 2) {
18391 GemmMicrokernelTester()
18392 .mr(5)
18393 .nr(8)
18394 .kr(1)
18395 .sr(1)
18396 .m(5)
18397 .n(n)
18398 .k(k)
18399 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018400 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018401 }
18402 }
18403 }
18404
Marat Dukhande06f492020-04-09 00:19:31 -070018405 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018406 TEST_REQUIRES_X86_FMA3;
18407 for (uint32_t n = 9; n < 16; n++) {
18408 for (size_t k = 1; k <= 5; k += 2) {
18409 for (uint32_t m = 1; m <= 5; m++) {
18410 GemmMicrokernelTester()
18411 .mr(5)
18412 .nr(8)
18413 .kr(1)
18414 .sr(1)
18415 .m(m)
18416 .n(n)
18417 .k(k)
18418 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018419 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018420 }
18421 }
18422 }
18423 }
18424
Marat Dukhande06f492020-04-09 00:19:31 -070018425 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018426 TEST_REQUIRES_X86_FMA3;
18427 for (uint32_t n = 16; n <= 24; n += 8) {
18428 for (size_t k = 1; k <= 5; k += 2) {
18429 GemmMicrokernelTester()
18430 .mr(5)
18431 .nr(8)
18432 .kr(1)
18433 .sr(1)
18434 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018435 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018436 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018437 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018438 }
18439 }
18440 }
18441
Marat Dukhande06f492020-04-09 00:19:31 -070018442 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018443 TEST_REQUIRES_X86_FMA3;
18444 for (uint32_t n = 16; n <= 24; n += 8) {
18445 for (size_t k = 1; k <= 5; k += 2) {
18446 GemmMicrokernelTester()
18447 .mr(5)
18448 .nr(8)
18449 .kr(1)
18450 .sr(1)
18451 .m(5)
18452 .n(n)
18453 .k(k)
18454 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018455 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018456 }
18457 }
18458 }
18459
Marat Dukhande06f492020-04-09 00:19:31 -070018460 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018461 TEST_REQUIRES_X86_FMA3;
18462 for (uint32_t n = 16; n <= 24; n += 8) {
18463 for (size_t k = 1; k <= 5; k += 2) {
18464 GemmMicrokernelTester()
18465 .mr(5)
18466 .nr(8)
18467 .kr(1)
18468 .sr(1)
18469 .m(5)
18470 .n(n)
18471 .k(k)
18472 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018473 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018474 }
18475 }
18476 }
18477
Marat Dukhande06f492020-04-09 00:19:31 -070018478 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018479 TEST_REQUIRES_X86_FMA3;
18480 for (uint32_t n = 16; n <= 24; n += 8) {
18481 for (size_t k = 1; k <= 5; k += 2) {
18482 for (uint32_t m = 1; m <= 5; m++) {
18483 GemmMicrokernelTester()
18484 .mr(5)
18485 .nr(8)
18486 .kr(1)
18487 .sr(1)
18488 .m(m)
18489 .n(n)
18490 .k(k)
18491 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018492 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018493 }
18494 }
18495 }
18496 }
18497
Marat Dukhande06f492020-04-09 00:19:31 -070018498 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018499 TEST_REQUIRES_X86_FMA3;
18500 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018501 for (uint32_t n = 1; n <= 8; n++) {
18502 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018503 GemmMicrokernelTester()
18504 .mr(5)
18505 .nr(8)
18506 .kr(1)
18507 .sr(1)
18508 .m(m)
18509 .n(n)
18510 .k(k)
18511 .cm_stride(11)
18512 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018513 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018514 }
18515 }
18516 }
18517 }
18518
Marat Dukhande06f492020-04-09 00:19:31 -070018519 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018520 TEST_REQUIRES_X86_FMA3;
18521 GemmMicrokernelTester()
18522 .mr(5)
18523 .nr(8)
18524 .kr(1)
18525 .sr(1)
18526 .m(5)
18527 .n(8)
18528 .k(1)
18529 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018530 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018531 }
18532
Marat Dukhande06f492020-04-09 00:19:31 -070018533 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018534 TEST_REQUIRES_X86_FMA3;
18535 GemmMicrokernelTester()
18536 .mr(5)
18537 .nr(8)
18538 .kr(1)
18539 .sr(1)
18540 .m(5)
18541 .n(8)
18542 .k(1)
18543 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018544 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018545 }
18546
Marat Dukhande06f492020-04-09 00:19:31 -070018547 TEST(F32_GEMM_MINMAX_5X8__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018548 TEST_REQUIRES_X86_FMA3;
18549 GemmMicrokernelTester()
18550 .mr(5)
18551 .nr(8)
18552 .kr(1)
18553 .sr(1)
18554 .m(5)
18555 .n(8)
18556 .k(1)
18557 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018558 .Test(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018559 }
18560#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18561
18562
18563#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070018564 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018565 TEST_REQUIRES_X86_FMA3;
18566 GemmMicrokernelTester()
18567 .mr(1)
18568 .nr(16)
18569 .kr(1)
18570 .sr(1)
18571 .m(1)
18572 .n(16)
18573 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018574 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018575 }
18576
Marat Dukhande06f492020-04-09 00:19:31 -070018577 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018578 TEST_REQUIRES_X86_FMA3;
18579 GemmMicrokernelTester()
18580 .mr(1)
18581 .nr(16)
18582 .kr(1)
18583 .sr(1)
18584 .m(1)
18585 .n(16)
18586 .k(1)
18587 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018588 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018589 }
18590
Marat Dukhande06f492020-04-09 00:19:31 -070018591 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018592 TEST_REQUIRES_X86_FMA3;
18593 GemmMicrokernelTester()
18594 .mr(1)
18595 .nr(16)
18596 .kr(1)
18597 .sr(1)
18598 .m(1)
18599 .n(16)
18600 .k(1)
18601 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018602 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018603 }
18604
Marat Dukhande06f492020-04-09 00:19:31 -070018605 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018606 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018607 for (uint32_t n = 1; n <= 16; n++) {
18608 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018609 GemmMicrokernelTester()
18610 .mr(1)
18611 .nr(16)
18612 .kr(1)
18613 .sr(1)
18614 .m(m)
18615 .n(n)
18616 .k(1)
18617 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018618 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018619 }
18620 }
18621 }
18622
Marat Dukhande06f492020-04-09 00:19:31 -070018623 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018624 TEST_REQUIRES_X86_FMA3;
18625 for (uint32_t m = 1; m <= 1; m++) {
18626 GemmMicrokernelTester()
18627 .mr(1)
18628 .nr(16)
18629 .kr(1)
18630 .sr(1)
18631 .m(m)
18632 .n(16)
18633 .k(1)
18634 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018635 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018636 }
18637 }
18638
Marat Dukhande06f492020-04-09 00:19:31 -070018639 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018640 TEST_REQUIRES_X86_FMA3;
18641 for (uint32_t n = 1; n <= 16; n++) {
18642 GemmMicrokernelTester()
18643 .mr(1)
18644 .nr(16)
18645 .kr(1)
18646 .sr(1)
18647 .m(1)
18648 .n(n)
18649 .k(1)
18650 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018651 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018652 }
18653 }
18654
Marat Dukhande06f492020-04-09 00:19:31 -070018655 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018656 TEST_REQUIRES_X86_FMA3;
18657 for (size_t k = 2; k < 10; k++) {
18658 GemmMicrokernelTester()
18659 .mr(1)
18660 .nr(16)
18661 .kr(1)
18662 .sr(1)
18663 .m(1)
18664 .n(16)
18665 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018666 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018667 }
18668 }
18669
Marat Dukhande06f492020-04-09 00:19:31 -070018670 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018671 TEST_REQUIRES_X86_FMA3;
18672 for (size_t k = 2; k < 10; k++) {
18673 GemmMicrokernelTester()
18674 .mr(1)
18675 .nr(16)
18676 .kr(1)
18677 .sr(1)
18678 .m(1)
18679 .n(16)
18680 .k(k)
18681 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018682 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018683 }
18684 }
18685
Marat Dukhande06f492020-04-09 00:19:31 -070018686 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018687 TEST_REQUIRES_X86_FMA3;
18688 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018689 for (uint32_t n = 1; n <= 16; n++) {
18690 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018691 GemmMicrokernelTester()
18692 .mr(1)
18693 .nr(16)
18694 .kr(1)
18695 .sr(1)
18696 .m(m)
18697 .n(n)
18698 .k(k)
18699 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018700 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018701 }
18702 }
18703 }
18704 }
18705
Marat Dukhande06f492020-04-09 00:19:31 -070018706 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018707 TEST_REQUIRES_X86_FMA3;
18708 for (uint32_t n = 17; n < 32; n++) {
18709 for (size_t k = 1; k <= 5; k += 2) {
18710 GemmMicrokernelTester()
18711 .mr(1)
18712 .nr(16)
18713 .kr(1)
18714 .sr(1)
18715 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018716 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018717 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018718 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018719 }
18720 }
18721 }
18722
Marat Dukhande06f492020-04-09 00:19:31 -070018723 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018724 TEST_REQUIRES_X86_FMA3;
18725 for (uint32_t n = 17; n < 32; n++) {
18726 for (size_t k = 1; k <= 5; k += 2) {
18727 GemmMicrokernelTester()
18728 .mr(1)
18729 .nr(16)
18730 .kr(1)
18731 .sr(1)
18732 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018733 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018734 .k(k)
18735 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018736 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018737 }
18738 }
18739 }
18740
Marat Dukhande06f492020-04-09 00:19:31 -070018741 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018742 TEST_REQUIRES_X86_FMA3;
18743 for (uint32_t n = 17; n < 32; n++) {
18744 for (size_t k = 1; k <= 5; k += 2) {
18745 GemmMicrokernelTester()
18746 .mr(1)
18747 .nr(16)
18748 .kr(1)
18749 .sr(1)
18750 .m(1)
18751 .n(n)
18752 .k(k)
18753 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018754 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018755 }
18756 }
18757 }
18758
Marat Dukhande06f492020-04-09 00:19:31 -070018759 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018760 TEST_REQUIRES_X86_FMA3;
18761 for (uint32_t n = 17; n < 32; n++) {
18762 for (size_t k = 1; k <= 5; k += 2) {
18763 for (uint32_t m = 1; m <= 1; m++) {
18764 GemmMicrokernelTester()
18765 .mr(1)
18766 .nr(16)
18767 .kr(1)
18768 .sr(1)
18769 .m(m)
18770 .n(n)
18771 .k(k)
18772 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018773 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018774 }
18775 }
18776 }
18777 }
18778
Marat Dukhande06f492020-04-09 00:19:31 -070018779 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018780 TEST_REQUIRES_X86_FMA3;
18781 for (uint32_t n = 32; n <= 48; n += 16) {
18782 for (size_t k = 1; k <= 5; k += 2) {
18783 GemmMicrokernelTester()
18784 .mr(1)
18785 .nr(16)
18786 .kr(1)
18787 .sr(1)
18788 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018789 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018790 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018791 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018792 }
18793 }
18794 }
18795
Marat Dukhande06f492020-04-09 00:19:31 -070018796 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018797 TEST_REQUIRES_X86_FMA3;
18798 for (uint32_t n = 32; n <= 48; n += 16) {
18799 for (size_t k = 1; k <= 5; k += 2) {
18800 GemmMicrokernelTester()
18801 .mr(1)
18802 .nr(16)
18803 .kr(1)
18804 .sr(1)
18805 .m(1)
18806 .n(n)
18807 .k(k)
18808 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018809 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018810 }
18811 }
18812 }
18813
Marat Dukhande06f492020-04-09 00:19:31 -070018814 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018815 TEST_REQUIRES_X86_FMA3;
18816 for (uint32_t n = 32; n <= 48; n += 16) {
18817 for (size_t k = 1; k <= 5; k += 2) {
18818 GemmMicrokernelTester()
18819 .mr(1)
18820 .nr(16)
18821 .kr(1)
18822 .sr(1)
18823 .m(1)
18824 .n(n)
18825 .k(k)
18826 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018827 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018828 }
18829 }
18830 }
18831
Marat Dukhande06f492020-04-09 00:19:31 -070018832 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018833 TEST_REQUIRES_X86_FMA3;
18834 for (uint32_t n = 32; n <= 48; n += 16) {
18835 for (size_t k = 1; k <= 5; k += 2) {
18836 for (uint32_t m = 1; m <= 1; m++) {
18837 GemmMicrokernelTester()
18838 .mr(1)
18839 .nr(16)
18840 .kr(1)
18841 .sr(1)
18842 .m(m)
18843 .n(n)
18844 .k(k)
18845 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018846 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018847 }
18848 }
18849 }
18850 }
18851
Marat Dukhande06f492020-04-09 00:19:31 -070018852 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018853 TEST_REQUIRES_X86_FMA3;
18854 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018855 for (uint32_t n = 1; n <= 16; n++) {
18856 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018857 GemmMicrokernelTester()
18858 .mr(1)
18859 .nr(16)
18860 .kr(1)
18861 .sr(1)
18862 .m(m)
18863 .n(n)
18864 .k(k)
18865 .cm_stride(19)
18866 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018867 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018868 }
18869 }
18870 }
18871 }
18872
Marat Dukhande06f492020-04-09 00:19:31 -070018873 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018874 TEST_REQUIRES_X86_FMA3;
18875 GemmMicrokernelTester()
18876 .mr(1)
18877 .nr(16)
18878 .kr(1)
18879 .sr(1)
18880 .m(1)
18881 .n(16)
18882 .k(1)
18883 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018884 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018885 }
18886
Marat Dukhande06f492020-04-09 00:19:31 -070018887 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018888 TEST_REQUIRES_X86_FMA3;
18889 GemmMicrokernelTester()
18890 .mr(1)
18891 .nr(16)
18892 .kr(1)
18893 .sr(1)
18894 .m(1)
18895 .n(16)
18896 .k(1)
18897 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018898 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018899 }
18900
Marat Dukhande06f492020-04-09 00:19:31 -070018901 TEST(F32_GEMM_MINMAX_1X16__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018902 TEST_REQUIRES_X86_FMA3;
18903 GemmMicrokernelTester()
18904 .mr(1)
18905 .nr(16)
18906 .kr(1)
18907 .sr(1)
18908 .m(1)
18909 .n(16)
18910 .k(1)
18911 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018912 .Test(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018913 }
18914#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18915
18916
18917#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070018918 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018919 TEST_REQUIRES_X86_FMA3;
18920 GemmMicrokernelTester()
18921 .mr(3)
18922 .nr(16)
18923 .kr(1)
18924 .sr(1)
18925 .m(3)
18926 .n(16)
18927 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018928 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018929 }
18930
Marat Dukhande06f492020-04-09 00:19:31 -070018931 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018932 TEST_REQUIRES_X86_FMA3;
18933 GemmMicrokernelTester()
18934 .mr(3)
18935 .nr(16)
18936 .kr(1)
18937 .sr(1)
18938 .m(3)
18939 .n(16)
18940 .k(1)
18941 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018942 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018943 }
18944
Marat Dukhande06f492020-04-09 00:19:31 -070018945 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018946 TEST_REQUIRES_X86_FMA3;
18947 GemmMicrokernelTester()
18948 .mr(3)
18949 .nr(16)
18950 .kr(1)
18951 .sr(1)
18952 .m(3)
18953 .n(16)
18954 .k(1)
18955 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018956 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018957 }
18958
Marat Dukhande06f492020-04-09 00:19:31 -070018959 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018960 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018961 for (uint32_t n = 1; n <= 16; n++) {
18962 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018963 GemmMicrokernelTester()
18964 .mr(3)
18965 .nr(16)
18966 .kr(1)
18967 .sr(1)
18968 .m(m)
18969 .n(n)
18970 .k(1)
18971 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018972 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018973 }
18974 }
18975 }
18976
Marat Dukhande06f492020-04-09 00:19:31 -070018977 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018978 TEST_REQUIRES_X86_FMA3;
18979 for (uint32_t m = 1; m <= 3; m++) {
18980 GemmMicrokernelTester()
18981 .mr(3)
18982 .nr(16)
18983 .kr(1)
18984 .sr(1)
18985 .m(m)
18986 .n(16)
18987 .k(1)
18988 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018989 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018990 }
18991 }
18992
Marat Dukhande06f492020-04-09 00:19:31 -070018993 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018994 TEST_REQUIRES_X86_FMA3;
18995 for (uint32_t n = 1; n <= 16; n++) {
18996 GemmMicrokernelTester()
18997 .mr(3)
18998 .nr(16)
18999 .kr(1)
19000 .sr(1)
19001 .m(3)
19002 .n(n)
19003 .k(1)
19004 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019005 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019006 }
19007 }
19008
Marat Dukhande06f492020-04-09 00:19:31 -070019009 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019010 TEST_REQUIRES_X86_FMA3;
19011 for (size_t k = 2; k < 10; k++) {
19012 GemmMicrokernelTester()
19013 .mr(3)
19014 .nr(16)
19015 .kr(1)
19016 .sr(1)
19017 .m(3)
19018 .n(16)
19019 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019020 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019021 }
19022 }
19023
Marat Dukhande06f492020-04-09 00:19:31 -070019024 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019025 TEST_REQUIRES_X86_FMA3;
19026 for (size_t k = 2; k < 10; k++) {
19027 GemmMicrokernelTester()
19028 .mr(3)
19029 .nr(16)
19030 .kr(1)
19031 .sr(1)
19032 .m(3)
19033 .n(16)
19034 .k(k)
19035 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019036 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019037 }
19038 }
19039
Marat Dukhande06f492020-04-09 00:19:31 -070019040 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019041 TEST_REQUIRES_X86_FMA3;
19042 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019043 for (uint32_t n = 1; n <= 16; n++) {
19044 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019045 GemmMicrokernelTester()
19046 .mr(3)
19047 .nr(16)
19048 .kr(1)
19049 .sr(1)
19050 .m(m)
19051 .n(n)
19052 .k(k)
19053 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019054 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019055 }
19056 }
19057 }
19058 }
19059
Marat Dukhande06f492020-04-09 00:19:31 -070019060 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019061 TEST_REQUIRES_X86_FMA3;
19062 for (uint32_t n = 17; n < 32; n++) {
19063 for (size_t k = 1; k <= 5; k += 2) {
19064 GemmMicrokernelTester()
19065 .mr(3)
19066 .nr(16)
19067 .kr(1)
19068 .sr(1)
19069 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019070 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019071 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019072 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019073 }
19074 }
19075 }
19076
Marat Dukhande06f492020-04-09 00:19:31 -070019077 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019078 TEST_REQUIRES_X86_FMA3;
19079 for (uint32_t n = 17; n < 32; n++) {
19080 for (size_t k = 1; k <= 5; k += 2) {
19081 GemmMicrokernelTester()
19082 .mr(3)
19083 .nr(16)
19084 .kr(1)
19085 .sr(1)
19086 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019087 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019088 .k(k)
19089 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019090 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019091 }
19092 }
19093 }
19094
Marat Dukhande06f492020-04-09 00:19:31 -070019095 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019096 TEST_REQUIRES_X86_FMA3;
19097 for (uint32_t n = 17; n < 32; n++) {
19098 for (size_t k = 1; k <= 5; k += 2) {
19099 GemmMicrokernelTester()
19100 .mr(3)
19101 .nr(16)
19102 .kr(1)
19103 .sr(1)
19104 .m(3)
19105 .n(n)
19106 .k(k)
19107 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019108 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019109 }
19110 }
19111 }
19112
Marat Dukhande06f492020-04-09 00:19:31 -070019113 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019114 TEST_REQUIRES_X86_FMA3;
19115 for (uint32_t n = 17; n < 32; n++) {
19116 for (size_t k = 1; k <= 5; k += 2) {
19117 for (uint32_t m = 1; m <= 3; m++) {
19118 GemmMicrokernelTester()
19119 .mr(3)
19120 .nr(16)
19121 .kr(1)
19122 .sr(1)
19123 .m(m)
19124 .n(n)
19125 .k(k)
19126 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019127 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019128 }
19129 }
19130 }
19131 }
19132
Marat Dukhande06f492020-04-09 00:19:31 -070019133 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019134 TEST_REQUIRES_X86_FMA3;
19135 for (uint32_t n = 32; n <= 48; n += 16) {
19136 for (size_t k = 1; k <= 5; k += 2) {
19137 GemmMicrokernelTester()
19138 .mr(3)
19139 .nr(16)
19140 .kr(1)
19141 .sr(1)
19142 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019143 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019144 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019145 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019146 }
19147 }
19148 }
19149
Marat Dukhande06f492020-04-09 00:19:31 -070019150 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019151 TEST_REQUIRES_X86_FMA3;
19152 for (uint32_t n = 32; n <= 48; n += 16) {
19153 for (size_t k = 1; k <= 5; k += 2) {
19154 GemmMicrokernelTester()
19155 .mr(3)
19156 .nr(16)
19157 .kr(1)
19158 .sr(1)
19159 .m(3)
19160 .n(n)
19161 .k(k)
19162 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019163 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019164 }
19165 }
19166 }
19167
Marat Dukhande06f492020-04-09 00:19:31 -070019168 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019169 TEST_REQUIRES_X86_FMA3;
19170 for (uint32_t n = 32; n <= 48; n += 16) {
19171 for (size_t k = 1; k <= 5; k += 2) {
19172 GemmMicrokernelTester()
19173 .mr(3)
19174 .nr(16)
19175 .kr(1)
19176 .sr(1)
19177 .m(3)
19178 .n(n)
19179 .k(k)
19180 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019181 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019182 }
19183 }
19184 }
19185
Marat Dukhande06f492020-04-09 00:19:31 -070019186 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019187 TEST_REQUIRES_X86_FMA3;
19188 for (uint32_t n = 32; n <= 48; n += 16) {
19189 for (size_t k = 1; k <= 5; k += 2) {
19190 for (uint32_t m = 1; m <= 3; m++) {
19191 GemmMicrokernelTester()
19192 .mr(3)
19193 .nr(16)
19194 .kr(1)
19195 .sr(1)
19196 .m(m)
19197 .n(n)
19198 .k(k)
19199 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019200 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019201 }
19202 }
19203 }
19204 }
19205
Marat Dukhande06f492020-04-09 00:19:31 -070019206 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019207 TEST_REQUIRES_X86_FMA3;
19208 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019209 for (uint32_t n = 1; n <= 16; n++) {
19210 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019211 GemmMicrokernelTester()
19212 .mr(3)
19213 .nr(16)
19214 .kr(1)
19215 .sr(1)
19216 .m(m)
19217 .n(n)
19218 .k(k)
19219 .cm_stride(19)
19220 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019221 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019222 }
19223 }
19224 }
19225 }
19226
Marat Dukhande06f492020-04-09 00:19:31 -070019227 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019228 TEST_REQUIRES_X86_FMA3;
19229 GemmMicrokernelTester()
19230 .mr(3)
19231 .nr(16)
19232 .kr(1)
19233 .sr(1)
19234 .m(3)
19235 .n(16)
19236 .k(1)
19237 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019238 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019239 }
19240
Marat Dukhande06f492020-04-09 00:19:31 -070019241 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019242 TEST_REQUIRES_X86_FMA3;
19243 GemmMicrokernelTester()
19244 .mr(3)
19245 .nr(16)
19246 .kr(1)
19247 .sr(1)
19248 .m(3)
19249 .n(16)
19250 .k(1)
19251 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019252 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019253 }
19254
Marat Dukhande06f492020-04-09 00:19:31 -070019255 TEST(F32_GEMM_MINMAX_3X16__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019256 TEST_REQUIRES_X86_FMA3;
19257 GemmMicrokernelTester()
19258 .mr(3)
19259 .nr(16)
19260 .kr(1)
19261 .sr(1)
19262 .m(3)
19263 .n(16)
19264 .k(1)
19265 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019266 .Test(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019267 }
19268#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19269
19270
19271#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070019272 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019273 TEST_REQUIRES_X86_AVX512F;
19274 GemmMicrokernelTester()
19275 .mr(1)
19276 .nr(16)
19277 .kr(1)
19278 .sr(1)
19279 .m(1)
19280 .n(16)
19281 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019282 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019283 }
19284
Marat Dukhande06f492020-04-09 00:19:31 -070019285 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019286 TEST_REQUIRES_X86_AVX512F;
19287 GemmMicrokernelTester()
19288 .mr(1)
19289 .nr(16)
19290 .kr(1)
19291 .sr(1)
19292 .m(1)
19293 .n(16)
19294 .k(1)
19295 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019296 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019297 }
19298
Marat Dukhande06f492020-04-09 00:19:31 -070019299 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019300 TEST_REQUIRES_X86_AVX512F;
19301 GemmMicrokernelTester()
19302 .mr(1)
19303 .nr(16)
19304 .kr(1)
19305 .sr(1)
19306 .m(1)
19307 .n(16)
19308 .k(1)
19309 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019310 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019311 }
19312
Marat Dukhande06f492020-04-09 00:19:31 -070019313 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019314 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019315 for (uint32_t n = 1; n <= 16; n++) {
19316 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019317 GemmMicrokernelTester()
19318 .mr(1)
19319 .nr(16)
19320 .kr(1)
19321 .sr(1)
19322 .m(m)
19323 .n(n)
19324 .k(1)
19325 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019326 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019327 }
19328 }
19329 }
19330
Marat Dukhande06f492020-04-09 00:19:31 -070019331 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019332 TEST_REQUIRES_X86_AVX512F;
19333 for (uint32_t m = 1; m <= 1; m++) {
19334 GemmMicrokernelTester()
19335 .mr(1)
19336 .nr(16)
19337 .kr(1)
19338 .sr(1)
19339 .m(m)
19340 .n(16)
19341 .k(1)
19342 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019343 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019344 }
19345 }
19346
Marat Dukhande06f492020-04-09 00:19:31 -070019347 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019348 TEST_REQUIRES_X86_AVX512F;
19349 for (uint32_t n = 1; n <= 16; n++) {
19350 GemmMicrokernelTester()
19351 .mr(1)
19352 .nr(16)
19353 .kr(1)
19354 .sr(1)
19355 .m(1)
19356 .n(n)
19357 .k(1)
19358 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019359 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019360 }
19361 }
19362
Marat Dukhande06f492020-04-09 00:19:31 -070019363 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019364 TEST_REQUIRES_X86_AVX512F;
19365 for (size_t k = 2; k < 10; k++) {
19366 GemmMicrokernelTester()
19367 .mr(1)
19368 .nr(16)
19369 .kr(1)
19370 .sr(1)
19371 .m(1)
19372 .n(16)
19373 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019374 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019375 }
19376 }
19377
Marat Dukhande06f492020-04-09 00:19:31 -070019378 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019379 TEST_REQUIRES_X86_AVX512F;
19380 for (size_t k = 2; k < 10; k++) {
19381 GemmMicrokernelTester()
19382 .mr(1)
19383 .nr(16)
19384 .kr(1)
19385 .sr(1)
19386 .m(1)
19387 .n(16)
19388 .k(k)
19389 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019390 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019391 }
19392 }
19393
Marat Dukhande06f492020-04-09 00:19:31 -070019394 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019395 TEST_REQUIRES_X86_AVX512F;
19396 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019397 for (uint32_t n = 1; n <= 16; n++) {
19398 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019399 GemmMicrokernelTester()
19400 .mr(1)
19401 .nr(16)
19402 .kr(1)
19403 .sr(1)
19404 .m(m)
19405 .n(n)
19406 .k(k)
19407 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019408 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019409 }
19410 }
19411 }
19412 }
19413
Marat Dukhande06f492020-04-09 00:19:31 -070019414 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019415 TEST_REQUIRES_X86_AVX512F;
19416 for (uint32_t n = 17; n < 32; n++) {
19417 for (size_t k = 1; k <= 5; k += 2) {
19418 GemmMicrokernelTester()
19419 .mr(1)
19420 .nr(16)
19421 .kr(1)
19422 .sr(1)
19423 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019424 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019425 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019426 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019427 }
19428 }
19429 }
19430
Marat Dukhande06f492020-04-09 00:19:31 -070019431 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019432 TEST_REQUIRES_X86_AVX512F;
19433 for (uint32_t n = 17; n < 32; n++) {
19434 for (size_t k = 1; k <= 5; k += 2) {
19435 GemmMicrokernelTester()
19436 .mr(1)
19437 .nr(16)
19438 .kr(1)
19439 .sr(1)
19440 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019441 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019442 .k(k)
19443 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019444 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019445 }
19446 }
19447 }
19448
Marat Dukhande06f492020-04-09 00:19:31 -070019449 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019450 TEST_REQUIRES_X86_AVX512F;
19451 for (uint32_t n = 17; n < 32; n++) {
19452 for (size_t k = 1; k <= 5; k += 2) {
19453 GemmMicrokernelTester()
19454 .mr(1)
19455 .nr(16)
19456 .kr(1)
19457 .sr(1)
19458 .m(1)
19459 .n(n)
19460 .k(k)
19461 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019462 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019463 }
19464 }
19465 }
19466
Marat Dukhande06f492020-04-09 00:19:31 -070019467 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019468 TEST_REQUIRES_X86_AVX512F;
19469 for (uint32_t n = 17; n < 32; n++) {
19470 for (size_t k = 1; k <= 5; k += 2) {
19471 for (uint32_t m = 1; m <= 1; m++) {
19472 GemmMicrokernelTester()
19473 .mr(1)
19474 .nr(16)
19475 .kr(1)
19476 .sr(1)
19477 .m(m)
19478 .n(n)
19479 .k(k)
19480 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019481 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019482 }
19483 }
19484 }
19485 }
19486
Marat Dukhande06f492020-04-09 00:19:31 -070019487 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019488 TEST_REQUIRES_X86_AVX512F;
19489 for (uint32_t n = 32; n <= 48; n += 16) {
19490 for (size_t k = 1; k <= 5; k += 2) {
19491 GemmMicrokernelTester()
19492 .mr(1)
19493 .nr(16)
19494 .kr(1)
19495 .sr(1)
19496 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019497 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019498 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019499 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019500 }
19501 }
19502 }
19503
Marat Dukhande06f492020-04-09 00:19:31 -070019504 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019505 TEST_REQUIRES_X86_AVX512F;
19506 for (uint32_t n = 32; n <= 48; n += 16) {
19507 for (size_t k = 1; k <= 5; k += 2) {
19508 GemmMicrokernelTester()
19509 .mr(1)
19510 .nr(16)
19511 .kr(1)
19512 .sr(1)
19513 .m(1)
19514 .n(n)
19515 .k(k)
19516 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019517 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019518 }
19519 }
19520 }
19521
Marat Dukhande06f492020-04-09 00:19:31 -070019522 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019523 TEST_REQUIRES_X86_AVX512F;
19524 for (uint32_t n = 32; n <= 48; n += 16) {
19525 for (size_t k = 1; k <= 5; k += 2) {
19526 GemmMicrokernelTester()
19527 .mr(1)
19528 .nr(16)
19529 .kr(1)
19530 .sr(1)
19531 .m(1)
19532 .n(n)
19533 .k(k)
19534 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019535 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019536 }
19537 }
19538 }
19539
Marat Dukhande06f492020-04-09 00:19:31 -070019540 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019541 TEST_REQUIRES_X86_AVX512F;
19542 for (uint32_t n = 32; n <= 48; n += 16) {
19543 for (size_t k = 1; k <= 5; k += 2) {
19544 for (uint32_t m = 1; m <= 1; m++) {
19545 GemmMicrokernelTester()
19546 .mr(1)
19547 .nr(16)
19548 .kr(1)
19549 .sr(1)
19550 .m(m)
19551 .n(n)
19552 .k(k)
19553 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019554 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019555 }
19556 }
19557 }
19558 }
19559
Marat Dukhande06f492020-04-09 00:19:31 -070019560 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019561 TEST_REQUIRES_X86_AVX512F;
19562 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019563 for (uint32_t n = 1; n <= 16; n++) {
19564 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019565 GemmMicrokernelTester()
19566 .mr(1)
19567 .nr(16)
19568 .kr(1)
19569 .sr(1)
19570 .m(m)
19571 .n(n)
19572 .k(k)
19573 .cm_stride(19)
19574 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019575 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019576 }
19577 }
19578 }
19579 }
19580
Marat Dukhande06f492020-04-09 00:19:31 -070019581 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019582 TEST_REQUIRES_X86_AVX512F;
19583 GemmMicrokernelTester()
19584 .mr(1)
19585 .nr(16)
19586 .kr(1)
19587 .sr(1)
19588 .m(1)
19589 .n(16)
19590 .k(1)
19591 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019592 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019593 }
19594
Marat Dukhande06f492020-04-09 00:19:31 -070019595 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019596 TEST_REQUIRES_X86_AVX512F;
19597 GemmMicrokernelTester()
19598 .mr(1)
19599 .nr(16)
19600 .kr(1)
19601 .sr(1)
19602 .m(1)
19603 .n(16)
19604 .k(1)
19605 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019606 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019607 }
19608
Marat Dukhande06f492020-04-09 00:19:31 -070019609 TEST(F32_GEMM_MINMAX_1X16__AVX512F_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019610 TEST_REQUIRES_X86_AVX512F;
19611 GemmMicrokernelTester()
19612 .mr(1)
19613 .nr(16)
19614 .kr(1)
19615 .sr(1)
19616 .m(1)
19617 .n(16)
19618 .k(1)
19619 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019620 .Test(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019621 }
19622#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19623
19624
19625#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070019626 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019627 TEST_REQUIRES_X86_AVX512F;
19628 GemmMicrokernelTester()
19629 .mr(6)
19630 .nr(16)
19631 .kr(1)
19632 .sr(1)
19633 .m(6)
19634 .n(16)
19635 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019636 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019637 }
19638
Marat Dukhande06f492020-04-09 00:19:31 -070019639 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019640 TEST_REQUIRES_X86_AVX512F;
19641 GemmMicrokernelTester()
19642 .mr(6)
19643 .nr(16)
19644 .kr(1)
19645 .sr(1)
19646 .m(6)
19647 .n(16)
19648 .k(1)
19649 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019650 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019651 }
19652
Marat Dukhande06f492020-04-09 00:19:31 -070019653 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019654 TEST_REQUIRES_X86_AVX512F;
19655 GemmMicrokernelTester()
19656 .mr(6)
19657 .nr(16)
19658 .kr(1)
19659 .sr(1)
19660 .m(6)
19661 .n(16)
19662 .k(1)
19663 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019664 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019665 }
19666
Marat Dukhande06f492020-04-09 00:19:31 -070019667 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019668 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019669 for (uint32_t n = 1; n <= 16; n++) {
19670 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019671 GemmMicrokernelTester()
19672 .mr(6)
19673 .nr(16)
19674 .kr(1)
19675 .sr(1)
19676 .m(m)
19677 .n(n)
19678 .k(1)
19679 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019680 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019681 }
19682 }
19683 }
19684
Marat Dukhande06f492020-04-09 00:19:31 -070019685 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019686 TEST_REQUIRES_X86_AVX512F;
19687 for (uint32_t m = 1; m <= 6; m++) {
19688 GemmMicrokernelTester()
19689 .mr(6)
19690 .nr(16)
19691 .kr(1)
19692 .sr(1)
19693 .m(m)
19694 .n(16)
19695 .k(1)
19696 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019697 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019698 }
19699 }
19700
Marat Dukhande06f492020-04-09 00:19:31 -070019701 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019702 TEST_REQUIRES_X86_AVX512F;
19703 for (uint32_t n = 1; n <= 16; n++) {
19704 GemmMicrokernelTester()
19705 .mr(6)
19706 .nr(16)
19707 .kr(1)
19708 .sr(1)
19709 .m(6)
19710 .n(n)
19711 .k(1)
19712 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019713 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019714 }
19715 }
19716
Marat Dukhande06f492020-04-09 00:19:31 -070019717 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019718 TEST_REQUIRES_X86_AVX512F;
19719 for (size_t k = 2; k < 10; k++) {
19720 GemmMicrokernelTester()
19721 .mr(6)
19722 .nr(16)
19723 .kr(1)
19724 .sr(1)
19725 .m(6)
19726 .n(16)
19727 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019728 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019729 }
19730 }
19731
Marat Dukhande06f492020-04-09 00:19:31 -070019732 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019733 TEST_REQUIRES_X86_AVX512F;
19734 for (size_t k = 2; k < 10; k++) {
19735 GemmMicrokernelTester()
19736 .mr(6)
19737 .nr(16)
19738 .kr(1)
19739 .sr(1)
19740 .m(6)
19741 .n(16)
19742 .k(k)
19743 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019744 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019745 }
19746 }
19747
Marat Dukhande06f492020-04-09 00:19:31 -070019748 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019749 TEST_REQUIRES_X86_AVX512F;
19750 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019751 for (uint32_t n = 1; n <= 16; n++) {
19752 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019753 GemmMicrokernelTester()
19754 .mr(6)
19755 .nr(16)
19756 .kr(1)
19757 .sr(1)
19758 .m(m)
19759 .n(n)
19760 .k(k)
19761 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019762 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019763 }
19764 }
19765 }
19766 }
19767
Marat Dukhande06f492020-04-09 00:19:31 -070019768 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019769 TEST_REQUIRES_X86_AVX512F;
19770 for (uint32_t n = 17; n < 32; n++) {
19771 for (size_t k = 1; k <= 5; k += 2) {
19772 GemmMicrokernelTester()
19773 .mr(6)
19774 .nr(16)
19775 .kr(1)
19776 .sr(1)
19777 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019778 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019779 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019780 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019781 }
19782 }
19783 }
19784
Marat Dukhande06f492020-04-09 00:19:31 -070019785 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019786 TEST_REQUIRES_X86_AVX512F;
19787 for (uint32_t n = 17; n < 32; n++) {
19788 for (size_t k = 1; k <= 5; k += 2) {
19789 GemmMicrokernelTester()
19790 .mr(6)
19791 .nr(16)
19792 .kr(1)
19793 .sr(1)
19794 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019795 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019796 .k(k)
19797 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019798 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019799 }
19800 }
19801 }
19802
Marat Dukhande06f492020-04-09 00:19:31 -070019803 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019804 TEST_REQUIRES_X86_AVX512F;
19805 for (uint32_t n = 17; n < 32; n++) {
19806 for (size_t k = 1; k <= 5; k += 2) {
19807 GemmMicrokernelTester()
19808 .mr(6)
19809 .nr(16)
19810 .kr(1)
19811 .sr(1)
19812 .m(6)
19813 .n(n)
19814 .k(k)
19815 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019816 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019817 }
19818 }
19819 }
19820
Marat Dukhande06f492020-04-09 00:19:31 -070019821 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019822 TEST_REQUIRES_X86_AVX512F;
19823 for (uint32_t n = 17; n < 32; n++) {
19824 for (size_t k = 1; k <= 5; k += 2) {
19825 for (uint32_t m = 1; m <= 6; m++) {
19826 GemmMicrokernelTester()
19827 .mr(6)
19828 .nr(16)
19829 .kr(1)
19830 .sr(1)
19831 .m(m)
19832 .n(n)
19833 .k(k)
19834 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019835 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019836 }
19837 }
19838 }
19839 }
19840
Marat Dukhande06f492020-04-09 00:19:31 -070019841 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019842 TEST_REQUIRES_X86_AVX512F;
19843 for (uint32_t n = 32; n <= 48; n += 16) {
19844 for (size_t k = 1; k <= 5; k += 2) {
19845 GemmMicrokernelTester()
19846 .mr(6)
19847 .nr(16)
19848 .kr(1)
19849 .sr(1)
19850 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019851 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019852 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019853 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019854 }
19855 }
19856 }
19857
Marat Dukhande06f492020-04-09 00:19:31 -070019858 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019859 TEST_REQUIRES_X86_AVX512F;
19860 for (uint32_t n = 32; n <= 48; n += 16) {
19861 for (size_t k = 1; k <= 5; k += 2) {
19862 GemmMicrokernelTester()
19863 .mr(6)
19864 .nr(16)
19865 .kr(1)
19866 .sr(1)
19867 .m(6)
19868 .n(n)
19869 .k(k)
19870 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019871 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019872 }
19873 }
19874 }
19875
Marat Dukhande06f492020-04-09 00:19:31 -070019876 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019877 TEST_REQUIRES_X86_AVX512F;
19878 for (uint32_t n = 32; n <= 48; n += 16) {
19879 for (size_t k = 1; k <= 5; k += 2) {
19880 GemmMicrokernelTester()
19881 .mr(6)
19882 .nr(16)
19883 .kr(1)
19884 .sr(1)
19885 .m(6)
19886 .n(n)
19887 .k(k)
19888 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019889 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019890 }
19891 }
19892 }
19893
Marat Dukhande06f492020-04-09 00:19:31 -070019894 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019895 TEST_REQUIRES_X86_AVX512F;
19896 for (uint32_t n = 32; n <= 48; n += 16) {
19897 for (size_t k = 1; k <= 5; k += 2) {
19898 for (uint32_t m = 1; m <= 6; m++) {
19899 GemmMicrokernelTester()
19900 .mr(6)
19901 .nr(16)
19902 .kr(1)
19903 .sr(1)
19904 .m(m)
19905 .n(n)
19906 .k(k)
19907 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019908 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019909 }
19910 }
19911 }
19912 }
19913
Marat Dukhande06f492020-04-09 00:19:31 -070019914 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019915 TEST_REQUIRES_X86_AVX512F;
19916 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019917 for (uint32_t n = 1; n <= 16; n++) {
19918 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019919 GemmMicrokernelTester()
19920 .mr(6)
19921 .nr(16)
19922 .kr(1)
19923 .sr(1)
19924 .m(m)
19925 .n(n)
19926 .k(k)
19927 .cm_stride(19)
19928 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019929 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019930 }
19931 }
19932 }
19933 }
19934
Marat Dukhande06f492020-04-09 00:19:31 -070019935 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019936 TEST_REQUIRES_X86_AVX512F;
19937 GemmMicrokernelTester()
19938 .mr(6)
19939 .nr(16)
19940 .kr(1)
19941 .sr(1)
19942 .m(6)
19943 .n(16)
19944 .k(1)
19945 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019946 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019947 }
19948
Marat Dukhande06f492020-04-09 00:19:31 -070019949 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019950 TEST_REQUIRES_X86_AVX512F;
19951 GemmMicrokernelTester()
19952 .mr(6)
19953 .nr(16)
19954 .kr(1)
19955 .sr(1)
19956 .m(6)
19957 .n(16)
19958 .k(1)
19959 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019960 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019961 }
19962
Marat Dukhande06f492020-04-09 00:19:31 -070019963 TEST(F32_GEMM_MINMAX_6X16__AVX512F_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019964 TEST_REQUIRES_X86_AVX512F;
19965 GemmMicrokernelTester()
19966 .mr(6)
19967 .nr(16)
19968 .kr(1)
19969 .sr(1)
19970 .m(6)
19971 .n(16)
19972 .k(1)
19973 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019974 .Test(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019975 }
19976#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19977
19978
19979#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070019980 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019981 TEST_REQUIRES_X86_AVX512F;
19982 GemmMicrokernelTester()
19983 .mr(7)
19984 .nr(16)
19985 .kr(1)
19986 .sr(1)
19987 .m(7)
19988 .n(16)
19989 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019990 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019991 }
19992
Marat Dukhande06f492020-04-09 00:19:31 -070019993 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019994 TEST_REQUIRES_X86_AVX512F;
19995 GemmMicrokernelTester()
19996 .mr(7)
19997 .nr(16)
19998 .kr(1)
19999 .sr(1)
20000 .m(7)
20001 .n(16)
20002 .k(1)
20003 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020004 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020005 }
20006
Marat Dukhande06f492020-04-09 00:19:31 -070020007 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020008 TEST_REQUIRES_X86_AVX512F;
20009 GemmMicrokernelTester()
20010 .mr(7)
20011 .nr(16)
20012 .kr(1)
20013 .sr(1)
20014 .m(7)
20015 .n(16)
20016 .k(1)
20017 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020018 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020019 }
20020
Marat Dukhande06f492020-04-09 00:19:31 -070020021 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020022 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020023 for (uint32_t n = 1; n <= 16; n++) {
20024 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020025 GemmMicrokernelTester()
20026 .mr(7)
20027 .nr(16)
20028 .kr(1)
20029 .sr(1)
20030 .m(m)
20031 .n(n)
20032 .k(1)
20033 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020034 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020035 }
20036 }
20037 }
20038
Marat Dukhande06f492020-04-09 00:19:31 -070020039 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020040 TEST_REQUIRES_X86_AVX512F;
20041 for (uint32_t m = 1; m <= 7; m++) {
20042 GemmMicrokernelTester()
20043 .mr(7)
20044 .nr(16)
20045 .kr(1)
20046 .sr(1)
20047 .m(m)
20048 .n(16)
20049 .k(1)
20050 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020051 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020052 }
20053 }
20054
Marat Dukhande06f492020-04-09 00:19:31 -070020055 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020056 TEST_REQUIRES_X86_AVX512F;
20057 for (uint32_t n = 1; n <= 16; n++) {
20058 GemmMicrokernelTester()
20059 .mr(7)
20060 .nr(16)
20061 .kr(1)
20062 .sr(1)
20063 .m(7)
20064 .n(n)
20065 .k(1)
20066 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020067 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020068 }
20069 }
20070
Marat Dukhande06f492020-04-09 00:19:31 -070020071 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020072 TEST_REQUIRES_X86_AVX512F;
20073 for (size_t k = 2; k < 10; k++) {
20074 GemmMicrokernelTester()
20075 .mr(7)
20076 .nr(16)
20077 .kr(1)
20078 .sr(1)
20079 .m(7)
20080 .n(16)
20081 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020082 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020083 }
20084 }
20085
Marat Dukhande06f492020-04-09 00:19:31 -070020086 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020087 TEST_REQUIRES_X86_AVX512F;
20088 for (size_t k = 2; k < 10; k++) {
20089 GemmMicrokernelTester()
20090 .mr(7)
20091 .nr(16)
20092 .kr(1)
20093 .sr(1)
20094 .m(7)
20095 .n(16)
20096 .k(k)
20097 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020098 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020099 }
20100 }
20101
Marat Dukhande06f492020-04-09 00:19:31 -070020102 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020103 TEST_REQUIRES_X86_AVX512F;
20104 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020105 for (uint32_t n = 1; n <= 16; n++) {
20106 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020107 GemmMicrokernelTester()
20108 .mr(7)
20109 .nr(16)
20110 .kr(1)
20111 .sr(1)
20112 .m(m)
20113 .n(n)
20114 .k(k)
20115 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020116 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020117 }
20118 }
20119 }
20120 }
20121
Marat Dukhande06f492020-04-09 00:19:31 -070020122 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020123 TEST_REQUIRES_X86_AVX512F;
20124 for (uint32_t n = 17; n < 32; n++) {
20125 for (size_t k = 1; k <= 5; k += 2) {
20126 GemmMicrokernelTester()
20127 .mr(7)
20128 .nr(16)
20129 .kr(1)
20130 .sr(1)
20131 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020132 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020133 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020134 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020135 }
20136 }
20137 }
20138
Marat Dukhande06f492020-04-09 00:19:31 -070020139 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020140 TEST_REQUIRES_X86_AVX512F;
20141 for (uint32_t n = 17; n < 32; n++) {
20142 for (size_t k = 1; k <= 5; k += 2) {
20143 GemmMicrokernelTester()
20144 .mr(7)
20145 .nr(16)
20146 .kr(1)
20147 .sr(1)
20148 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020149 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020150 .k(k)
20151 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020152 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020153 }
20154 }
20155 }
20156
Marat Dukhande06f492020-04-09 00:19:31 -070020157 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020158 TEST_REQUIRES_X86_AVX512F;
20159 for (uint32_t n = 17; n < 32; n++) {
20160 for (size_t k = 1; k <= 5; k += 2) {
20161 GemmMicrokernelTester()
20162 .mr(7)
20163 .nr(16)
20164 .kr(1)
20165 .sr(1)
20166 .m(7)
20167 .n(n)
20168 .k(k)
20169 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020170 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020171 }
20172 }
20173 }
20174
Marat Dukhande06f492020-04-09 00:19:31 -070020175 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020176 TEST_REQUIRES_X86_AVX512F;
20177 for (uint32_t n = 17; n < 32; n++) {
20178 for (size_t k = 1; k <= 5; k += 2) {
20179 for (uint32_t m = 1; m <= 7; m++) {
20180 GemmMicrokernelTester()
20181 .mr(7)
20182 .nr(16)
20183 .kr(1)
20184 .sr(1)
20185 .m(m)
20186 .n(n)
20187 .k(k)
20188 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020189 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020190 }
20191 }
20192 }
20193 }
20194
Marat Dukhande06f492020-04-09 00:19:31 -070020195 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020196 TEST_REQUIRES_X86_AVX512F;
20197 for (uint32_t n = 32; n <= 48; n += 16) {
20198 for (size_t k = 1; k <= 5; k += 2) {
20199 GemmMicrokernelTester()
20200 .mr(7)
20201 .nr(16)
20202 .kr(1)
20203 .sr(1)
20204 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020205 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020206 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020207 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020208 }
20209 }
20210 }
20211
Marat Dukhande06f492020-04-09 00:19:31 -070020212 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020213 TEST_REQUIRES_X86_AVX512F;
20214 for (uint32_t n = 32; n <= 48; n += 16) {
20215 for (size_t k = 1; k <= 5; k += 2) {
20216 GemmMicrokernelTester()
20217 .mr(7)
20218 .nr(16)
20219 .kr(1)
20220 .sr(1)
20221 .m(7)
20222 .n(n)
20223 .k(k)
20224 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020225 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020226 }
20227 }
20228 }
20229
Marat Dukhande06f492020-04-09 00:19:31 -070020230 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020231 TEST_REQUIRES_X86_AVX512F;
20232 for (uint32_t n = 32; n <= 48; n += 16) {
20233 for (size_t k = 1; k <= 5; k += 2) {
20234 GemmMicrokernelTester()
20235 .mr(7)
20236 .nr(16)
20237 .kr(1)
20238 .sr(1)
20239 .m(7)
20240 .n(n)
20241 .k(k)
20242 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020243 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020244 }
20245 }
20246 }
20247
Marat Dukhande06f492020-04-09 00:19:31 -070020248 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020249 TEST_REQUIRES_X86_AVX512F;
20250 for (uint32_t n = 32; n <= 48; n += 16) {
20251 for (size_t k = 1; k <= 5; k += 2) {
20252 for (uint32_t m = 1; m <= 7; m++) {
20253 GemmMicrokernelTester()
20254 .mr(7)
20255 .nr(16)
20256 .kr(1)
20257 .sr(1)
20258 .m(m)
20259 .n(n)
20260 .k(k)
20261 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020262 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020263 }
20264 }
20265 }
20266 }
20267
Marat Dukhande06f492020-04-09 00:19:31 -070020268 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020269 TEST_REQUIRES_X86_AVX512F;
20270 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020271 for (uint32_t n = 1; n <= 16; n++) {
20272 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020273 GemmMicrokernelTester()
20274 .mr(7)
20275 .nr(16)
20276 .kr(1)
20277 .sr(1)
20278 .m(m)
20279 .n(n)
20280 .k(k)
20281 .cm_stride(19)
20282 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020283 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020284 }
20285 }
20286 }
20287 }
20288
Marat Dukhande06f492020-04-09 00:19:31 -070020289 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020290 TEST_REQUIRES_X86_AVX512F;
20291 GemmMicrokernelTester()
20292 .mr(7)
20293 .nr(16)
20294 .kr(1)
20295 .sr(1)
20296 .m(7)
20297 .n(16)
20298 .k(1)
20299 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020300 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020301 }
20302
Marat Dukhande06f492020-04-09 00:19:31 -070020303 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020304 TEST_REQUIRES_X86_AVX512F;
20305 GemmMicrokernelTester()
20306 .mr(7)
20307 .nr(16)
20308 .kr(1)
20309 .sr(1)
20310 .m(7)
20311 .n(16)
20312 .k(1)
20313 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020314 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020315 }
20316
Marat Dukhande06f492020-04-09 00:19:31 -070020317 TEST(F32_GEMM_MINMAX_7X16__AVX512F_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020318 TEST_REQUIRES_X86_AVX512F;
20319 GemmMicrokernelTester()
20320 .mr(7)
20321 .nr(16)
20322 .kr(1)
20323 .sr(1)
20324 .m(7)
20325 .n(16)
20326 .k(1)
20327 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020328 .Test(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020329 }
20330#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20331
20332
20333#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070020334 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020335 TEST_REQUIRES_X86_AVX512F;
20336 GemmMicrokernelTester()
20337 .mr(8)
20338 .nr(16)
20339 .kr(1)
20340 .sr(1)
20341 .m(8)
20342 .n(16)
20343 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020344 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020345 }
20346
Marat Dukhande06f492020-04-09 00:19:31 -070020347 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020348 TEST_REQUIRES_X86_AVX512F;
20349 GemmMicrokernelTester()
20350 .mr(8)
20351 .nr(16)
20352 .kr(1)
20353 .sr(1)
20354 .m(8)
20355 .n(16)
20356 .k(1)
20357 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020358 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020359 }
20360
Marat Dukhande06f492020-04-09 00:19:31 -070020361 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020362 TEST_REQUIRES_X86_AVX512F;
20363 GemmMicrokernelTester()
20364 .mr(8)
20365 .nr(16)
20366 .kr(1)
20367 .sr(1)
20368 .m(8)
20369 .n(16)
20370 .k(1)
20371 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020372 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020373 }
20374
Marat Dukhande06f492020-04-09 00:19:31 -070020375 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020376 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020377 for (uint32_t n = 1; n <= 16; n++) {
20378 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020379 GemmMicrokernelTester()
20380 .mr(8)
20381 .nr(16)
20382 .kr(1)
20383 .sr(1)
20384 .m(m)
20385 .n(n)
20386 .k(1)
20387 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020388 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020389 }
20390 }
20391 }
20392
Marat Dukhande06f492020-04-09 00:19:31 -070020393 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020394 TEST_REQUIRES_X86_AVX512F;
20395 for (uint32_t m = 1; m <= 8; m++) {
20396 GemmMicrokernelTester()
20397 .mr(8)
20398 .nr(16)
20399 .kr(1)
20400 .sr(1)
20401 .m(m)
20402 .n(16)
20403 .k(1)
20404 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020405 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020406 }
20407 }
20408
Marat Dukhande06f492020-04-09 00:19:31 -070020409 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020410 TEST_REQUIRES_X86_AVX512F;
20411 for (uint32_t n = 1; n <= 16; n++) {
20412 GemmMicrokernelTester()
20413 .mr(8)
20414 .nr(16)
20415 .kr(1)
20416 .sr(1)
20417 .m(8)
20418 .n(n)
20419 .k(1)
20420 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020421 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020422 }
20423 }
20424
Marat Dukhande06f492020-04-09 00:19:31 -070020425 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020426 TEST_REQUIRES_X86_AVX512F;
20427 for (size_t k = 2; k < 10; k++) {
20428 GemmMicrokernelTester()
20429 .mr(8)
20430 .nr(16)
20431 .kr(1)
20432 .sr(1)
20433 .m(8)
20434 .n(16)
20435 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020436 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020437 }
20438 }
20439
Marat Dukhande06f492020-04-09 00:19:31 -070020440 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020441 TEST_REQUIRES_X86_AVX512F;
20442 for (size_t k = 2; k < 10; k++) {
20443 GemmMicrokernelTester()
20444 .mr(8)
20445 .nr(16)
20446 .kr(1)
20447 .sr(1)
20448 .m(8)
20449 .n(16)
20450 .k(k)
20451 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020452 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020453 }
20454 }
20455
Marat Dukhande06f492020-04-09 00:19:31 -070020456 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020457 TEST_REQUIRES_X86_AVX512F;
20458 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020459 for (uint32_t n = 1; n <= 16; n++) {
20460 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020461 GemmMicrokernelTester()
20462 .mr(8)
20463 .nr(16)
20464 .kr(1)
20465 .sr(1)
20466 .m(m)
20467 .n(n)
20468 .k(k)
20469 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020470 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020471 }
20472 }
20473 }
20474 }
20475
Marat Dukhande06f492020-04-09 00:19:31 -070020476 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020477 TEST_REQUIRES_X86_AVX512F;
20478 for (uint32_t n = 17; n < 32; n++) {
20479 for (size_t k = 1; k <= 5; k += 2) {
20480 GemmMicrokernelTester()
20481 .mr(8)
20482 .nr(16)
20483 .kr(1)
20484 .sr(1)
20485 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020486 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020487 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020488 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020489 }
20490 }
20491 }
20492
Marat Dukhande06f492020-04-09 00:19:31 -070020493 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020494 TEST_REQUIRES_X86_AVX512F;
20495 for (uint32_t n = 17; n < 32; n++) {
20496 for (size_t k = 1; k <= 5; k += 2) {
20497 GemmMicrokernelTester()
20498 .mr(8)
20499 .nr(16)
20500 .kr(1)
20501 .sr(1)
20502 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020503 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020504 .k(k)
20505 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020506 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020507 }
20508 }
20509 }
20510
Marat Dukhande06f492020-04-09 00:19:31 -070020511 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020512 TEST_REQUIRES_X86_AVX512F;
20513 for (uint32_t n = 17; n < 32; n++) {
20514 for (size_t k = 1; k <= 5; k += 2) {
20515 GemmMicrokernelTester()
20516 .mr(8)
20517 .nr(16)
20518 .kr(1)
20519 .sr(1)
20520 .m(8)
20521 .n(n)
20522 .k(k)
20523 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020524 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020525 }
20526 }
20527 }
20528
Marat Dukhande06f492020-04-09 00:19:31 -070020529 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020530 TEST_REQUIRES_X86_AVX512F;
20531 for (uint32_t n = 17; n < 32; n++) {
20532 for (size_t k = 1; k <= 5; k += 2) {
20533 for (uint32_t m = 1; m <= 8; m++) {
20534 GemmMicrokernelTester()
20535 .mr(8)
20536 .nr(16)
20537 .kr(1)
20538 .sr(1)
20539 .m(m)
20540 .n(n)
20541 .k(k)
20542 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020543 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020544 }
20545 }
20546 }
20547 }
20548
Marat Dukhande06f492020-04-09 00:19:31 -070020549 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020550 TEST_REQUIRES_X86_AVX512F;
20551 for (uint32_t n = 32; n <= 48; n += 16) {
20552 for (size_t k = 1; k <= 5; k += 2) {
20553 GemmMicrokernelTester()
20554 .mr(8)
20555 .nr(16)
20556 .kr(1)
20557 .sr(1)
20558 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020559 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020560 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020561 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020562 }
20563 }
20564 }
20565
Marat Dukhande06f492020-04-09 00:19:31 -070020566 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020567 TEST_REQUIRES_X86_AVX512F;
20568 for (uint32_t n = 32; n <= 48; n += 16) {
20569 for (size_t k = 1; k <= 5; k += 2) {
20570 GemmMicrokernelTester()
20571 .mr(8)
20572 .nr(16)
20573 .kr(1)
20574 .sr(1)
20575 .m(8)
20576 .n(n)
20577 .k(k)
20578 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020579 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020580 }
20581 }
20582 }
20583
Marat Dukhande06f492020-04-09 00:19:31 -070020584 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020585 TEST_REQUIRES_X86_AVX512F;
20586 for (uint32_t n = 32; n <= 48; n += 16) {
20587 for (size_t k = 1; k <= 5; k += 2) {
20588 GemmMicrokernelTester()
20589 .mr(8)
20590 .nr(16)
20591 .kr(1)
20592 .sr(1)
20593 .m(8)
20594 .n(n)
20595 .k(k)
20596 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020597 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020598 }
20599 }
20600 }
20601
Marat Dukhande06f492020-04-09 00:19:31 -070020602 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020603 TEST_REQUIRES_X86_AVX512F;
20604 for (uint32_t n = 32; n <= 48; n += 16) {
20605 for (size_t k = 1; k <= 5; k += 2) {
20606 for (uint32_t m = 1; m <= 8; m++) {
20607 GemmMicrokernelTester()
20608 .mr(8)
20609 .nr(16)
20610 .kr(1)
20611 .sr(1)
20612 .m(m)
20613 .n(n)
20614 .k(k)
20615 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020616 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020617 }
20618 }
20619 }
20620 }
20621
Marat Dukhande06f492020-04-09 00:19:31 -070020622 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020623 TEST_REQUIRES_X86_AVX512F;
20624 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020625 for (uint32_t n = 1; n <= 16; n++) {
20626 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020627 GemmMicrokernelTester()
20628 .mr(8)
20629 .nr(16)
20630 .kr(1)
20631 .sr(1)
20632 .m(m)
20633 .n(n)
20634 .k(k)
20635 .cm_stride(19)
20636 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020637 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020638 }
20639 }
20640 }
20641 }
20642
Marat Dukhande06f492020-04-09 00:19:31 -070020643 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020644 TEST_REQUIRES_X86_AVX512F;
20645 GemmMicrokernelTester()
20646 .mr(8)
20647 .nr(16)
20648 .kr(1)
20649 .sr(1)
20650 .m(8)
20651 .n(16)
20652 .k(1)
20653 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020654 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020655 }
20656
Marat Dukhande06f492020-04-09 00:19:31 -070020657 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020658 TEST_REQUIRES_X86_AVX512F;
20659 GemmMicrokernelTester()
20660 .mr(8)
20661 .nr(16)
20662 .kr(1)
20663 .sr(1)
20664 .m(8)
20665 .n(16)
20666 .k(1)
20667 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020668 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020669 }
20670
Marat Dukhande06f492020-04-09 00:19:31 -070020671 TEST(F32_GEMM_MINMAX_8X16__AVX512F_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020672 TEST_REQUIRES_X86_AVX512F;
20673 GemmMicrokernelTester()
20674 .mr(8)
20675 .nr(16)
20676 .kr(1)
20677 .sr(1)
20678 .m(8)
20679 .n(16)
20680 .k(1)
20681 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020682 .Test(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020683 }
20684#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20685
20686
Marat Dukhan4c617792021-12-21 15:47:58 -080020687#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080020688 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020689 GemmMicrokernelTester()
20690 .mr(5)
20691 .nr(8)
20692 .kr(1)
20693 .sr(1)
20694 .m(5)
20695 .n(8)
20696 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020697 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020698 }
20699
Frank Barchard0725b8d2020-12-07 11:07:35 -080020700 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020701 GemmMicrokernelTester()
20702 .mr(5)
20703 .nr(8)
20704 .kr(1)
20705 .sr(1)
20706 .m(5)
20707 .n(8)
20708 .k(1)
20709 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020710 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020711 }
20712
Frank Barchard0725b8d2020-12-07 11:07:35 -080020713 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020714 GemmMicrokernelTester()
20715 .mr(5)
20716 .nr(8)
20717 .kr(1)
20718 .sr(1)
20719 .m(5)
20720 .n(8)
20721 .k(1)
20722 .a_stride(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020723 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020724 }
20725
Frank Barchard0725b8d2020-12-07 11:07:35 -080020726 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020727 for (uint32_t n = 1; n <= 8; n++) {
20728 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020729 GemmMicrokernelTester()
20730 .mr(5)
20731 .nr(8)
20732 .kr(1)
20733 .sr(1)
20734 .m(m)
20735 .n(n)
20736 .k(1)
20737 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020738 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020739 }
20740 }
20741 }
20742
Frank Barchard0725b8d2020-12-07 11:07:35 -080020743 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020744 for (uint32_t m = 1; m <= 5; m++) {
20745 GemmMicrokernelTester()
20746 .mr(5)
20747 .nr(8)
20748 .kr(1)
20749 .sr(1)
20750 .m(m)
20751 .n(8)
20752 .k(1)
20753 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020754 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020755 }
20756 }
20757
Frank Barchard0725b8d2020-12-07 11:07:35 -080020758 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020759 for (uint32_t n = 1; n <= 8; n++) {
20760 GemmMicrokernelTester()
20761 .mr(5)
20762 .nr(8)
20763 .kr(1)
20764 .sr(1)
20765 .m(5)
20766 .n(n)
20767 .k(1)
20768 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020769 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020770 }
20771 }
20772
Frank Barchard0725b8d2020-12-07 11:07:35 -080020773 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020774 for (size_t k = 2; k < 10; k++) {
20775 GemmMicrokernelTester()
20776 .mr(5)
20777 .nr(8)
20778 .kr(1)
20779 .sr(1)
20780 .m(5)
20781 .n(8)
20782 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020783 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020784 }
20785 }
20786
Frank Barchard0725b8d2020-12-07 11:07:35 -080020787 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020788 for (size_t k = 2; k < 10; k++) {
20789 GemmMicrokernelTester()
20790 .mr(5)
20791 .nr(8)
20792 .kr(1)
20793 .sr(1)
20794 .m(5)
20795 .n(8)
20796 .k(k)
20797 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020798 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020799 }
20800 }
20801
Frank Barchard0725b8d2020-12-07 11:07:35 -080020802 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020803 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020804 for (uint32_t n = 1; n <= 8; n++) {
20805 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020806 GemmMicrokernelTester()
20807 .mr(5)
20808 .nr(8)
20809 .kr(1)
20810 .sr(1)
20811 .m(m)
20812 .n(n)
20813 .k(k)
20814 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020815 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020816 }
20817 }
20818 }
20819 }
20820
Frank Barchard0725b8d2020-12-07 11:07:35 -080020821 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020822 for (uint32_t n = 9; n < 16; n++) {
20823 for (size_t k = 1; k <= 5; k += 2) {
20824 GemmMicrokernelTester()
20825 .mr(5)
20826 .nr(8)
20827 .kr(1)
20828 .sr(1)
20829 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020830 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020831 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020832 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020833 }
20834 }
20835 }
20836
Frank Barchard0725b8d2020-12-07 11:07:35 -080020837 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020838 for (uint32_t n = 9; n < 16; n++) {
20839 for (size_t k = 1; k <= 5; k += 2) {
20840 GemmMicrokernelTester()
20841 .mr(5)
20842 .nr(8)
20843 .kr(1)
20844 .sr(1)
20845 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020846 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020847 .k(k)
20848 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020849 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020850 }
20851 }
20852 }
20853
Frank Barchard0725b8d2020-12-07 11:07:35 -080020854 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020855 for (uint32_t n = 9; n < 16; n++) {
20856 for (size_t k = 1; k <= 5; k += 2) {
20857 GemmMicrokernelTester()
20858 .mr(5)
20859 .nr(8)
20860 .kr(1)
20861 .sr(1)
20862 .m(5)
20863 .n(n)
20864 .k(k)
20865 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020866 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020867 }
20868 }
20869 }
20870
Frank Barchard0725b8d2020-12-07 11:07:35 -080020871 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020872 for (uint32_t n = 9; n < 16; n++) {
20873 for (size_t k = 1; k <= 5; k += 2) {
20874 for (uint32_t m = 1; m <= 5; m++) {
20875 GemmMicrokernelTester()
20876 .mr(5)
20877 .nr(8)
20878 .kr(1)
20879 .sr(1)
20880 .m(m)
20881 .n(n)
20882 .k(k)
20883 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020884 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020885 }
20886 }
20887 }
20888 }
20889
Frank Barchard0725b8d2020-12-07 11:07:35 -080020890 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020891 for (uint32_t n = 16; n <= 24; n += 8) {
20892 for (size_t k = 1; k <= 5; k += 2) {
20893 GemmMicrokernelTester()
20894 .mr(5)
20895 .nr(8)
20896 .kr(1)
20897 .sr(1)
20898 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020899 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020900 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020901 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020902 }
20903 }
20904 }
20905
Frank Barchard0725b8d2020-12-07 11:07:35 -080020906 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020907 for (uint32_t n = 16; n <= 24; n += 8) {
20908 for (size_t k = 1; k <= 5; k += 2) {
20909 GemmMicrokernelTester()
20910 .mr(5)
20911 .nr(8)
20912 .kr(1)
20913 .sr(1)
20914 .m(5)
20915 .n(n)
20916 .k(k)
20917 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020918 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020919 }
20920 }
20921 }
20922
Frank Barchard0725b8d2020-12-07 11:07:35 -080020923 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020924 for (uint32_t n = 16; n <= 24; n += 8) {
20925 for (size_t k = 1; k <= 5; k += 2) {
20926 GemmMicrokernelTester()
20927 .mr(5)
20928 .nr(8)
20929 .kr(1)
20930 .sr(1)
20931 .m(5)
20932 .n(n)
20933 .k(k)
20934 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020935 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020936 }
20937 }
20938 }
20939
Frank Barchard0725b8d2020-12-07 11:07:35 -080020940 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020941 for (uint32_t n = 16; n <= 24; n += 8) {
20942 for (size_t k = 1; k <= 5; k += 2) {
20943 for (uint32_t m = 1; m <= 5; m++) {
20944 GemmMicrokernelTester()
20945 .mr(5)
20946 .nr(8)
20947 .kr(1)
20948 .sr(1)
20949 .m(m)
20950 .n(n)
20951 .k(k)
20952 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020953 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020954 }
20955 }
20956 }
20957 }
20958
Frank Barchard0725b8d2020-12-07 11:07:35 -080020959 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020960 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020961 for (uint32_t n = 1; n <= 8; n++) {
20962 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020963 GemmMicrokernelTester()
20964 .mr(5)
20965 .nr(8)
20966 .kr(1)
20967 .sr(1)
20968 .m(m)
20969 .n(n)
20970 .k(k)
20971 .cm_stride(11)
20972 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020973 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020974 }
20975 }
20976 }
20977 }
20978
Frank Barchard0725b8d2020-12-07 11:07:35 -080020979 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020980 GemmMicrokernelTester()
20981 .mr(5)
20982 .nr(8)
20983 .kr(1)
20984 .sr(1)
20985 .m(5)
20986 .n(8)
20987 .k(1)
20988 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080020989 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020990 }
20991
Frank Barchard0725b8d2020-12-07 11:07:35 -080020992 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070020993 GemmMicrokernelTester()
20994 .mr(5)
20995 .nr(8)
20996 .kr(1)
20997 .sr(1)
20998 .m(5)
20999 .n(8)
21000 .k(1)
21001 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021002 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021003 }
21004
Frank Barchard0725b8d2020-12-07 11:07:35 -080021005 TEST(F32_GEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021006 GemmMicrokernelTester()
21007 .mr(5)
21008 .nr(8)
21009 .kr(1)
21010 .sr(1)
21011 .m(5)
21012 .n(8)
21013 .k(1)
21014 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021015 .Test(xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021016 }
Marat Dukhan4c617792021-12-21 15:47:58 -080021017#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021018
21019
Marat Dukhan4c617792021-12-21 15:47:58 -080021020#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080021021 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021022 GemmMicrokernelTester()
21023 .mr(1)
21024 .nr(8)
21025 .kr(1)
21026 .sr(1)
21027 .m(1)
21028 .n(8)
21029 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021030 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021031 }
21032
Frank Barchard0725b8d2020-12-07 11:07:35 -080021033 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021034 GemmMicrokernelTester()
21035 .mr(1)
21036 .nr(8)
21037 .kr(1)
21038 .sr(1)
21039 .m(1)
21040 .n(8)
21041 .k(1)
21042 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021043 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021044 }
21045
Frank Barchard0725b8d2020-12-07 11:07:35 -080021046 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021047 GemmMicrokernelTester()
21048 .mr(1)
21049 .nr(8)
21050 .kr(1)
21051 .sr(1)
21052 .m(1)
21053 .n(8)
21054 .k(1)
21055 .a_stride(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021056 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021057 }
21058
Frank Barchard0725b8d2020-12-07 11:07:35 -080021059 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021060 for (uint32_t n = 1; n <= 8; n++) {
21061 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021062 GemmMicrokernelTester()
21063 .mr(1)
21064 .nr(8)
21065 .kr(1)
21066 .sr(1)
21067 .m(m)
21068 .n(n)
21069 .k(1)
21070 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021071 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021072 }
21073 }
21074 }
21075
Frank Barchard0725b8d2020-12-07 11:07:35 -080021076 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021077 for (uint32_t m = 1; m <= 1; m++) {
21078 GemmMicrokernelTester()
21079 .mr(1)
21080 .nr(8)
21081 .kr(1)
21082 .sr(1)
21083 .m(m)
21084 .n(8)
21085 .k(1)
21086 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021087 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021088 }
21089 }
21090
Frank Barchard0725b8d2020-12-07 11:07:35 -080021091 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021092 for (uint32_t n = 1; n <= 8; n++) {
21093 GemmMicrokernelTester()
21094 .mr(1)
21095 .nr(8)
21096 .kr(1)
21097 .sr(1)
21098 .m(1)
21099 .n(n)
21100 .k(1)
21101 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021102 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021103 }
21104 }
21105
Frank Barchard0725b8d2020-12-07 11:07:35 -080021106 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021107 for (size_t k = 2; k < 10; k++) {
21108 GemmMicrokernelTester()
21109 .mr(1)
21110 .nr(8)
21111 .kr(1)
21112 .sr(1)
21113 .m(1)
21114 .n(8)
21115 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021116 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021117 }
21118 }
21119
Frank Barchard0725b8d2020-12-07 11:07:35 -080021120 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021121 for (size_t k = 2; k < 10; k++) {
21122 GemmMicrokernelTester()
21123 .mr(1)
21124 .nr(8)
21125 .kr(1)
21126 .sr(1)
21127 .m(1)
21128 .n(8)
21129 .k(k)
21130 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021131 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021132 }
21133 }
21134
Frank Barchard0725b8d2020-12-07 11:07:35 -080021135 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021136 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021137 for (uint32_t n = 1; n <= 8; n++) {
21138 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021139 GemmMicrokernelTester()
21140 .mr(1)
21141 .nr(8)
21142 .kr(1)
21143 .sr(1)
21144 .m(m)
21145 .n(n)
21146 .k(k)
21147 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021148 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021149 }
21150 }
21151 }
21152 }
21153
Frank Barchard0725b8d2020-12-07 11:07:35 -080021154 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021155 for (uint32_t n = 9; n < 16; n++) {
21156 for (size_t k = 1; k <= 5; k += 2) {
21157 GemmMicrokernelTester()
21158 .mr(1)
21159 .nr(8)
21160 .kr(1)
21161 .sr(1)
21162 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021163 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021164 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021165 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021166 }
21167 }
21168 }
21169
Frank Barchard0725b8d2020-12-07 11:07:35 -080021170 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021171 for (uint32_t n = 9; n < 16; n++) {
21172 for (size_t k = 1; k <= 5; k += 2) {
21173 GemmMicrokernelTester()
21174 .mr(1)
21175 .nr(8)
21176 .kr(1)
21177 .sr(1)
21178 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021179 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021180 .k(k)
21181 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021182 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021183 }
21184 }
21185 }
21186
Frank Barchard0725b8d2020-12-07 11:07:35 -080021187 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021188 for (uint32_t n = 9; n < 16; n++) {
21189 for (size_t k = 1; k <= 5; k += 2) {
21190 GemmMicrokernelTester()
21191 .mr(1)
21192 .nr(8)
21193 .kr(1)
21194 .sr(1)
21195 .m(1)
21196 .n(n)
21197 .k(k)
21198 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021199 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021200 }
21201 }
21202 }
21203
Frank Barchard0725b8d2020-12-07 11:07:35 -080021204 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021205 for (uint32_t n = 9; n < 16; n++) {
21206 for (size_t k = 1; k <= 5; k += 2) {
21207 for (uint32_t m = 1; m <= 1; m++) {
21208 GemmMicrokernelTester()
21209 .mr(1)
21210 .nr(8)
21211 .kr(1)
21212 .sr(1)
21213 .m(m)
21214 .n(n)
21215 .k(k)
21216 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021217 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021218 }
21219 }
21220 }
21221 }
21222
Frank Barchard0725b8d2020-12-07 11:07:35 -080021223 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021224 for (uint32_t n = 16; n <= 24; n += 8) {
21225 for (size_t k = 1; k <= 5; k += 2) {
21226 GemmMicrokernelTester()
21227 .mr(1)
21228 .nr(8)
21229 .kr(1)
21230 .sr(1)
21231 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021232 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021233 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021234 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021235 }
21236 }
21237 }
21238
Frank Barchard0725b8d2020-12-07 11:07:35 -080021239 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021240 for (uint32_t n = 16; n <= 24; n += 8) {
21241 for (size_t k = 1; k <= 5; k += 2) {
21242 GemmMicrokernelTester()
21243 .mr(1)
21244 .nr(8)
21245 .kr(1)
21246 .sr(1)
21247 .m(1)
21248 .n(n)
21249 .k(k)
21250 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021251 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021252 }
21253 }
21254 }
21255
Frank Barchard0725b8d2020-12-07 11:07:35 -080021256 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021257 for (uint32_t n = 16; n <= 24; n += 8) {
21258 for (size_t k = 1; k <= 5; k += 2) {
21259 GemmMicrokernelTester()
21260 .mr(1)
21261 .nr(8)
21262 .kr(1)
21263 .sr(1)
21264 .m(1)
21265 .n(n)
21266 .k(k)
21267 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021268 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021269 }
21270 }
21271 }
21272
Frank Barchard0725b8d2020-12-07 11:07:35 -080021273 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021274 for (uint32_t n = 16; n <= 24; n += 8) {
21275 for (size_t k = 1; k <= 5; k += 2) {
21276 for (uint32_t m = 1; m <= 1; m++) {
21277 GemmMicrokernelTester()
21278 .mr(1)
21279 .nr(8)
21280 .kr(1)
21281 .sr(1)
21282 .m(m)
21283 .n(n)
21284 .k(k)
21285 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021286 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021287 }
21288 }
21289 }
21290 }
21291
Frank Barchard0725b8d2020-12-07 11:07:35 -080021292 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021293 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021294 for (uint32_t n = 1; n <= 8; n++) {
21295 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021296 GemmMicrokernelTester()
21297 .mr(1)
21298 .nr(8)
21299 .kr(1)
21300 .sr(1)
21301 .m(m)
21302 .n(n)
21303 .k(k)
21304 .cm_stride(11)
21305 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021306 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021307 }
21308 }
21309 }
21310 }
21311
Frank Barchard0725b8d2020-12-07 11:07:35 -080021312 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021313 GemmMicrokernelTester()
21314 .mr(1)
21315 .nr(8)
21316 .kr(1)
21317 .sr(1)
21318 .m(1)
21319 .n(8)
21320 .k(1)
21321 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021322 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021323 }
21324
Frank Barchard0725b8d2020-12-07 11:07:35 -080021325 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021326 GemmMicrokernelTester()
21327 .mr(1)
21328 .nr(8)
21329 .kr(1)
21330 .sr(1)
21331 .m(1)
21332 .n(8)
21333 .k(1)
21334 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021335 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021336 }
21337
Frank Barchard0725b8d2020-12-07 11:07:35 -080021338 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021339 GemmMicrokernelTester()
21340 .mr(1)
21341 .nr(8)
21342 .kr(1)
21343 .sr(1)
21344 .m(1)
21345 .n(8)
21346 .k(1)
21347 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021348 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021349 }
Marat Dukhan4c617792021-12-21 15:47:58 -080021350#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021351
21352
Marat Dukhan4c617792021-12-21 15:47:58 -080021353#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080021354 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021355 GemmMicrokernelTester()
21356 .mr(3)
21357 .nr(8)
21358 .kr(1)
21359 .sr(1)
21360 .m(3)
21361 .n(8)
21362 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021363 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021364 }
21365
Frank Barchard0725b8d2020-12-07 11:07:35 -080021366 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021367 GemmMicrokernelTester()
21368 .mr(3)
21369 .nr(8)
21370 .kr(1)
21371 .sr(1)
21372 .m(3)
21373 .n(8)
21374 .k(1)
21375 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021376 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021377 }
21378
Frank Barchard0725b8d2020-12-07 11:07:35 -080021379 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021380 GemmMicrokernelTester()
21381 .mr(3)
21382 .nr(8)
21383 .kr(1)
21384 .sr(1)
21385 .m(3)
21386 .n(8)
21387 .k(1)
21388 .a_stride(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021389 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021390 }
21391
Frank Barchard0725b8d2020-12-07 11:07:35 -080021392 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021393 for (uint32_t n = 1; n <= 8; n++) {
21394 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021395 GemmMicrokernelTester()
21396 .mr(3)
21397 .nr(8)
21398 .kr(1)
21399 .sr(1)
21400 .m(m)
21401 .n(n)
21402 .k(1)
21403 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021404 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021405 }
21406 }
21407 }
21408
Frank Barchard0725b8d2020-12-07 11:07:35 -080021409 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021410 for (uint32_t m = 1; m <= 3; m++) {
21411 GemmMicrokernelTester()
21412 .mr(3)
21413 .nr(8)
21414 .kr(1)
21415 .sr(1)
21416 .m(m)
21417 .n(8)
21418 .k(1)
21419 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021420 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021421 }
21422 }
21423
Frank Barchard0725b8d2020-12-07 11:07:35 -080021424 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021425 for (uint32_t n = 1; n <= 8; n++) {
21426 GemmMicrokernelTester()
21427 .mr(3)
21428 .nr(8)
21429 .kr(1)
21430 .sr(1)
21431 .m(3)
21432 .n(n)
21433 .k(1)
21434 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021435 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021436 }
21437 }
21438
Frank Barchard0725b8d2020-12-07 11:07:35 -080021439 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021440 for (size_t k = 2; k < 10; k++) {
21441 GemmMicrokernelTester()
21442 .mr(3)
21443 .nr(8)
21444 .kr(1)
21445 .sr(1)
21446 .m(3)
21447 .n(8)
21448 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021449 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021450 }
21451 }
21452
Frank Barchard0725b8d2020-12-07 11:07:35 -080021453 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021454 for (size_t k = 2; k < 10; k++) {
21455 GemmMicrokernelTester()
21456 .mr(3)
21457 .nr(8)
21458 .kr(1)
21459 .sr(1)
21460 .m(3)
21461 .n(8)
21462 .k(k)
21463 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021464 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021465 }
21466 }
21467
Frank Barchard0725b8d2020-12-07 11:07:35 -080021468 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021469 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021470 for (uint32_t n = 1; n <= 8; n++) {
21471 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021472 GemmMicrokernelTester()
21473 .mr(3)
21474 .nr(8)
21475 .kr(1)
21476 .sr(1)
21477 .m(m)
21478 .n(n)
21479 .k(k)
21480 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021481 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021482 }
21483 }
21484 }
21485 }
21486
Frank Barchard0725b8d2020-12-07 11:07:35 -080021487 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021488 for (uint32_t n = 9; n < 16; n++) {
21489 for (size_t k = 1; k <= 5; k += 2) {
21490 GemmMicrokernelTester()
21491 .mr(3)
21492 .nr(8)
21493 .kr(1)
21494 .sr(1)
21495 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021496 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021497 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021498 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021499 }
21500 }
21501 }
21502
Frank Barchard0725b8d2020-12-07 11:07:35 -080021503 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021504 for (uint32_t n = 9; n < 16; n++) {
21505 for (size_t k = 1; k <= 5; k += 2) {
21506 GemmMicrokernelTester()
21507 .mr(3)
21508 .nr(8)
21509 .kr(1)
21510 .sr(1)
21511 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021512 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021513 .k(k)
21514 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021515 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021516 }
21517 }
21518 }
21519
Frank Barchard0725b8d2020-12-07 11:07:35 -080021520 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021521 for (uint32_t n = 9; n < 16; n++) {
21522 for (size_t k = 1; k <= 5; k += 2) {
21523 GemmMicrokernelTester()
21524 .mr(3)
21525 .nr(8)
21526 .kr(1)
21527 .sr(1)
21528 .m(3)
21529 .n(n)
21530 .k(k)
21531 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021532 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021533 }
21534 }
21535 }
21536
Frank Barchard0725b8d2020-12-07 11:07:35 -080021537 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021538 for (uint32_t n = 9; n < 16; n++) {
21539 for (size_t k = 1; k <= 5; k += 2) {
21540 for (uint32_t m = 1; m <= 3; m++) {
21541 GemmMicrokernelTester()
21542 .mr(3)
21543 .nr(8)
21544 .kr(1)
21545 .sr(1)
21546 .m(m)
21547 .n(n)
21548 .k(k)
21549 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021550 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021551 }
21552 }
21553 }
21554 }
21555
Frank Barchard0725b8d2020-12-07 11:07:35 -080021556 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021557 for (uint32_t n = 16; n <= 24; n += 8) {
21558 for (size_t k = 1; k <= 5; k += 2) {
21559 GemmMicrokernelTester()
21560 .mr(3)
21561 .nr(8)
21562 .kr(1)
21563 .sr(1)
21564 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021565 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021566 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021567 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021568 }
21569 }
21570 }
21571
Frank Barchard0725b8d2020-12-07 11:07:35 -080021572 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021573 for (uint32_t n = 16; n <= 24; n += 8) {
21574 for (size_t k = 1; k <= 5; k += 2) {
21575 GemmMicrokernelTester()
21576 .mr(3)
21577 .nr(8)
21578 .kr(1)
21579 .sr(1)
21580 .m(3)
21581 .n(n)
21582 .k(k)
21583 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021584 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021585 }
21586 }
21587 }
21588
Frank Barchard0725b8d2020-12-07 11:07:35 -080021589 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021590 for (uint32_t n = 16; n <= 24; n += 8) {
21591 for (size_t k = 1; k <= 5; k += 2) {
21592 GemmMicrokernelTester()
21593 .mr(3)
21594 .nr(8)
21595 .kr(1)
21596 .sr(1)
21597 .m(3)
21598 .n(n)
21599 .k(k)
21600 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021601 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021602 }
21603 }
21604 }
21605
Frank Barchard0725b8d2020-12-07 11:07:35 -080021606 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021607 for (uint32_t n = 16; n <= 24; n += 8) {
21608 for (size_t k = 1; k <= 5; k += 2) {
21609 for (uint32_t m = 1; m <= 3; m++) {
21610 GemmMicrokernelTester()
21611 .mr(3)
21612 .nr(8)
21613 .kr(1)
21614 .sr(1)
21615 .m(m)
21616 .n(n)
21617 .k(k)
21618 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021619 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021620 }
21621 }
21622 }
21623 }
21624
Frank Barchard0725b8d2020-12-07 11:07:35 -080021625 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021626 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021627 for (uint32_t n = 1; n <= 8; n++) {
21628 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021629 GemmMicrokernelTester()
21630 .mr(3)
21631 .nr(8)
21632 .kr(1)
21633 .sr(1)
21634 .m(m)
21635 .n(n)
21636 .k(k)
21637 .cm_stride(11)
21638 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021639 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021640 }
21641 }
21642 }
21643 }
21644
Frank Barchard0725b8d2020-12-07 11:07:35 -080021645 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021646 GemmMicrokernelTester()
21647 .mr(3)
21648 .nr(8)
21649 .kr(1)
21650 .sr(1)
21651 .m(3)
21652 .n(8)
21653 .k(1)
21654 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021655 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021656 }
21657
Frank Barchard0725b8d2020-12-07 11:07:35 -080021658 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021659 GemmMicrokernelTester()
21660 .mr(3)
21661 .nr(8)
21662 .kr(1)
21663 .sr(1)
21664 .m(3)
21665 .n(8)
21666 .k(1)
21667 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021668 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021669 }
21670
Frank Barchard0725b8d2020-12-07 11:07:35 -080021671 TEST(F32_GEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021672 GemmMicrokernelTester()
21673 .mr(3)
21674 .nr(8)
21675 .kr(1)
21676 .sr(1)
21677 .m(3)
21678 .n(8)
21679 .k(1)
21680 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021681 .Test(xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021682 }
Marat Dukhan4c617792021-12-21 15:47:58 -080021683#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021684
21685
Marat Dukhan4c617792021-12-21 15:47:58 -080021686#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080021687 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021688 GemmMicrokernelTester()
21689 .mr(4)
21690 .nr(8)
21691 .kr(1)
21692 .sr(1)
21693 .m(4)
21694 .n(8)
21695 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021696 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021697 }
21698
Frank Barchard0725b8d2020-12-07 11:07:35 -080021699 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021700 GemmMicrokernelTester()
21701 .mr(4)
21702 .nr(8)
21703 .kr(1)
21704 .sr(1)
21705 .m(4)
21706 .n(8)
21707 .k(1)
21708 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021709 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021710 }
21711
Frank Barchard0725b8d2020-12-07 11:07:35 -080021712 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021713 GemmMicrokernelTester()
21714 .mr(4)
21715 .nr(8)
21716 .kr(1)
21717 .sr(1)
21718 .m(4)
21719 .n(8)
21720 .k(1)
21721 .a_stride(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021722 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021723 }
21724
Frank Barchard0725b8d2020-12-07 11:07:35 -080021725 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021726 for (uint32_t n = 1; n <= 8; n++) {
21727 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021728 GemmMicrokernelTester()
21729 .mr(4)
21730 .nr(8)
21731 .kr(1)
21732 .sr(1)
21733 .m(m)
21734 .n(n)
21735 .k(1)
21736 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021737 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021738 }
21739 }
21740 }
21741
Frank Barchard0725b8d2020-12-07 11:07:35 -080021742 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021743 for (uint32_t m = 1; m <= 4; m++) {
21744 GemmMicrokernelTester()
21745 .mr(4)
21746 .nr(8)
21747 .kr(1)
21748 .sr(1)
21749 .m(m)
21750 .n(8)
21751 .k(1)
21752 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021753 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021754 }
21755 }
21756
Frank Barchard0725b8d2020-12-07 11:07:35 -080021757 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021758 for (uint32_t n = 1; n <= 8; n++) {
21759 GemmMicrokernelTester()
21760 .mr(4)
21761 .nr(8)
21762 .kr(1)
21763 .sr(1)
21764 .m(4)
21765 .n(n)
21766 .k(1)
21767 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021768 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021769 }
21770 }
21771
Frank Barchard0725b8d2020-12-07 11:07:35 -080021772 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021773 for (size_t k = 2; k < 10; k++) {
21774 GemmMicrokernelTester()
21775 .mr(4)
21776 .nr(8)
21777 .kr(1)
21778 .sr(1)
21779 .m(4)
21780 .n(8)
21781 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021782 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021783 }
21784 }
21785
Frank Barchard0725b8d2020-12-07 11:07:35 -080021786 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021787 for (size_t k = 2; k < 10; k++) {
21788 GemmMicrokernelTester()
21789 .mr(4)
21790 .nr(8)
21791 .kr(1)
21792 .sr(1)
21793 .m(4)
21794 .n(8)
21795 .k(k)
21796 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021797 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021798 }
21799 }
21800
Frank Barchard0725b8d2020-12-07 11:07:35 -080021801 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021802 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021803 for (uint32_t n = 1; n <= 8; n++) {
21804 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021805 GemmMicrokernelTester()
21806 .mr(4)
21807 .nr(8)
21808 .kr(1)
21809 .sr(1)
21810 .m(m)
21811 .n(n)
21812 .k(k)
21813 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021814 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021815 }
21816 }
21817 }
21818 }
21819
Frank Barchard0725b8d2020-12-07 11:07:35 -080021820 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021821 for (uint32_t n = 9; n < 16; n++) {
21822 for (size_t k = 1; k <= 5; k += 2) {
21823 GemmMicrokernelTester()
21824 .mr(4)
21825 .nr(8)
21826 .kr(1)
21827 .sr(1)
21828 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021829 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021830 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021831 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021832 }
21833 }
21834 }
21835
Frank Barchard0725b8d2020-12-07 11:07:35 -080021836 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021837 for (uint32_t n = 9; n < 16; n++) {
21838 for (size_t k = 1; k <= 5; k += 2) {
21839 GemmMicrokernelTester()
21840 .mr(4)
21841 .nr(8)
21842 .kr(1)
21843 .sr(1)
21844 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021845 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021846 .k(k)
21847 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021848 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021849 }
21850 }
21851 }
21852
Frank Barchard0725b8d2020-12-07 11:07:35 -080021853 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021854 for (uint32_t n = 9; n < 16; n++) {
21855 for (size_t k = 1; k <= 5; k += 2) {
21856 GemmMicrokernelTester()
21857 .mr(4)
21858 .nr(8)
21859 .kr(1)
21860 .sr(1)
21861 .m(4)
21862 .n(n)
21863 .k(k)
21864 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021865 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021866 }
21867 }
21868 }
21869
Frank Barchard0725b8d2020-12-07 11:07:35 -080021870 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021871 for (uint32_t n = 9; n < 16; n++) {
21872 for (size_t k = 1; k <= 5; k += 2) {
21873 for (uint32_t m = 1; m <= 4; m++) {
21874 GemmMicrokernelTester()
21875 .mr(4)
21876 .nr(8)
21877 .kr(1)
21878 .sr(1)
21879 .m(m)
21880 .n(n)
21881 .k(k)
21882 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021883 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021884 }
21885 }
21886 }
21887 }
21888
Frank Barchard0725b8d2020-12-07 11:07:35 -080021889 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021890 for (uint32_t n = 16; n <= 24; n += 8) {
21891 for (size_t k = 1; k <= 5; k += 2) {
21892 GemmMicrokernelTester()
21893 .mr(4)
21894 .nr(8)
21895 .kr(1)
21896 .sr(1)
21897 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021898 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021899 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021900 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021901 }
21902 }
21903 }
21904
Frank Barchard0725b8d2020-12-07 11:07:35 -080021905 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021906 for (uint32_t n = 16; n <= 24; n += 8) {
21907 for (size_t k = 1; k <= 5; k += 2) {
21908 GemmMicrokernelTester()
21909 .mr(4)
21910 .nr(8)
21911 .kr(1)
21912 .sr(1)
21913 .m(4)
21914 .n(n)
21915 .k(k)
21916 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021917 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021918 }
21919 }
21920 }
21921
Frank Barchard0725b8d2020-12-07 11:07:35 -080021922 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021923 for (uint32_t n = 16; n <= 24; n += 8) {
21924 for (size_t k = 1; k <= 5; k += 2) {
21925 GemmMicrokernelTester()
21926 .mr(4)
21927 .nr(8)
21928 .kr(1)
21929 .sr(1)
21930 .m(4)
21931 .n(n)
21932 .k(k)
21933 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021934 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021935 }
21936 }
21937 }
21938
Frank Barchard0725b8d2020-12-07 11:07:35 -080021939 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021940 for (uint32_t n = 16; n <= 24; n += 8) {
21941 for (size_t k = 1; k <= 5; k += 2) {
21942 for (uint32_t m = 1; m <= 4; m++) {
21943 GemmMicrokernelTester()
21944 .mr(4)
21945 .nr(8)
21946 .kr(1)
21947 .sr(1)
21948 .m(m)
21949 .n(n)
21950 .k(k)
21951 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021952 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021953 }
21954 }
21955 }
21956 }
21957
Frank Barchard0725b8d2020-12-07 11:07:35 -080021958 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021959 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021960 for (uint32_t n = 1; n <= 8; n++) {
21961 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021962 GemmMicrokernelTester()
21963 .mr(4)
21964 .nr(8)
21965 .kr(1)
21966 .sr(1)
21967 .m(m)
21968 .n(n)
21969 .k(k)
21970 .cm_stride(11)
21971 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021972 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021973 }
21974 }
21975 }
21976 }
21977
Frank Barchard0725b8d2020-12-07 11:07:35 -080021978 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021979 GemmMicrokernelTester()
21980 .mr(4)
21981 .nr(8)
21982 .kr(1)
21983 .sr(1)
21984 .m(4)
21985 .n(8)
21986 .k(1)
21987 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021988 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021989 }
21990
Frank Barchard0725b8d2020-12-07 11:07:35 -080021991 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021992 GemmMicrokernelTester()
21993 .mr(4)
21994 .nr(8)
21995 .kr(1)
21996 .sr(1)
21997 .m(4)
21998 .n(8)
21999 .k(1)
22000 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022001 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022002 }
22003
Frank Barchard0725b8d2020-12-07 11:07:35 -080022004 TEST(F32_GEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022005 GemmMicrokernelTester()
22006 .mr(4)
22007 .nr(8)
22008 .kr(1)
22009 .sr(1)
22010 .m(4)
22011 .n(8)
22012 .k(1)
22013 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022014 .Test(xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022015 }
Marat Dukhan4c617792021-12-21 15:47:58 -080022016#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022017
22018
Marat Dukhan4c617792021-12-21 15:47:58 -080022019#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080022020 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022021 GemmMicrokernelTester()
22022 .mr(6)
22023 .nr(8)
22024 .kr(1)
22025 .sr(1)
22026 .m(6)
22027 .n(8)
22028 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022029 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022030 }
22031
Frank Barchard0725b8d2020-12-07 11:07:35 -080022032 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022033 GemmMicrokernelTester()
22034 .mr(6)
22035 .nr(8)
22036 .kr(1)
22037 .sr(1)
22038 .m(6)
22039 .n(8)
22040 .k(1)
22041 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022042 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022043 }
22044
Frank Barchard0725b8d2020-12-07 11:07:35 -080022045 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022046 GemmMicrokernelTester()
22047 .mr(6)
22048 .nr(8)
22049 .kr(1)
22050 .sr(1)
22051 .m(6)
22052 .n(8)
22053 .k(1)
22054 .a_stride(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022055 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022056 }
22057
Frank Barchard0725b8d2020-12-07 11:07:35 -080022058 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022059 for (uint32_t n = 1; n <= 8; n++) {
22060 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022061 GemmMicrokernelTester()
22062 .mr(6)
22063 .nr(8)
22064 .kr(1)
22065 .sr(1)
22066 .m(m)
22067 .n(n)
22068 .k(1)
22069 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022070 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022071 }
22072 }
22073 }
22074
Frank Barchard0725b8d2020-12-07 11:07:35 -080022075 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022076 for (uint32_t m = 1; m <= 6; m++) {
22077 GemmMicrokernelTester()
22078 .mr(6)
22079 .nr(8)
22080 .kr(1)
22081 .sr(1)
22082 .m(m)
22083 .n(8)
22084 .k(1)
22085 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022086 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022087 }
22088 }
22089
Frank Barchard0725b8d2020-12-07 11:07:35 -080022090 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022091 for (uint32_t n = 1; n <= 8; n++) {
22092 GemmMicrokernelTester()
22093 .mr(6)
22094 .nr(8)
22095 .kr(1)
22096 .sr(1)
22097 .m(6)
22098 .n(n)
22099 .k(1)
22100 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022101 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022102 }
22103 }
22104
Frank Barchard0725b8d2020-12-07 11:07:35 -080022105 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022106 for (size_t k = 2; k < 10; k++) {
22107 GemmMicrokernelTester()
22108 .mr(6)
22109 .nr(8)
22110 .kr(1)
22111 .sr(1)
22112 .m(6)
22113 .n(8)
22114 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022115 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022116 }
22117 }
22118
Frank Barchard0725b8d2020-12-07 11:07:35 -080022119 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022120 for (size_t k = 2; k < 10; k++) {
22121 GemmMicrokernelTester()
22122 .mr(6)
22123 .nr(8)
22124 .kr(1)
22125 .sr(1)
22126 .m(6)
22127 .n(8)
22128 .k(k)
22129 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022130 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022131 }
22132 }
22133
Frank Barchard0725b8d2020-12-07 11:07:35 -080022134 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022135 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022136 for (uint32_t n = 1; n <= 8; n++) {
22137 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022138 GemmMicrokernelTester()
22139 .mr(6)
22140 .nr(8)
22141 .kr(1)
22142 .sr(1)
22143 .m(m)
22144 .n(n)
22145 .k(k)
22146 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022147 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022148 }
22149 }
22150 }
22151 }
22152
Frank Barchard0725b8d2020-12-07 11:07:35 -080022153 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022154 for (uint32_t n = 9; n < 16; n++) {
22155 for (size_t k = 1; k <= 5; k += 2) {
22156 GemmMicrokernelTester()
22157 .mr(6)
22158 .nr(8)
22159 .kr(1)
22160 .sr(1)
22161 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022162 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022163 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022164 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022165 }
22166 }
22167 }
22168
Frank Barchard0725b8d2020-12-07 11:07:35 -080022169 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022170 for (uint32_t n = 9; n < 16; n++) {
22171 for (size_t k = 1; k <= 5; k += 2) {
22172 GemmMicrokernelTester()
22173 .mr(6)
22174 .nr(8)
22175 .kr(1)
22176 .sr(1)
22177 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022178 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022179 .k(k)
22180 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022181 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022182 }
22183 }
22184 }
22185
Frank Barchard0725b8d2020-12-07 11:07:35 -080022186 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022187 for (uint32_t n = 9; n < 16; n++) {
22188 for (size_t k = 1; k <= 5; k += 2) {
22189 GemmMicrokernelTester()
22190 .mr(6)
22191 .nr(8)
22192 .kr(1)
22193 .sr(1)
22194 .m(6)
22195 .n(n)
22196 .k(k)
22197 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022198 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022199 }
22200 }
22201 }
22202
Frank Barchard0725b8d2020-12-07 11:07:35 -080022203 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022204 for (uint32_t n = 9; n < 16; n++) {
22205 for (size_t k = 1; k <= 5; k += 2) {
22206 for (uint32_t m = 1; m <= 6; m++) {
22207 GemmMicrokernelTester()
22208 .mr(6)
22209 .nr(8)
22210 .kr(1)
22211 .sr(1)
22212 .m(m)
22213 .n(n)
22214 .k(k)
22215 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022216 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022217 }
22218 }
22219 }
22220 }
22221
Frank Barchard0725b8d2020-12-07 11:07:35 -080022222 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022223 for (uint32_t n = 16; n <= 24; n += 8) {
22224 for (size_t k = 1; k <= 5; k += 2) {
22225 GemmMicrokernelTester()
22226 .mr(6)
22227 .nr(8)
22228 .kr(1)
22229 .sr(1)
22230 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022231 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022232 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022233 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022234 }
22235 }
22236 }
22237
Frank Barchard0725b8d2020-12-07 11:07:35 -080022238 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022239 for (uint32_t n = 16; n <= 24; n += 8) {
22240 for (size_t k = 1; k <= 5; k += 2) {
22241 GemmMicrokernelTester()
22242 .mr(6)
22243 .nr(8)
22244 .kr(1)
22245 .sr(1)
22246 .m(6)
22247 .n(n)
22248 .k(k)
22249 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022250 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022251 }
22252 }
22253 }
22254
Frank Barchard0725b8d2020-12-07 11:07:35 -080022255 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022256 for (uint32_t n = 16; n <= 24; n += 8) {
22257 for (size_t k = 1; k <= 5; k += 2) {
22258 GemmMicrokernelTester()
22259 .mr(6)
22260 .nr(8)
22261 .kr(1)
22262 .sr(1)
22263 .m(6)
22264 .n(n)
22265 .k(k)
22266 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022267 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022268 }
22269 }
22270 }
22271
Frank Barchard0725b8d2020-12-07 11:07:35 -080022272 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022273 for (uint32_t n = 16; n <= 24; n += 8) {
22274 for (size_t k = 1; k <= 5; k += 2) {
22275 for (uint32_t m = 1; m <= 6; m++) {
22276 GemmMicrokernelTester()
22277 .mr(6)
22278 .nr(8)
22279 .kr(1)
22280 .sr(1)
22281 .m(m)
22282 .n(n)
22283 .k(k)
22284 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022285 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022286 }
22287 }
22288 }
22289 }
22290
Frank Barchard0725b8d2020-12-07 11:07:35 -080022291 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022292 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022293 for (uint32_t n = 1; n <= 8; n++) {
22294 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022295 GemmMicrokernelTester()
22296 .mr(6)
22297 .nr(8)
22298 .kr(1)
22299 .sr(1)
22300 .m(m)
22301 .n(n)
22302 .k(k)
22303 .cm_stride(11)
22304 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022305 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022306 }
22307 }
22308 }
22309 }
22310
Frank Barchard0725b8d2020-12-07 11:07:35 -080022311 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022312 GemmMicrokernelTester()
22313 .mr(6)
22314 .nr(8)
22315 .kr(1)
22316 .sr(1)
22317 .m(6)
22318 .n(8)
22319 .k(1)
22320 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022321 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022322 }
22323
Frank Barchard0725b8d2020-12-07 11:07:35 -080022324 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022325 GemmMicrokernelTester()
22326 .mr(6)
22327 .nr(8)
22328 .kr(1)
22329 .sr(1)
22330 .m(6)
22331 .n(8)
22332 .k(1)
22333 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022334 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022335 }
22336
Frank Barchard0725b8d2020-12-07 11:07:35 -080022337 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022338 GemmMicrokernelTester()
22339 .mr(6)
22340 .nr(8)
22341 .kr(1)
22342 .sr(1)
22343 .m(6)
22344 .n(8)
22345 .k(1)
22346 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022347 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022348 }
Marat Dukhan4c617792021-12-21 15:47:58 -080022349#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022350
22351
Marat Dukhan4c617792021-12-21 15:47:58 -080022352#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080022353 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022354 GemmMicrokernelTester()
22355 .mr(1)
22356 .nr(8)
22357 .kr(1)
22358 .sr(1)
22359 .m(1)
22360 .n(8)
22361 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022362 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022363 }
22364
Frank Barchard0725b8d2020-12-07 11:07:35 -080022365 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022366 GemmMicrokernelTester()
22367 .mr(1)
22368 .nr(8)
22369 .kr(1)
22370 .sr(1)
22371 .m(1)
22372 .n(8)
22373 .k(4)
22374 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022375 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022376 }
22377
Frank Barchard0725b8d2020-12-07 11:07:35 -080022378 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022379 GemmMicrokernelTester()
22380 .mr(1)
22381 .nr(8)
22382 .kr(1)
22383 .sr(1)
22384 .m(1)
22385 .n(8)
22386 .k(4)
22387 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022388 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022389 }
22390
Frank Barchard0725b8d2020-12-07 11:07:35 -080022391 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022392 for (uint32_t n = 1; n <= 8; n++) {
22393 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022394 GemmMicrokernelTester()
22395 .mr(1)
22396 .nr(8)
22397 .kr(1)
22398 .sr(1)
22399 .m(m)
22400 .n(n)
22401 .k(4)
22402 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022403 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022404 }
22405 }
22406 }
22407
Frank Barchard0725b8d2020-12-07 11:07:35 -080022408 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022409 for (uint32_t m = 1; m <= 1; m++) {
22410 GemmMicrokernelTester()
22411 .mr(1)
22412 .nr(8)
22413 .kr(1)
22414 .sr(1)
22415 .m(m)
22416 .n(8)
22417 .k(4)
22418 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022419 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022420 }
22421 }
22422
Frank Barchard0725b8d2020-12-07 11:07:35 -080022423 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022424 for (uint32_t n = 1; n <= 8; n++) {
22425 GemmMicrokernelTester()
22426 .mr(1)
22427 .nr(8)
22428 .kr(1)
22429 .sr(1)
22430 .m(1)
22431 .n(n)
22432 .k(4)
22433 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022434 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022435 }
22436 }
22437
Frank Barchard0725b8d2020-12-07 11:07:35 -080022438 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022439 for (size_t k = 1; k < 4; k++) {
22440 GemmMicrokernelTester()
22441 .mr(1)
22442 .nr(8)
22443 .kr(1)
22444 .sr(1)
22445 .m(1)
22446 .n(8)
22447 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022448 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022449 }
22450 }
22451
Frank Barchard0725b8d2020-12-07 11:07:35 -080022452 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_lt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022453 for (size_t k = 1; k < 4; k++) {
22454 GemmMicrokernelTester()
22455 .mr(1)
22456 .nr(8)
22457 .kr(1)
22458 .sr(1)
22459 .m(1)
22460 .n(8)
22461 .k(k)
22462 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022463 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022464 }
22465 }
22466
Frank Barchard0725b8d2020-12-07 11:07:35 -080022467 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022468 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022469 for (uint32_t n = 1; n <= 8; n++) {
22470 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022471 GemmMicrokernelTester()
22472 .mr(1)
22473 .nr(8)
22474 .kr(1)
22475 .sr(1)
22476 .m(m)
22477 .n(n)
22478 .k(k)
22479 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022480 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022481 }
22482 }
22483 }
22484 }
22485
Frank Barchard0725b8d2020-12-07 11:07:35 -080022486 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022487 for (size_t k = 5; k < 8; k++) {
22488 GemmMicrokernelTester()
22489 .mr(1)
22490 .nr(8)
22491 .kr(1)
22492 .sr(1)
22493 .m(1)
22494 .n(8)
22495 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022496 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022497 }
22498 }
22499
Frank Barchard0725b8d2020-12-07 11:07:35 -080022500 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_gt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022501 for (size_t k = 5; k < 8; k++) {
22502 GemmMicrokernelTester()
22503 .mr(1)
22504 .nr(8)
22505 .kr(1)
22506 .sr(1)
22507 .m(1)
22508 .n(8)
22509 .k(k)
22510 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022511 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022512 }
22513 }
22514
Frank Barchard0725b8d2020-12-07 11:07:35 -080022515 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022516 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022517 for (uint32_t n = 1; n <= 8; n++) {
22518 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022519 GemmMicrokernelTester()
22520 .mr(1)
22521 .nr(8)
22522 .kr(1)
22523 .sr(1)
22524 .m(m)
22525 .n(n)
22526 .k(k)
22527 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022528 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022529 }
22530 }
22531 }
22532 }
22533
Frank Barchard0725b8d2020-12-07 11:07:35 -080022534 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022535 for (size_t k = 8; k <= 40; k += 4) {
22536 GemmMicrokernelTester()
22537 .mr(1)
22538 .nr(8)
22539 .kr(1)
22540 .sr(1)
22541 .m(1)
22542 .n(8)
22543 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022544 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022545 }
22546 }
22547
Frank Barchard0725b8d2020-12-07 11:07:35 -080022548 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_div_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022549 for (size_t k = 8; k <= 40; k += 4) {
22550 GemmMicrokernelTester()
22551 .mr(1)
22552 .nr(8)
22553 .kr(1)
22554 .sr(1)
22555 .m(1)
22556 .n(8)
22557 .k(k)
22558 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022559 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022560 }
22561 }
22562
Frank Barchard0725b8d2020-12-07 11:07:35 -080022563 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022564 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022565 for (uint32_t n = 1; n <= 8; n++) {
22566 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022567 GemmMicrokernelTester()
22568 .mr(1)
22569 .nr(8)
22570 .kr(1)
22571 .sr(1)
22572 .m(m)
22573 .n(n)
22574 .k(k)
22575 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022576 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022577 }
22578 }
22579 }
22580 }
22581
Frank Barchard0725b8d2020-12-07 11:07:35 -080022582 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022583 for (uint32_t n = 9; n < 16; n++) {
22584 for (size_t k = 1; k <= 20; k += 5) {
22585 GemmMicrokernelTester()
22586 .mr(1)
22587 .nr(8)
22588 .kr(1)
22589 .sr(1)
22590 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022591 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022592 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022593 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022594 }
22595 }
22596 }
22597
Frank Barchard0725b8d2020-12-07 11:07:35 -080022598 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022599 for (uint32_t n = 9; n < 16; n++) {
22600 for (size_t k = 1; k <= 20; k += 5) {
22601 GemmMicrokernelTester()
22602 .mr(1)
22603 .nr(8)
22604 .kr(1)
22605 .sr(1)
22606 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022607 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022608 .k(k)
22609 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022610 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022611 }
22612 }
22613 }
22614
Frank Barchard0725b8d2020-12-07 11:07:35 -080022615 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022616 for (uint32_t n = 9; n < 16; n++) {
22617 for (size_t k = 1; k <= 20; k += 5) {
22618 GemmMicrokernelTester()
22619 .mr(1)
22620 .nr(8)
22621 .kr(1)
22622 .sr(1)
22623 .m(1)
22624 .n(n)
22625 .k(k)
22626 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022627 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022628 }
22629 }
22630 }
22631
Frank Barchard0725b8d2020-12-07 11:07:35 -080022632 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022633 for (uint32_t n = 9; n < 16; n++) {
22634 for (size_t k = 1; k <= 20; k += 5) {
22635 for (uint32_t m = 1; m <= 1; m++) {
22636 GemmMicrokernelTester()
22637 .mr(1)
22638 .nr(8)
22639 .kr(1)
22640 .sr(1)
22641 .m(m)
22642 .n(n)
22643 .k(k)
22644 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022645 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022646 }
22647 }
22648 }
22649 }
22650
Frank Barchard0725b8d2020-12-07 11:07:35 -080022651 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022652 for (uint32_t n = 16; n <= 24; n += 8) {
22653 for (size_t k = 1; k <= 20; k += 5) {
22654 GemmMicrokernelTester()
22655 .mr(1)
22656 .nr(8)
22657 .kr(1)
22658 .sr(1)
22659 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022660 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022661 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022662 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022663 }
22664 }
22665 }
22666
Frank Barchard0725b8d2020-12-07 11:07:35 -080022667 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022668 for (uint32_t n = 16; n <= 24; n += 8) {
22669 for (size_t k = 1; k <= 20; k += 5) {
22670 GemmMicrokernelTester()
22671 .mr(1)
22672 .nr(8)
22673 .kr(1)
22674 .sr(1)
22675 .m(1)
22676 .n(n)
22677 .k(k)
22678 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022679 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022680 }
22681 }
22682 }
22683
Frank Barchard0725b8d2020-12-07 11:07:35 -080022684 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022685 for (uint32_t n = 16; n <= 24; n += 8) {
22686 for (size_t k = 1; k <= 20; k += 5) {
22687 GemmMicrokernelTester()
22688 .mr(1)
22689 .nr(8)
22690 .kr(1)
22691 .sr(1)
22692 .m(1)
22693 .n(n)
22694 .k(k)
22695 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022696 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022697 }
22698 }
22699 }
22700
Frank Barchard0725b8d2020-12-07 11:07:35 -080022701 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022702 for (uint32_t n = 16; n <= 24; n += 8) {
22703 for (size_t k = 1; k <= 20; k += 5) {
22704 for (uint32_t m = 1; m <= 1; m++) {
22705 GemmMicrokernelTester()
22706 .mr(1)
22707 .nr(8)
22708 .kr(1)
22709 .sr(1)
22710 .m(m)
22711 .n(n)
22712 .k(k)
22713 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022714 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022715 }
22716 }
22717 }
22718 }
22719
Frank Barchard0725b8d2020-12-07 11:07:35 -080022720 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022721 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022722 for (uint32_t n = 1; n <= 8; n++) {
22723 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022724 GemmMicrokernelTester()
22725 .mr(1)
22726 .nr(8)
22727 .kr(1)
22728 .sr(1)
22729 .m(m)
22730 .n(n)
22731 .k(k)
22732 .cm_stride(11)
22733 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022734 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022735 }
22736 }
22737 }
22738 }
22739
Frank Barchard0725b8d2020-12-07 11:07:35 -080022740 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022741 GemmMicrokernelTester()
22742 .mr(1)
22743 .nr(8)
22744 .kr(1)
22745 .sr(1)
22746 .m(1)
22747 .n(8)
22748 .k(4)
22749 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022750 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022751 }
22752
Frank Barchard0725b8d2020-12-07 11:07:35 -080022753 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022754 GemmMicrokernelTester()
22755 .mr(1)
22756 .nr(8)
22757 .kr(1)
22758 .sr(1)
22759 .m(1)
22760 .n(8)
22761 .k(4)
22762 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022763 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022764 }
22765
Frank Barchard0725b8d2020-12-07 11:07:35 -080022766 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022767 GemmMicrokernelTester()
22768 .mr(1)
22769 .nr(8)
22770 .kr(1)
22771 .sr(1)
22772 .m(1)
22773 .n(8)
22774 .k(4)
22775 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022776 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022777 }
Marat Dukhan4c617792021-12-21 15:47:58 -080022778#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022779
22780
Marat Dukhan4c617792021-12-21 15:47:58 -080022781#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080022782 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022783 GemmMicrokernelTester()
22784 .mr(6)
22785 .nr(8)
22786 .kr(1)
22787 .sr(1)
22788 .m(6)
22789 .n(8)
22790 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022791 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022792 }
22793
Frank Barchard0725b8d2020-12-07 11:07:35 -080022794 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022795 GemmMicrokernelTester()
22796 .mr(6)
22797 .nr(8)
22798 .kr(1)
22799 .sr(1)
22800 .m(6)
22801 .n(8)
22802 .k(4)
22803 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022804 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022805 }
22806
Frank Barchard0725b8d2020-12-07 11:07:35 -080022807 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022808 GemmMicrokernelTester()
22809 .mr(6)
22810 .nr(8)
22811 .kr(1)
22812 .sr(1)
22813 .m(6)
22814 .n(8)
22815 .k(4)
22816 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022817 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022818 }
22819
Frank Barchard0725b8d2020-12-07 11:07:35 -080022820 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022821 for (uint32_t n = 1; n <= 8; n++) {
22822 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022823 GemmMicrokernelTester()
22824 .mr(6)
22825 .nr(8)
22826 .kr(1)
22827 .sr(1)
22828 .m(m)
22829 .n(n)
22830 .k(4)
22831 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022832 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022833 }
22834 }
22835 }
22836
Frank Barchard0725b8d2020-12-07 11:07:35 -080022837 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022838 for (uint32_t m = 1; m <= 6; m++) {
22839 GemmMicrokernelTester()
22840 .mr(6)
22841 .nr(8)
22842 .kr(1)
22843 .sr(1)
22844 .m(m)
22845 .n(8)
22846 .k(4)
22847 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022848 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022849 }
22850 }
22851
Frank Barchard0725b8d2020-12-07 11:07:35 -080022852 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022853 for (uint32_t n = 1; n <= 8; n++) {
22854 GemmMicrokernelTester()
22855 .mr(6)
22856 .nr(8)
22857 .kr(1)
22858 .sr(1)
22859 .m(6)
22860 .n(n)
22861 .k(4)
22862 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022863 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022864 }
22865 }
22866
Frank Barchard0725b8d2020-12-07 11:07:35 -080022867 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022868 for (size_t k = 1; k < 4; k++) {
22869 GemmMicrokernelTester()
22870 .mr(6)
22871 .nr(8)
22872 .kr(1)
22873 .sr(1)
22874 .m(6)
22875 .n(8)
22876 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022877 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022878 }
22879 }
22880
Frank Barchard0725b8d2020-12-07 11:07:35 -080022881 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_lt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022882 for (size_t k = 1; k < 4; k++) {
22883 GemmMicrokernelTester()
22884 .mr(6)
22885 .nr(8)
22886 .kr(1)
22887 .sr(1)
22888 .m(6)
22889 .n(8)
22890 .k(k)
22891 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022892 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022893 }
22894 }
22895
Frank Barchard0725b8d2020-12-07 11:07:35 -080022896 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022897 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022898 for (uint32_t n = 1; n <= 8; n++) {
22899 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022900 GemmMicrokernelTester()
22901 .mr(6)
22902 .nr(8)
22903 .kr(1)
22904 .sr(1)
22905 .m(m)
22906 .n(n)
22907 .k(k)
22908 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022909 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022910 }
22911 }
22912 }
22913 }
22914
Frank Barchard0725b8d2020-12-07 11:07:35 -080022915 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022916 for (size_t k = 5; k < 8; k++) {
22917 GemmMicrokernelTester()
22918 .mr(6)
22919 .nr(8)
22920 .kr(1)
22921 .sr(1)
22922 .m(6)
22923 .n(8)
22924 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022925 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022926 }
22927 }
22928
Frank Barchard0725b8d2020-12-07 11:07:35 -080022929 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_gt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022930 for (size_t k = 5; k < 8; k++) {
22931 GemmMicrokernelTester()
22932 .mr(6)
22933 .nr(8)
22934 .kr(1)
22935 .sr(1)
22936 .m(6)
22937 .n(8)
22938 .k(k)
22939 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022940 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022941 }
22942 }
22943
Frank Barchard0725b8d2020-12-07 11:07:35 -080022944 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022945 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022946 for (uint32_t n = 1; n <= 8; n++) {
22947 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022948 GemmMicrokernelTester()
22949 .mr(6)
22950 .nr(8)
22951 .kr(1)
22952 .sr(1)
22953 .m(m)
22954 .n(n)
22955 .k(k)
22956 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022957 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022958 }
22959 }
22960 }
22961 }
22962
Frank Barchard0725b8d2020-12-07 11:07:35 -080022963 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022964 for (size_t k = 8; k <= 40; k += 4) {
22965 GemmMicrokernelTester()
22966 .mr(6)
22967 .nr(8)
22968 .kr(1)
22969 .sr(1)
22970 .m(6)
22971 .n(8)
22972 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022973 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022974 }
22975 }
22976
Frank Barchard0725b8d2020-12-07 11:07:35 -080022977 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_div_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022978 for (size_t k = 8; k <= 40; k += 4) {
22979 GemmMicrokernelTester()
22980 .mr(6)
22981 .nr(8)
22982 .kr(1)
22983 .sr(1)
22984 .m(6)
22985 .n(8)
22986 .k(k)
22987 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022988 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022989 }
22990 }
22991
Frank Barchard0725b8d2020-12-07 11:07:35 -080022992 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022993 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022994 for (uint32_t n = 1; n <= 8; n++) {
22995 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022996 GemmMicrokernelTester()
22997 .mr(6)
22998 .nr(8)
22999 .kr(1)
23000 .sr(1)
23001 .m(m)
23002 .n(n)
23003 .k(k)
23004 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023005 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023006 }
23007 }
23008 }
23009 }
23010
Frank Barchard0725b8d2020-12-07 11:07:35 -080023011 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023012 for (uint32_t n = 9; n < 16; n++) {
23013 for (size_t k = 1; k <= 20; k += 5) {
23014 GemmMicrokernelTester()
23015 .mr(6)
23016 .nr(8)
23017 .kr(1)
23018 .sr(1)
23019 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023020 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023021 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023022 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023023 }
23024 }
23025 }
23026
Frank Barchard0725b8d2020-12-07 11:07:35 -080023027 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023028 for (uint32_t n = 9; n < 16; n++) {
23029 for (size_t k = 1; k <= 20; k += 5) {
23030 GemmMicrokernelTester()
23031 .mr(6)
23032 .nr(8)
23033 .kr(1)
23034 .sr(1)
23035 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023036 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023037 .k(k)
23038 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023039 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023040 }
23041 }
23042 }
23043
Frank Barchard0725b8d2020-12-07 11:07:35 -080023044 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023045 for (uint32_t n = 9; n < 16; n++) {
23046 for (size_t k = 1; k <= 20; k += 5) {
23047 GemmMicrokernelTester()
23048 .mr(6)
23049 .nr(8)
23050 .kr(1)
23051 .sr(1)
23052 .m(6)
23053 .n(n)
23054 .k(k)
23055 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023056 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023057 }
23058 }
23059 }
23060
Frank Barchard0725b8d2020-12-07 11:07:35 -080023061 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023062 for (uint32_t n = 9; n < 16; n++) {
23063 for (size_t k = 1; k <= 20; k += 5) {
23064 for (uint32_t m = 1; m <= 6; m++) {
23065 GemmMicrokernelTester()
23066 .mr(6)
23067 .nr(8)
23068 .kr(1)
23069 .sr(1)
23070 .m(m)
23071 .n(n)
23072 .k(k)
23073 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023074 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023075 }
23076 }
23077 }
23078 }
23079
Frank Barchard0725b8d2020-12-07 11:07:35 -080023080 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023081 for (uint32_t n = 16; n <= 24; n += 8) {
23082 for (size_t k = 1; k <= 20; k += 5) {
23083 GemmMicrokernelTester()
23084 .mr(6)
23085 .nr(8)
23086 .kr(1)
23087 .sr(1)
23088 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023089 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023090 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023091 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023092 }
23093 }
23094 }
23095
Frank Barchard0725b8d2020-12-07 11:07:35 -080023096 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023097 for (uint32_t n = 16; n <= 24; n += 8) {
23098 for (size_t k = 1; k <= 20; k += 5) {
23099 GemmMicrokernelTester()
23100 .mr(6)
23101 .nr(8)
23102 .kr(1)
23103 .sr(1)
23104 .m(6)
23105 .n(n)
23106 .k(k)
23107 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023108 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023109 }
23110 }
23111 }
23112
Frank Barchard0725b8d2020-12-07 11:07:35 -080023113 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023114 for (uint32_t n = 16; n <= 24; n += 8) {
23115 for (size_t k = 1; k <= 20; k += 5) {
23116 GemmMicrokernelTester()
23117 .mr(6)
23118 .nr(8)
23119 .kr(1)
23120 .sr(1)
23121 .m(6)
23122 .n(n)
23123 .k(k)
23124 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023125 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023126 }
23127 }
23128 }
23129
Frank Barchard0725b8d2020-12-07 11:07:35 -080023130 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023131 for (uint32_t n = 16; n <= 24; n += 8) {
23132 for (size_t k = 1; k <= 20; k += 5) {
23133 for (uint32_t m = 1; m <= 6; m++) {
23134 GemmMicrokernelTester()
23135 .mr(6)
23136 .nr(8)
23137 .kr(1)
23138 .sr(1)
23139 .m(m)
23140 .n(n)
23141 .k(k)
23142 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023143 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023144 }
23145 }
23146 }
23147 }
23148
Frank Barchard0725b8d2020-12-07 11:07:35 -080023149 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023150 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023151 for (uint32_t n = 1; n <= 8; n++) {
23152 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023153 GemmMicrokernelTester()
23154 .mr(6)
23155 .nr(8)
23156 .kr(1)
23157 .sr(1)
23158 .m(m)
23159 .n(n)
23160 .k(k)
23161 .cm_stride(11)
23162 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023163 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023164 }
23165 }
23166 }
23167 }
23168
Frank Barchard0725b8d2020-12-07 11:07:35 -080023169 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023170 GemmMicrokernelTester()
23171 .mr(6)
23172 .nr(8)
23173 .kr(1)
23174 .sr(1)
23175 .m(6)
23176 .n(8)
23177 .k(4)
23178 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023179 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023180 }
23181
Frank Barchard0725b8d2020-12-07 11:07:35 -080023182 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023183 GemmMicrokernelTester()
23184 .mr(6)
23185 .nr(8)
23186 .kr(1)
23187 .sr(1)
23188 .m(6)
23189 .n(8)
23190 .k(4)
23191 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023192 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023193 }
23194
Frank Barchard0725b8d2020-12-07 11:07:35 -080023195 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023196 GemmMicrokernelTester()
23197 .mr(6)
23198 .nr(8)
23199 .kr(1)
23200 .sr(1)
23201 .m(6)
23202 .n(8)
23203 .k(4)
23204 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023205 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023206 }
Marat Dukhan4c617792021-12-21 15:47:58 -080023207#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023208
23209
Marat Dukhan4c617792021-12-21 15:47:58 -080023210#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080023211 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023212 GemmMicrokernelTester()
23213 .mr(1)
23214 .nr(8)
23215 .kr(1)
23216 .sr(1)
23217 .m(1)
23218 .n(8)
23219 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023220 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023221 }
23222
Frank Barchard0725b8d2020-12-07 11:07:35 -080023223 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023224 GemmMicrokernelTester()
23225 .mr(1)
23226 .nr(8)
23227 .kr(1)
23228 .sr(1)
23229 .m(1)
23230 .n(8)
23231 .k(4)
23232 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023233 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023234 }
23235
Frank Barchard0725b8d2020-12-07 11:07:35 -080023236 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023237 GemmMicrokernelTester()
23238 .mr(1)
23239 .nr(8)
23240 .kr(1)
23241 .sr(1)
23242 .m(1)
23243 .n(8)
23244 .k(4)
23245 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023246 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023247 }
23248
Frank Barchard0725b8d2020-12-07 11:07:35 -080023249 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023250 for (uint32_t n = 1; n <= 8; n++) {
23251 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023252 GemmMicrokernelTester()
23253 .mr(1)
23254 .nr(8)
23255 .kr(1)
23256 .sr(1)
23257 .m(m)
23258 .n(n)
23259 .k(4)
23260 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023261 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023262 }
23263 }
23264 }
23265
Frank Barchard0725b8d2020-12-07 11:07:35 -080023266 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023267 for (uint32_t m = 1; m <= 1; m++) {
23268 GemmMicrokernelTester()
23269 .mr(1)
23270 .nr(8)
23271 .kr(1)
23272 .sr(1)
23273 .m(m)
23274 .n(8)
23275 .k(4)
23276 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023277 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023278 }
23279 }
23280
Frank Barchard0725b8d2020-12-07 11:07:35 -080023281 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023282 for (uint32_t n = 1; n <= 8; n++) {
23283 GemmMicrokernelTester()
23284 .mr(1)
23285 .nr(8)
23286 .kr(1)
23287 .sr(1)
23288 .m(1)
23289 .n(n)
23290 .k(4)
23291 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023292 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023293 }
23294 }
23295
Frank Barchard0725b8d2020-12-07 11:07:35 -080023296 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023297 for (size_t k = 1; k < 4; k++) {
23298 GemmMicrokernelTester()
23299 .mr(1)
23300 .nr(8)
23301 .kr(1)
23302 .sr(1)
23303 .m(1)
23304 .n(8)
23305 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023306 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023307 }
23308 }
23309
Frank Barchard0725b8d2020-12-07 11:07:35 -080023310 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_lt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023311 for (size_t k = 1; k < 4; k++) {
23312 GemmMicrokernelTester()
23313 .mr(1)
23314 .nr(8)
23315 .kr(1)
23316 .sr(1)
23317 .m(1)
23318 .n(8)
23319 .k(k)
23320 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023321 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023322 }
23323 }
23324
Frank Barchard0725b8d2020-12-07 11:07:35 -080023325 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023326 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023327 for (uint32_t n = 1; n <= 8; n++) {
23328 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023329 GemmMicrokernelTester()
23330 .mr(1)
23331 .nr(8)
23332 .kr(1)
23333 .sr(1)
23334 .m(m)
23335 .n(n)
23336 .k(k)
23337 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023338 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023339 }
23340 }
23341 }
23342 }
23343
Frank Barchard0725b8d2020-12-07 11:07:35 -080023344 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023345 for (size_t k = 5; k < 8; k++) {
23346 GemmMicrokernelTester()
23347 .mr(1)
23348 .nr(8)
23349 .kr(1)
23350 .sr(1)
23351 .m(1)
23352 .n(8)
23353 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023354 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023355 }
23356 }
23357
Frank Barchard0725b8d2020-12-07 11:07:35 -080023358 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_gt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023359 for (size_t k = 5; k < 8; k++) {
23360 GemmMicrokernelTester()
23361 .mr(1)
23362 .nr(8)
23363 .kr(1)
23364 .sr(1)
23365 .m(1)
23366 .n(8)
23367 .k(k)
23368 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023369 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023370 }
23371 }
23372
Frank Barchard0725b8d2020-12-07 11:07:35 -080023373 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023374 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023375 for (uint32_t n = 1; n <= 8; n++) {
23376 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023377 GemmMicrokernelTester()
23378 .mr(1)
23379 .nr(8)
23380 .kr(1)
23381 .sr(1)
23382 .m(m)
23383 .n(n)
23384 .k(k)
23385 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023386 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023387 }
23388 }
23389 }
23390 }
23391
Frank Barchard0725b8d2020-12-07 11:07:35 -080023392 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023393 for (size_t k = 8; k <= 40; k += 4) {
23394 GemmMicrokernelTester()
23395 .mr(1)
23396 .nr(8)
23397 .kr(1)
23398 .sr(1)
23399 .m(1)
23400 .n(8)
23401 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023402 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023403 }
23404 }
23405
Frank Barchard0725b8d2020-12-07 11:07:35 -080023406 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_div_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023407 for (size_t k = 8; k <= 40; k += 4) {
23408 GemmMicrokernelTester()
23409 .mr(1)
23410 .nr(8)
23411 .kr(1)
23412 .sr(1)
23413 .m(1)
23414 .n(8)
23415 .k(k)
23416 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023417 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023418 }
23419 }
23420
Frank Barchard0725b8d2020-12-07 11:07:35 -080023421 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023422 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023423 for (uint32_t n = 1; n <= 8; n++) {
23424 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023425 GemmMicrokernelTester()
23426 .mr(1)
23427 .nr(8)
23428 .kr(1)
23429 .sr(1)
23430 .m(m)
23431 .n(n)
23432 .k(k)
23433 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023434 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023435 }
23436 }
23437 }
23438 }
23439
Frank Barchard0725b8d2020-12-07 11:07:35 -080023440 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023441 for (uint32_t n = 9; n < 16; n++) {
23442 for (size_t k = 1; k <= 20; k += 5) {
23443 GemmMicrokernelTester()
23444 .mr(1)
23445 .nr(8)
23446 .kr(1)
23447 .sr(1)
23448 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023449 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023450 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023451 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023452 }
23453 }
23454 }
23455
Frank Barchard0725b8d2020-12-07 11:07:35 -080023456 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023457 for (uint32_t n = 9; n < 16; n++) {
23458 for (size_t k = 1; k <= 20; k += 5) {
23459 GemmMicrokernelTester()
23460 .mr(1)
23461 .nr(8)
23462 .kr(1)
23463 .sr(1)
23464 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023465 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023466 .k(k)
23467 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023468 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023469 }
23470 }
23471 }
23472
Frank Barchard0725b8d2020-12-07 11:07:35 -080023473 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023474 for (uint32_t n = 9; n < 16; n++) {
23475 for (size_t k = 1; k <= 20; k += 5) {
23476 GemmMicrokernelTester()
23477 .mr(1)
23478 .nr(8)
23479 .kr(1)
23480 .sr(1)
23481 .m(1)
23482 .n(n)
23483 .k(k)
23484 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023485 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023486 }
23487 }
23488 }
23489
Frank Barchard0725b8d2020-12-07 11:07:35 -080023490 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023491 for (uint32_t n = 9; n < 16; n++) {
23492 for (size_t k = 1; k <= 20; k += 5) {
23493 for (uint32_t m = 1; m <= 1; m++) {
23494 GemmMicrokernelTester()
23495 .mr(1)
23496 .nr(8)
23497 .kr(1)
23498 .sr(1)
23499 .m(m)
23500 .n(n)
23501 .k(k)
23502 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023503 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023504 }
23505 }
23506 }
23507 }
23508
Frank Barchard0725b8d2020-12-07 11:07:35 -080023509 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023510 for (uint32_t n = 16; n <= 24; n += 8) {
23511 for (size_t k = 1; k <= 20; k += 5) {
23512 GemmMicrokernelTester()
23513 .mr(1)
23514 .nr(8)
23515 .kr(1)
23516 .sr(1)
23517 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023518 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023519 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023520 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023521 }
23522 }
23523 }
23524
Frank Barchard0725b8d2020-12-07 11:07:35 -080023525 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023526 for (uint32_t n = 16; n <= 24; n += 8) {
23527 for (size_t k = 1; k <= 20; k += 5) {
23528 GemmMicrokernelTester()
23529 .mr(1)
23530 .nr(8)
23531 .kr(1)
23532 .sr(1)
23533 .m(1)
23534 .n(n)
23535 .k(k)
23536 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023537 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023538 }
23539 }
23540 }
23541
Frank Barchard0725b8d2020-12-07 11:07:35 -080023542 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023543 for (uint32_t n = 16; n <= 24; n += 8) {
23544 for (size_t k = 1; k <= 20; k += 5) {
23545 GemmMicrokernelTester()
23546 .mr(1)
23547 .nr(8)
23548 .kr(1)
23549 .sr(1)
23550 .m(1)
23551 .n(n)
23552 .k(k)
23553 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023554 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023555 }
23556 }
23557 }
23558
Frank Barchard0725b8d2020-12-07 11:07:35 -080023559 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023560 for (uint32_t n = 16; n <= 24; n += 8) {
23561 for (size_t k = 1; k <= 20; k += 5) {
23562 for (uint32_t m = 1; m <= 1; m++) {
23563 GemmMicrokernelTester()
23564 .mr(1)
23565 .nr(8)
23566 .kr(1)
23567 .sr(1)
23568 .m(m)
23569 .n(n)
23570 .k(k)
23571 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023572 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023573 }
23574 }
23575 }
23576 }
23577
Frank Barchard0725b8d2020-12-07 11:07:35 -080023578 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023579 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023580 for (uint32_t n = 1; n <= 8; n++) {
23581 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023582 GemmMicrokernelTester()
23583 .mr(1)
23584 .nr(8)
23585 .kr(1)
23586 .sr(1)
23587 .m(m)
23588 .n(n)
23589 .k(k)
23590 .cm_stride(11)
23591 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023592 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023593 }
23594 }
23595 }
23596 }
23597
Frank Barchard0725b8d2020-12-07 11:07:35 -080023598 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023599 GemmMicrokernelTester()
23600 .mr(1)
23601 .nr(8)
23602 .kr(1)
23603 .sr(1)
23604 .m(1)
23605 .n(8)
23606 .k(4)
23607 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023608 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023609 }
23610
Frank Barchard0725b8d2020-12-07 11:07:35 -080023611 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023612 GemmMicrokernelTester()
23613 .mr(1)
23614 .nr(8)
23615 .kr(1)
23616 .sr(1)
23617 .m(1)
23618 .n(8)
23619 .k(4)
23620 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023621 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023622 }
23623
Frank Barchard0725b8d2020-12-07 11:07:35 -080023624 TEST(F32_GEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023625 GemmMicrokernelTester()
23626 .mr(1)
23627 .nr(8)
23628 .kr(1)
23629 .sr(1)
23630 .m(1)
23631 .n(8)
23632 .k(4)
23633 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023634 .Test(xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023635 }
Marat Dukhan4c617792021-12-21 15:47:58 -080023636#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023637
23638
Marat Dukhan4c617792021-12-21 15:47:58 -080023639#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080023640 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023641 GemmMicrokernelTester()
23642 .mr(6)
23643 .nr(8)
23644 .kr(1)
23645 .sr(1)
23646 .m(6)
23647 .n(8)
23648 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023649 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023650 }
23651
Frank Barchard0725b8d2020-12-07 11:07:35 -080023652 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023653 GemmMicrokernelTester()
23654 .mr(6)
23655 .nr(8)
23656 .kr(1)
23657 .sr(1)
23658 .m(6)
23659 .n(8)
23660 .k(4)
23661 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023662 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023663 }
23664
Frank Barchard0725b8d2020-12-07 11:07:35 -080023665 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023666 GemmMicrokernelTester()
23667 .mr(6)
23668 .nr(8)
23669 .kr(1)
23670 .sr(1)
23671 .m(6)
23672 .n(8)
23673 .k(4)
23674 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023675 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023676 }
23677
Frank Barchard0725b8d2020-12-07 11:07:35 -080023678 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023679 for (uint32_t n = 1; n <= 8; n++) {
23680 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023681 GemmMicrokernelTester()
23682 .mr(6)
23683 .nr(8)
23684 .kr(1)
23685 .sr(1)
23686 .m(m)
23687 .n(n)
23688 .k(4)
23689 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023690 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023691 }
23692 }
23693 }
23694
Frank Barchard0725b8d2020-12-07 11:07:35 -080023695 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023696 for (uint32_t m = 1; m <= 6; m++) {
23697 GemmMicrokernelTester()
23698 .mr(6)
23699 .nr(8)
23700 .kr(1)
23701 .sr(1)
23702 .m(m)
23703 .n(8)
23704 .k(4)
23705 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023706 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023707 }
23708 }
23709
Frank Barchard0725b8d2020-12-07 11:07:35 -080023710 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023711 for (uint32_t n = 1; n <= 8; n++) {
23712 GemmMicrokernelTester()
23713 .mr(6)
23714 .nr(8)
23715 .kr(1)
23716 .sr(1)
23717 .m(6)
23718 .n(n)
23719 .k(4)
23720 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023721 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023722 }
23723 }
23724
Frank Barchard0725b8d2020-12-07 11:07:35 -080023725 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023726 for (size_t k = 1; k < 4; k++) {
23727 GemmMicrokernelTester()
23728 .mr(6)
23729 .nr(8)
23730 .kr(1)
23731 .sr(1)
23732 .m(6)
23733 .n(8)
23734 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023735 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023736 }
23737 }
23738
Frank Barchard0725b8d2020-12-07 11:07:35 -080023739 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_lt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023740 for (size_t k = 1; k < 4; k++) {
23741 GemmMicrokernelTester()
23742 .mr(6)
23743 .nr(8)
23744 .kr(1)
23745 .sr(1)
23746 .m(6)
23747 .n(8)
23748 .k(k)
23749 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023750 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023751 }
23752 }
23753
Frank Barchard0725b8d2020-12-07 11:07:35 -080023754 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023755 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023756 for (uint32_t n = 1; n <= 8; n++) {
23757 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023758 GemmMicrokernelTester()
23759 .mr(6)
23760 .nr(8)
23761 .kr(1)
23762 .sr(1)
23763 .m(m)
23764 .n(n)
23765 .k(k)
23766 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023767 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023768 }
23769 }
23770 }
23771 }
23772
Frank Barchard0725b8d2020-12-07 11:07:35 -080023773 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023774 for (size_t k = 5; k < 8; k++) {
23775 GemmMicrokernelTester()
23776 .mr(6)
23777 .nr(8)
23778 .kr(1)
23779 .sr(1)
23780 .m(6)
23781 .n(8)
23782 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023783 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023784 }
23785 }
23786
Frank Barchard0725b8d2020-12-07 11:07:35 -080023787 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_gt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023788 for (size_t k = 5; k < 8; k++) {
23789 GemmMicrokernelTester()
23790 .mr(6)
23791 .nr(8)
23792 .kr(1)
23793 .sr(1)
23794 .m(6)
23795 .n(8)
23796 .k(k)
23797 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023798 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023799 }
23800 }
23801
Frank Barchard0725b8d2020-12-07 11:07:35 -080023802 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023803 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023804 for (uint32_t n = 1; n <= 8; n++) {
23805 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023806 GemmMicrokernelTester()
23807 .mr(6)
23808 .nr(8)
23809 .kr(1)
23810 .sr(1)
23811 .m(m)
23812 .n(n)
23813 .k(k)
23814 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023815 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023816 }
23817 }
23818 }
23819 }
23820
Frank Barchard0725b8d2020-12-07 11:07:35 -080023821 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023822 for (size_t k = 8; k <= 40; k += 4) {
23823 GemmMicrokernelTester()
23824 .mr(6)
23825 .nr(8)
23826 .kr(1)
23827 .sr(1)
23828 .m(6)
23829 .n(8)
23830 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023831 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023832 }
23833 }
23834
Frank Barchard0725b8d2020-12-07 11:07:35 -080023835 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_div_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023836 for (size_t k = 8; k <= 40; k += 4) {
23837 GemmMicrokernelTester()
23838 .mr(6)
23839 .nr(8)
23840 .kr(1)
23841 .sr(1)
23842 .m(6)
23843 .n(8)
23844 .k(k)
23845 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023846 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023847 }
23848 }
23849
Frank Barchard0725b8d2020-12-07 11:07:35 -080023850 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023851 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023852 for (uint32_t n = 1; n <= 8; n++) {
23853 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023854 GemmMicrokernelTester()
23855 .mr(6)
23856 .nr(8)
23857 .kr(1)
23858 .sr(1)
23859 .m(m)
23860 .n(n)
23861 .k(k)
23862 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023863 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023864 }
23865 }
23866 }
23867 }
23868
Frank Barchard0725b8d2020-12-07 11:07:35 -080023869 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023870 for (uint32_t n = 9; n < 16; n++) {
23871 for (size_t k = 1; k <= 20; k += 5) {
23872 GemmMicrokernelTester()
23873 .mr(6)
23874 .nr(8)
23875 .kr(1)
23876 .sr(1)
23877 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023878 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023879 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023880 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023881 }
23882 }
23883 }
23884
Frank Barchard0725b8d2020-12-07 11:07:35 -080023885 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023886 for (uint32_t n = 9; n < 16; n++) {
23887 for (size_t k = 1; k <= 20; k += 5) {
23888 GemmMicrokernelTester()
23889 .mr(6)
23890 .nr(8)
23891 .kr(1)
23892 .sr(1)
23893 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023894 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023895 .k(k)
23896 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023897 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023898 }
23899 }
23900 }
23901
Frank Barchard0725b8d2020-12-07 11:07:35 -080023902 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023903 for (uint32_t n = 9; n < 16; n++) {
23904 for (size_t k = 1; k <= 20; k += 5) {
23905 GemmMicrokernelTester()
23906 .mr(6)
23907 .nr(8)
23908 .kr(1)
23909 .sr(1)
23910 .m(6)
23911 .n(n)
23912 .k(k)
23913 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023914 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023915 }
23916 }
23917 }
23918
Frank Barchard0725b8d2020-12-07 11:07:35 -080023919 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023920 for (uint32_t n = 9; n < 16; n++) {
23921 for (size_t k = 1; k <= 20; k += 5) {
23922 for (uint32_t m = 1; m <= 6; m++) {
23923 GemmMicrokernelTester()
23924 .mr(6)
23925 .nr(8)
23926 .kr(1)
23927 .sr(1)
23928 .m(m)
23929 .n(n)
23930 .k(k)
23931 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023932 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023933 }
23934 }
23935 }
23936 }
23937
Frank Barchard0725b8d2020-12-07 11:07:35 -080023938 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023939 for (uint32_t n = 16; n <= 24; n += 8) {
23940 for (size_t k = 1; k <= 20; k += 5) {
23941 GemmMicrokernelTester()
23942 .mr(6)
23943 .nr(8)
23944 .kr(1)
23945 .sr(1)
23946 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023947 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023948 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023949 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023950 }
23951 }
23952 }
23953
Frank Barchard0725b8d2020-12-07 11:07:35 -080023954 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023955 for (uint32_t n = 16; n <= 24; n += 8) {
23956 for (size_t k = 1; k <= 20; k += 5) {
23957 GemmMicrokernelTester()
23958 .mr(6)
23959 .nr(8)
23960 .kr(1)
23961 .sr(1)
23962 .m(6)
23963 .n(n)
23964 .k(k)
23965 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023966 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023967 }
23968 }
23969 }
23970
Frank Barchard0725b8d2020-12-07 11:07:35 -080023971 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023972 for (uint32_t n = 16; n <= 24; n += 8) {
23973 for (size_t k = 1; k <= 20; k += 5) {
23974 GemmMicrokernelTester()
23975 .mr(6)
23976 .nr(8)
23977 .kr(1)
23978 .sr(1)
23979 .m(6)
23980 .n(n)
23981 .k(k)
23982 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023983 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023984 }
23985 }
23986 }
23987
Frank Barchard0725b8d2020-12-07 11:07:35 -080023988 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023989 for (uint32_t n = 16; n <= 24; n += 8) {
23990 for (size_t k = 1; k <= 20; k += 5) {
23991 for (uint32_t m = 1; m <= 6; m++) {
23992 GemmMicrokernelTester()
23993 .mr(6)
23994 .nr(8)
23995 .kr(1)
23996 .sr(1)
23997 .m(m)
23998 .n(n)
23999 .k(k)
24000 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024001 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024002 }
24003 }
24004 }
24005 }
24006
Frank Barchard0725b8d2020-12-07 11:07:35 -080024007 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024008 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024009 for (uint32_t n = 1; n <= 8; n++) {
24010 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024011 GemmMicrokernelTester()
24012 .mr(6)
24013 .nr(8)
24014 .kr(1)
24015 .sr(1)
24016 .m(m)
24017 .n(n)
24018 .k(k)
24019 .cm_stride(11)
24020 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024021 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024022 }
24023 }
24024 }
24025 }
24026
Frank Barchard0725b8d2020-12-07 11:07:35 -080024027 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024028 GemmMicrokernelTester()
24029 .mr(6)
24030 .nr(8)
24031 .kr(1)
24032 .sr(1)
24033 .m(6)
24034 .n(8)
24035 .k(4)
24036 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024037 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024038 }
24039
Frank Barchard0725b8d2020-12-07 11:07:35 -080024040 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024041 GemmMicrokernelTester()
24042 .mr(6)
24043 .nr(8)
24044 .kr(1)
24045 .sr(1)
24046 .m(6)
24047 .n(8)
24048 .k(4)
24049 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024050 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024051 }
24052
Frank Barchard0725b8d2020-12-07 11:07:35 -080024053 TEST(F32_GEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024054 GemmMicrokernelTester()
24055 .mr(6)
24056 .nr(8)
24057 .kr(1)
24058 .sr(1)
24059 .m(6)
24060 .n(8)
24061 .k(4)
24062 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024063 .Test(xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024064 }
Marat Dukhan4c617792021-12-21 15:47:58 -080024065#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024066
24067
Marat Dukhan4c617792021-12-21 15:47:58 -080024068#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024069 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4) {
24070 GemmMicrokernelTester()
24071 .mr(3)
24072 .nr(8)
24073 .kr(1)
24074 .sr(4)
24075 .m(3)
24076 .n(8)
24077 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024078 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024079 }
24080
24081 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, strided_cn) {
24082 GemmMicrokernelTester()
24083 .mr(3)
24084 .nr(8)
24085 .kr(1)
24086 .sr(4)
24087 .m(3)
24088 .n(8)
24089 .k(4)
24090 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024091 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024092 }
24093
24094 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_strided_a) {
24095 GemmMicrokernelTester()
24096 .mr(3)
24097 .nr(8)
24098 .kr(1)
24099 .sr(4)
24100 .m(3)
24101 .n(8)
24102 .k(4)
24103 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024104 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024105 }
24106
24107 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024108 for (uint32_t n = 1; n <= 8; n++) {
24109 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024110 GemmMicrokernelTester()
24111 .mr(3)
24112 .nr(8)
24113 .kr(1)
24114 .sr(4)
24115 .m(m)
24116 .n(n)
24117 .k(4)
24118 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024119 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024120 }
24121 }
24122 }
24123
24124 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
24125 for (uint32_t m = 1; m <= 3; m++) {
24126 GemmMicrokernelTester()
24127 .mr(3)
24128 .nr(8)
24129 .kr(1)
24130 .sr(4)
24131 .m(m)
24132 .n(8)
24133 .k(4)
24134 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024135 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024136 }
24137 }
24138
24139 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
24140 for (uint32_t n = 1; n <= 8; n++) {
24141 GemmMicrokernelTester()
24142 .mr(3)
24143 .nr(8)
24144 .kr(1)
24145 .sr(4)
24146 .m(3)
24147 .n(n)
24148 .k(4)
24149 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024150 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024151 }
24152 }
24153
24154 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_lt_4) {
24155 for (size_t k = 1; k < 4; k++) {
24156 GemmMicrokernelTester()
24157 .mr(3)
24158 .nr(8)
24159 .kr(1)
24160 .sr(4)
24161 .m(3)
24162 .n(8)
24163 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024164 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024165 }
24166 }
24167
24168 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_lt_4_strided_a) {
24169 for (size_t k = 1; k < 4; k++) {
24170 GemmMicrokernelTester()
24171 .mr(3)
24172 .nr(8)
24173 .kr(1)
24174 .sr(4)
24175 .m(3)
24176 .n(8)
24177 .k(k)
24178 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024179 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024180 }
24181 }
24182
24183 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
24184 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024185 for (uint32_t n = 1; n <= 8; n++) {
24186 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024187 GemmMicrokernelTester()
24188 .mr(3)
24189 .nr(8)
24190 .kr(1)
24191 .sr(4)
24192 .m(m)
24193 .n(n)
24194 .k(k)
24195 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024196 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024197 }
24198 }
24199 }
24200 }
24201
24202 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_gt_4) {
24203 for (size_t k = 5; k < 8; k++) {
24204 GemmMicrokernelTester()
24205 .mr(3)
24206 .nr(8)
24207 .kr(1)
24208 .sr(4)
24209 .m(3)
24210 .n(8)
24211 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024212 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024213 }
24214 }
24215
24216 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_gt_4_strided_a) {
24217 for (size_t k = 5; k < 8; k++) {
24218 GemmMicrokernelTester()
24219 .mr(3)
24220 .nr(8)
24221 .kr(1)
24222 .sr(4)
24223 .m(3)
24224 .n(8)
24225 .k(k)
24226 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024227 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024228 }
24229 }
24230
24231 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
24232 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024233 for (uint32_t n = 1; n <= 8; n++) {
24234 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024235 GemmMicrokernelTester()
24236 .mr(3)
24237 .nr(8)
24238 .kr(1)
24239 .sr(4)
24240 .m(m)
24241 .n(n)
24242 .k(k)
24243 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024244 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024245 }
24246 }
24247 }
24248 }
24249
24250 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_div_4) {
24251 for (size_t k = 8; k <= 40; k += 4) {
24252 GemmMicrokernelTester()
24253 .mr(3)
24254 .nr(8)
24255 .kr(1)
24256 .sr(4)
24257 .m(3)
24258 .n(8)
24259 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024260 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024261 }
24262 }
24263
24264 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_div_4_strided_a) {
24265 for (size_t k = 8; k <= 40; k += 4) {
24266 GemmMicrokernelTester()
24267 .mr(3)
24268 .nr(8)
24269 .kr(1)
24270 .sr(4)
24271 .m(3)
24272 .n(8)
24273 .k(k)
24274 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024275 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024276 }
24277 }
24278
24279 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_div_4_subtile) {
24280 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024281 for (uint32_t n = 1; n <= 8; n++) {
24282 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024283 GemmMicrokernelTester()
24284 .mr(3)
24285 .nr(8)
24286 .kr(1)
24287 .sr(4)
24288 .m(m)
24289 .n(n)
24290 .k(k)
24291 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024292 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024293 }
24294 }
24295 }
24296 }
24297
24298 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8) {
24299 for (uint32_t n = 9; n < 16; n++) {
24300 for (size_t k = 1; k <= 20; k += 5) {
24301 GemmMicrokernelTester()
24302 .mr(3)
24303 .nr(8)
24304 .kr(1)
24305 .sr(4)
24306 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024307 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024308 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024309 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024310 }
24311 }
24312 }
24313
24314 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
24315 for (uint32_t n = 9; n < 16; n++) {
24316 for (size_t k = 1; k <= 20; k += 5) {
24317 GemmMicrokernelTester()
24318 .mr(3)
24319 .nr(8)
24320 .kr(1)
24321 .sr(4)
24322 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024323 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024324 .k(k)
24325 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024326 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024327 }
24328 }
24329 }
24330
24331 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_strided_a) {
24332 for (uint32_t n = 9; n < 16; n++) {
24333 for (size_t k = 1; k <= 20; k += 5) {
24334 GemmMicrokernelTester()
24335 .mr(3)
24336 .nr(8)
24337 .kr(1)
24338 .sr(4)
24339 .m(3)
24340 .n(n)
24341 .k(k)
24342 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024343 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024344 }
24345 }
24346 }
24347
24348 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
24349 for (uint32_t n = 9; n < 16; n++) {
24350 for (size_t k = 1; k <= 20; k += 5) {
24351 for (uint32_t m = 1; m <= 3; m++) {
24352 GemmMicrokernelTester()
24353 .mr(3)
24354 .nr(8)
24355 .kr(1)
24356 .sr(4)
24357 .m(m)
24358 .n(n)
24359 .k(k)
24360 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024361 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024362 }
24363 }
24364 }
24365 }
24366
24367 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8) {
24368 for (uint32_t n = 16; n <= 24; n += 8) {
24369 for (size_t k = 1; k <= 20; k += 5) {
24370 GemmMicrokernelTester()
24371 .mr(3)
24372 .nr(8)
24373 .kr(1)
24374 .sr(4)
24375 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024376 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024377 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024378 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024379 }
24380 }
24381 }
24382
24383 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
24384 for (uint32_t n = 16; n <= 24; n += 8) {
24385 for (size_t k = 1; k <= 20; k += 5) {
24386 GemmMicrokernelTester()
24387 .mr(3)
24388 .nr(8)
24389 .kr(1)
24390 .sr(4)
24391 .m(3)
24392 .n(n)
24393 .k(k)
24394 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024395 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024396 }
24397 }
24398 }
24399
24400 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_strided_a) {
24401 for (uint32_t n = 16; n <= 24; n += 8) {
24402 for (size_t k = 1; k <= 20; k += 5) {
24403 GemmMicrokernelTester()
24404 .mr(3)
24405 .nr(8)
24406 .kr(1)
24407 .sr(4)
24408 .m(3)
24409 .n(n)
24410 .k(k)
24411 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024412 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024413 }
24414 }
24415 }
24416
24417 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_subtile) {
24418 for (uint32_t n = 16; n <= 24; n += 8) {
24419 for (size_t k = 1; k <= 20; k += 5) {
24420 for (uint32_t m = 1; m <= 3; m++) {
24421 GemmMicrokernelTester()
24422 .mr(3)
24423 .nr(8)
24424 .kr(1)
24425 .sr(4)
24426 .m(m)
24427 .n(n)
24428 .k(k)
24429 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024430 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024431 }
24432 }
24433 }
24434 }
24435
24436 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, strided_cm_subtile) {
24437 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024438 for (uint32_t n = 1; n <= 8; n++) {
24439 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024440 GemmMicrokernelTester()
24441 .mr(3)
24442 .nr(8)
24443 .kr(1)
24444 .sr(4)
24445 .m(m)
24446 .n(n)
24447 .k(k)
24448 .cm_stride(11)
24449 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024450 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024451 }
24452 }
24453 }
24454 }
24455
24456 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, qmin) {
24457 GemmMicrokernelTester()
24458 .mr(3)
24459 .nr(8)
24460 .kr(1)
24461 .sr(4)
24462 .m(3)
24463 .n(8)
24464 .k(4)
24465 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024466 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024467 }
24468
24469 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, qmax) {
24470 GemmMicrokernelTester()
24471 .mr(3)
24472 .nr(8)
24473 .kr(1)
24474 .sr(4)
24475 .m(3)
24476 .n(8)
24477 .k(4)
24478 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024479 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024480 }
24481
24482 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_ARM, strided_cm) {
24483 GemmMicrokernelTester()
24484 .mr(3)
24485 .nr(8)
24486 .kr(1)
24487 .sr(4)
24488 .m(3)
24489 .n(8)
24490 .k(4)
24491 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024492 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024493 }
Marat Dukhan4c617792021-12-21 15:47:58 -080024494#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024495
24496
Marat Dukhan4c617792021-12-21 15:47:58 -080024497#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024498 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4) {
24499 GemmMicrokernelTester()
24500 .mr(6)
24501 .nr(8)
24502 .kr(1)
24503 .sr(4)
24504 .m(6)
24505 .n(8)
24506 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024507 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024508 }
24509
24510 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, strided_cn) {
24511 GemmMicrokernelTester()
24512 .mr(6)
24513 .nr(8)
24514 .kr(1)
24515 .sr(4)
24516 .m(6)
24517 .n(8)
24518 .k(4)
24519 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024520 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024521 }
24522
24523 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_strided_a) {
24524 GemmMicrokernelTester()
24525 .mr(6)
24526 .nr(8)
24527 .kr(1)
24528 .sr(4)
24529 .m(6)
24530 .n(8)
24531 .k(4)
24532 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024533 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024534 }
24535
24536 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024537 for (uint32_t n = 1; n <= 8; n++) {
24538 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024539 GemmMicrokernelTester()
24540 .mr(6)
24541 .nr(8)
24542 .kr(1)
24543 .sr(4)
24544 .m(m)
24545 .n(n)
24546 .k(4)
24547 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024548 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024549 }
24550 }
24551 }
24552
24553 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
24554 for (uint32_t m = 1; m <= 6; m++) {
24555 GemmMicrokernelTester()
24556 .mr(6)
24557 .nr(8)
24558 .kr(1)
24559 .sr(4)
24560 .m(m)
24561 .n(8)
24562 .k(4)
24563 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024564 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024565 }
24566 }
24567
24568 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
24569 for (uint32_t n = 1; n <= 8; n++) {
24570 GemmMicrokernelTester()
24571 .mr(6)
24572 .nr(8)
24573 .kr(1)
24574 .sr(4)
24575 .m(6)
24576 .n(n)
24577 .k(4)
24578 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024579 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024580 }
24581 }
24582
24583 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_lt_4) {
24584 for (size_t k = 1; k < 4; k++) {
24585 GemmMicrokernelTester()
24586 .mr(6)
24587 .nr(8)
24588 .kr(1)
24589 .sr(4)
24590 .m(6)
24591 .n(8)
24592 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024593 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024594 }
24595 }
24596
24597 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_lt_4_strided_a) {
24598 for (size_t k = 1; k < 4; k++) {
24599 GemmMicrokernelTester()
24600 .mr(6)
24601 .nr(8)
24602 .kr(1)
24603 .sr(4)
24604 .m(6)
24605 .n(8)
24606 .k(k)
24607 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024608 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024609 }
24610 }
24611
24612 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
24613 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024614 for (uint32_t n = 1; n <= 8; n++) {
24615 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024616 GemmMicrokernelTester()
24617 .mr(6)
24618 .nr(8)
24619 .kr(1)
24620 .sr(4)
24621 .m(m)
24622 .n(n)
24623 .k(k)
24624 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024625 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024626 }
24627 }
24628 }
24629 }
24630
24631 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_gt_4) {
24632 for (size_t k = 5; k < 8; k++) {
24633 GemmMicrokernelTester()
24634 .mr(6)
24635 .nr(8)
24636 .kr(1)
24637 .sr(4)
24638 .m(6)
24639 .n(8)
24640 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024641 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024642 }
24643 }
24644
24645 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_gt_4_strided_a) {
24646 for (size_t k = 5; k < 8; k++) {
24647 GemmMicrokernelTester()
24648 .mr(6)
24649 .nr(8)
24650 .kr(1)
24651 .sr(4)
24652 .m(6)
24653 .n(8)
24654 .k(k)
24655 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024656 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024657 }
24658 }
24659
24660 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
24661 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024662 for (uint32_t n = 1; n <= 8; n++) {
24663 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024664 GemmMicrokernelTester()
24665 .mr(6)
24666 .nr(8)
24667 .kr(1)
24668 .sr(4)
24669 .m(m)
24670 .n(n)
24671 .k(k)
24672 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024673 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024674 }
24675 }
24676 }
24677 }
24678
24679 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_div_4) {
24680 for (size_t k = 8; k <= 40; k += 4) {
24681 GemmMicrokernelTester()
24682 .mr(6)
24683 .nr(8)
24684 .kr(1)
24685 .sr(4)
24686 .m(6)
24687 .n(8)
24688 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024689 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024690 }
24691 }
24692
24693 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_div_4_strided_a) {
24694 for (size_t k = 8; k <= 40; k += 4) {
24695 GemmMicrokernelTester()
24696 .mr(6)
24697 .nr(8)
24698 .kr(1)
24699 .sr(4)
24700 .m(6)
24701 .n(8)
24702 .k(k)
24703 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024704 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024705 }
24706 }
24707
24708 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_div_4_subtile) {
24709 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024710 for (uint32_t n = 1; n <= 8; n++) {
24711 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024712 GemmMicrokernelTester()
24713 .mr(6)
24714 .nr(8)
24715 .kr(1)
24716 .sr(4)
24717 .m(m)
24718 .n(n)
24719 .k(k)
24720 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024721 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024722 }
24723 }
24724 }
24725 }
24726
24727 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8) {
24728 for (uint32_t n = 9; n < 16; n++) {
24729 for (size_t k = 1; k <= 20; k += 5) {
24730 GemmMicrokernelTester()
24731 .mr(6)
24732 .nr(8)
24733 .kr(1)
24734 .sr(4)
24735 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024736 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024737 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024738 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024739 }
24740 }
24741 }
24742
24743 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
24744 for (uint32_t n = 9; n < 16; n++) {
24745 for (size_t k = 1; k <= 20; k += 5) {
24746 GemmMicrokernelTester()
24747 .mr(6)
24748 .nr(8)
24749 .kr(1)
24750 .sr(4)
24751 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024752 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024753 .k(k)
24754 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024755 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024756 }
24757 }
24758 }
24759
24760 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_strided_a) {
24761 for (uint32_t n = 9; n < 16; n++) {
24762 for (size_t k = 1; k <= 20; k += 5) {
24763 GemmMicrokernelTester()
24764 .mr(6)
24765 .nr(8)
24766 .kr(1)
24767 .sr(4)
24768 .m(6)
24769 .n(n)
24770 .k(k)
24771 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024772 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024773 }
24774 }
24775 }
24776
24777 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
24778 for (uint32_t n = 9; n < 16; n++) {
24779 for (size_t k = 1; k <= 20; k += 5) {
24780 for (uint32_t m = 1; m <= 6; m++) {
24781 GemmMicrokernelTester()
24782 .mr(6)
24783 .nr(8)
24784 .kr(1)
24785 .sr(4)
24786 .m(m)
24787 .n(n)
24788 .k(k)
24789 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024790 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024791 }
24792 }
24793 }
24794 }
24795
24796 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8) {
24797 for (uint32_t n = 16; n <= 24; n += 8) {
24798 for (size_t k = 1; k <= 20; k += 5) {
24799 GemmMicrokernelTester()
24800 .mr(6)
24801 .nr(8)
24802 .kr(1)
24803 .sr(4)
24804 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024805 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024806 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024807 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024808 }
24809 }
24810 }
24811
24812 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
24813 for (uint32_t n = 16; n <= 24; n += 8) {
24814 for (size_t k = 1; k <= 20; k += 5) {
24815 GemmMicrokernelTester()
24816 .mr(6)
24817 .nr(8)
24818 .kr(1)
24819 .sr(4)
24820 .m(6)
24821 .n(n)
24822 .k(k)
24823 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024824 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024825 }
24826 }
24827 }
24828
24829 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_strided_a) {
24830 for (uint32_t n = 16; n <= 24; n += 8) {
24831 for (size_t k = 1; k <= 20; k += 5) {
24832 GemmMicrokernelTester()
24833 .mr(6)
24834 .nr(8)
24835 .kr(1)
24836 .sr(4)
24837 .m(6)
24838 .n(n)
24839 .k(k)
24840 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024841 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024842 }
24843 }
24844 }
24845
24846 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_subtile) {
24847 for (uint32_t n = 16; n <= 24; n += 8) {
24848 for (size_t k = 1; k <= 20; k += 5) {
24849 for (uint32_t m = 1; m <= 6; m++) {
24850 GemmMicrokernelTester()
24851 .mr(6)
24852 .nr(8)
24853 .kr(1)
24854 .sr(4)
24855 .m(m)
24856 .n(n)
24857 .k(k)
24858 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024859 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024860 }
24861 }
24862 }
24863 }
24864
24865 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, strided_cm_subtile) {
24866 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024867 for (uint32_t n = 1; n <= 8; n++) {
24868 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024869 GemmMicrokernelTester()
24870 .mr(6)
24871 .nr(8)
24872 .kr(1)
24873 .sr(4)
24874 .m(m)
24875 .n(n)
24876 .k(k)
24877 .cm_stride(11)
24878 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024879 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024880 }
24881 }
24882 }
24883 }
24884
24885 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, qmin) {
24886 GemmMicrokernelTester()
24887 .mr(6)
24888 .nr(8)
24889 .kr(1)
24890 .sr(4)
24891 .m(6)
24892 .n(8)
24893 .k(4)
24894 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024895 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024896 }
24897
24898 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, qmax) {
24899 GemmMicrokernelTester()
24900 .mr(6)
24901 .nr(8)
24902 .kr(1)
24903 .sr(4)
24904 .m(6)
24905 .n(8)
24906 .k(4)
24907 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024908 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024909 }
24910
24911 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_ARM, strided_cm) {
24912 GemmMicrokernelTester()
24913 .mr(6)
24914 .nr(8)
24915 .kr(1)
24916 .sr(4)
24917 .m(6)
24918 .n(8)
24919 .k(4)
24920 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024921 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024922 }
Marat Dukhan4c617792021-12-21 15:47:58 -080024923#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024924
24925
Marat Dukhan4c617792021-12-21 15:47:58 -080024926#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024927 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4) {
24928 GemmMicrokernelTester()
24929 .mr(3)
24930 .nr(8)
24931 .kr(1)
24932 .sr(4)
24933 .m(3)
24934 .n(8)
24935 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024936 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024937 }
24938
24939 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, strided_cn) {
24940 GemmMicrokernelTester()
24941 .mr(3)
24942 .nr(8)
24943 .kr(1)
24944 .sr(4)
24945 .m(3)
24946 .n(8)
24947 .k(4)
24948 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024949 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024950 }
24951
24952 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_strided_a) {
24953 GemmMicrokernelTester()
24954 .mr(3)
24955 .nr(8)
24956 .kr(1)
24957 .sr(4)
24958 .m(3)
24959 .n(8)
24960 .k(4)
24961 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024962 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024963 }
24964
24965 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024966 for (uint32_t n = 1; n <= 8; n++) {
24967 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024968 GemmMicrokernelTester()
24969 .mr(3)
24970 .nr(8)
24971 .kr(1)
24972 .sr(4)
24973 .m(m)
24974 .n(n)
24975 .k(4)
24976 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024977 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024978 }
24979 }
24980 }
24981
24982 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
24983 for (uint32_t m = 1; m <= 3; m++) {
24984 GemmMicrokernelTester()
24985 .mr(3)
24986 .nr(8)
24987 .kr(1)
24988 .sr(4)
24989 .m(m)
24990 .n(8)
24991 .k(4)
24992 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024993 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024994 }
24995 }
24996
24997 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
24998 for (uint32_t n = 1; n <= 8; n++) {
24999 GemmMicrokernelTester()
25000 .mr(3)
25001 .nr(8)
25002 .kr(1)
25003 .sr(4)
25004 .m(3)
25005 .n(n)
25006 .k(4)
25007 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025008 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025009 }
25010 }
25011
25012 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_lt_4) {
25013 for (size_t k = 1; k < 4; k++) {
25014 GemmMicrokernelTester()
25015 .mr(3)
25016 .nr(8)
25017 .kr(1)
25018 .sr(4)
25019 .m(3)
25020 .n(8)
25021 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025022 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025023 }
25024 }
25025
25026 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_lt_4_strided_a) {
25027 for (size_t k = 1; k < 4; k++) {
25028 GemmMicrokernelTester()
25029 .mr(3)
25030 .nr(8)
25031 .kr(1)
25032 .sr(4)
25033 .m(3)
25034 .n(8)
25035 .k(k)
25036 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025037 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025038 }
25039 }
25040
25041 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_lt_4_subtile) {
25042 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025043 for (uint32_t n = 1; n <= 8; n++) {
25044 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025045 GemmMicrokernelTester()
25046 .mr(3)
25047 .nr(8)
25048 .kr(1)
25049 .sr(4)
25050 .m(m)
25051 .n(n)
25052 .k(k)
25053 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025054 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025055 }
25056 }
25057 }
25058 }
25059
25060 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_gt_4) {
25061 for (size_t k = 5; k < 8; k++) {
25062 GemmMicrokernelTester()
25063 .mr(3)
25064 .nr(8)
25065 .kr(1)
25066 .sr(4)
25067 .m(3)
25068 .n(8)
25069 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025070 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025071 }
25072 }
25073
25074 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_gt_4_strided_a) {
25075 for (size_t k = 5; k < 8; k++) {
25076 GemmMicrokernelTester()
25077 .mr(3)
25078 .nr(8)
25079 .kr(1)
25080 .sr(4)
25081 .m(3)
25082 .n(8)
25083 .k(k)
25084 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025085 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025086 }
25087 }
25088
25089 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_gt_4_subtile) {
25090 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025091 for (uint32_t n = 1; n <= 8; n++) {
25092 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025093 GemmMicrokernelTester()
25094 .mr(3)
25095 .nr(8)
25096 .kr(1)
25097 .sr(4)
25098 .m(m)
25099 .n(n)
25100 .k(k)
25101 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025102 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025103 }
25104 }
25105 }
25106 }
25107
25108 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_div_4) {
25109 for (size_t k = 8; k <= 40; k += 4) {
25110 GemmMicrokernelTester()
25111 .mr(3)
25112 .nr(8)
25113 .kr(1)
25114 .sr(4)
25115 .m(3)
25116 .n(8)
25117 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025118 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025119 }
25120 }
25121
25122 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_div_4_strided_a) {
25123 for (size_t k = 8; k <= 40; k += 4) {
25124 GemmMicrokernelTester()
25125 .mr(3)
25126 .nr(8)
25127 .kr(1)
25128 .sr(4)
25129 .m(3)
25130 .n(8)
25131 .k(k)
25132 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025133 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025134 }
25135 }
25136
25137 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, k_div_4_subtile) {
25138 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025139 for (uint32_t n = 1; n <= 8; n++) {
25140 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025141 GemmMicrokernelTester()
25142 .mr(3)
25143 .nr(8)
25144 .kr(1)
25145 .sr(4)
25146 .m(m)
25147 .n(n)
25148 .k(k)
25149 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025150 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025151 }
25152 }
25153 }
25154 }
25155
25156 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8) {
25157 for (uint32_t n = 9; n < 16; n++) {
25158 for (size_t k = 1; k <= 20; k += 5) {
25159 GemmMicrokernelTester()
25160 .mr(3)
25161 .nr(8)
25162 .kr(1)
25163 .sr(4)
25164 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025165 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025166 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025167 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025168 }
25169 }
25170 }
25171
25172 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
25173 for (uint32_t n = 9; n < 16; n++) {
25174 for (size_t k = 1; k <= 20; k += 5) {
25175 GemmMicrokernelTester()
25176 .mr(3)
25177 .nr(8)
25178 .kr(1)
25179 .sr(4)
25180 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025181 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025182 .k(k)
25183 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025184 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025185 }
25186 }
25187 }
25188
25189 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_strided_a) {
25190 for (uint32_t n = 9; n < 16; n++) {
25191 for (size_t k = 1; k <= 20; k += 5) {
25192 GemmMicrokernelTester()
25193 .mr(3)
25194 .nr(8)
25195 .kr(1)
25196 .sr(4)
25197 .m(3)
25198 .n(n)
25199 .k(k)
25200 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025201 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025202 }
25203 }
25204 }
25205
25206 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_subtile) {
25207 for (uint32_t n = 9; n < 16; n++) {
25208 for (size_t k = 1; k <= 20; k += 5) {
25209 for (uint32_t m = 1; m <= 3; m++) {
25210 GemmMicrokernelTester()
25211 .mr(3)
25212 .nr(8)
25213 .kr(1)
25214 .sr(4)
25215 .m(m)
25216 .n(n)
25217 .k(k)
25218 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025219 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025220 }
25221 }
25222 }
25223 }
25224
25225 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, n_div_8) {
25226 for (uint32_t n = 16; n <= 24; n += 8) {
25227 for (size_t k = 1; k <= 20; k += 5) {
25228 GemmMicrokernelTester()
25229 .mr(3)
25230 .nr(8)
25231 .kr(1)
25232 .sr(4)
25233 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025234 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025235 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025236 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025237 }
25238 }
25239 }
25240
25241 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
25242 for (uint32_t n = 16; n <= 24; n += 8) {
25243 for (size_t k = 1; k <= 20; k += 5) {
25244 GemmMicrokernelTester()
25245 .mr(3)
25246 .nr(8)
25247 .kr(1)
25248 .sr(4)
25249 .m(3)
25250 .n(n)
25251 .k(k)
25252 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025253 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025254 }
25255 }
25256 }
25257
25258 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_strided_a) {
25259 for (uint32_t n = 16; n <= 24; n += 8) {
25260 for (size_t k = 1; k <= 20; k += 5) {
25261 GemmMicrokernelTester()
25262 .mr(3)
25263 .nr(8)
25264 .kr(1)
25265 .sr(4)
25266 .m(3)
25267 .n(n)
25268 .k(k)
25269 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025270 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025271 }
25272 }
25273 }
25274
25275 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_subtile) {
25276 for (uint32_t n = 16; n <= 24; n += 8) {
25277 for (size_t k = 1; k <= 20; k += 5) {
25278 for (uint32_t m = 1; m <= 3; m++) {
25279 GemmMicrokernelTester()
25280 .mr(3)
25281 .nr(8)
25282 .kr(1)
25283 .sr(4)
25284 .m(m)
25285 .n(n)
25286 .k(k)
25287 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025288 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025289 }
25290 }
25291 }
25292 }
25293
25294 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, strided_cm_subtile) {
25295 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025296 for (uint32_t n = 1; n <= 8; n++) {
25297 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025298 GemmMicrokernelTester()
25299 .mr(3)
25300 .nr(8)
25301 .kr(1)
25302 .sr(4)
25303 .m(m)
25304 .n(n)
25305 .k(k)
25306 .cm_stride(11)
25307 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025308 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025309 }
25310 }
25311 }
25312 }
25313
25314 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, qmin) {
25315 GemmMicrokernelTester()
25316 .mr(3)
25317 .nr(8)
25318 .kr(1)
25319 .sr(4)
25320 .m(3)
25321 .n(8)
25322 .k(4)
25323 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025324 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025325 }
25326
25327 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, qmax) {
25328 GemmMicrokernelTester()
25329 .mr(3)
25330 .nr(8)
25331 .kr(1)
25332 .sr(4)
25333 .m(3)
25334 .n(8)
25335 .k(4)
25336 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025337 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025338 }
25339
25340 TEST(F32_GEMM_MINMAX_3X8S4__WASMSIMD_X86, strided_cm) {
25341 GemmMicrokernelTester()
25342 .mr(3)
25343 .nr(8)
25344 .kr(1)
25345 .sr(4)
25346 .m(3)
25347 .n(8)
25348 .k(4)
25349 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025350 .Test(xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025351 }
Marat Dukhan4c617792021-12-21 15:47:58 -080025352#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025353
25354
Marat Dukhan4c617792021-12-21 15:47:58 -080025355#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025356 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4) {
25357 GemmMicrokernelTester()
25358 .mr(6)
25359 .nr(8)
25360 .kr(1)
25361 .sr(4)
25362 .m(6)
25363 .n(8)
25364 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025365 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025366 }
25367
25368 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, strided_cn) {
25369 GemmMicrokernelTester()
25370 .mr(6)
25371 .nr(8)
25372 .kr(1)
25373 .sr(4)
25374 .m(6)
25375 .n(8)
25376 .k(4)
25377 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025378 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025379 }
25380
25381 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_strided_a) {
25382 GemmMicrokernelTester()
25383 .mr(6)
25384 .nr(8)
25385 .kr(1)
25386 .sr(4)
25387 .m(6)
25388 .n(8)
25389 .k(4)
25390 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025391 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025392 }
25393
25394 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025395 for (uint32_t n = 1; n <= 8; n++) {
25396 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025397 GemmMicrokernelTester()
25398 .mr(6)
25399 .nr(8)
25400 .kr(1)
25401 .sr(4)
25402 .m(m)
25403 .n(n)
25404 .k(4)
25405 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025406 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025407 }
25408 }
25409 }
25410
25411 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
25412 for (uint32_t m = 1; m <= 6; m++) {
25413 GemmMicrokernelTester()
25414 .mr(6)
25415 .nr(8)
25416 .kr(1)
25417 .sr(4)
25418 .m(m)
25419 .n(8)
25420 .k(4)
25421 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025422 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025423 }
25424 }
25425
25426 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
25427 for (uint32_t n = 1; n <= 8; n++) {
25428 GemmMicrokernelTester()
25429 .mr(6)
25430 .nr(8)
25431 .kr(1)
25432 .sr(4)
25433 .m(6)
25434 .n(n)
25435 .k(4)
25436 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025437 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025438 }
25439 }
25440
25441 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_lt_4) {
25442 for (size_t k = 1; k < 4; k++) {
25443 GemmMicrokernelTester()
25444 .mr(6)
25445 .nr(8)
25446 .kr(1)
25447 .sr(4)
25448 .m(6)
25449 .n(8)
25450 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025451 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025452 }
25453 }
25454
25455 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_lt_4_strided_a) {
25456 for (size_t k = 1; k < 4; k++) {
25457 GemmMicrokernelTester()
25458 .mr(6)
25459 .nr(8)
25460 .kr(1)
25461 .sr(4)
25462 .m(6)
25463 .n(8)
25464 .k(k)
25465 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025466 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025467 }
25468 }
25469
25470 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_lt_4_subtile) {
25471 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025472 for (uint32_t n = 1; n <= 8; n++) {
25473 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025474 GemmMicrokernelTester()
25475 .mr(6)
25476 .nr(8)
25477 .kr(1)
25478 .sr(4)
25479 .m(m)
25480 .n(n)
25481 .k(k)
25482 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025483 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025484 }
25485 }
25486 }
25487 }
25488
25489 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_gt_4) {
25490 for (size_t k = 5; k < 8; k++) {
25491 GemmMicrokernelTester()
25492 .mr(6)
25493 .nr(8)
25494 .kr(1)
25495 .sr(4)
25496 .m(6)
25497 .n(8)
25498 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025499 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025500 }
25501 }
25502
25503 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_gt_4_strided_a) {
25504 for (size_t k = 5; k < 8; k++) {
25505 GemmMicrokernelTester()
25506 .mr(6)
25507 .nr(8)
25508 .kr(1)
25509 .sr(4)
25510 .m(6)
25511 .n(8)
25512 .k(k)
25513 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025514 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025515 }
25516 }
25517
25518 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_gt_4_subtile) {
25519 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025520 for (uint32_t n = 1; n <= 8; n++) {
25521 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025522 GemmMicrokernelTester()
25523 .mr(6)
25524 .nr(8)
25525 .kr(1)
25526 .sr(4)
25527 .m(m)
25528 .n(n)
25529 .k(k)
25530 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025531 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025532 }
25533 }
25534 }
25535 }
25536
25537 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_div_4) {
25538 for (size_t k = 8; k <= 40; k += 4) {
25539 GemmMicrokernelTester()
25540 .mr(6)
25541 .nr(8)
25542 .kr(1)
25543 .sr(4)
25544 .m(6)
25545 .n(8)
25546 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025547 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025548 }
25549 }
25550
25551 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_div_4_strided_a) {
25552 for (size_t k = 8; k <= 40; k += 4) {
25553 GemmMicrokernelTester()
25554 .mr(6)
25555 .nr(8)
25556 .kr(1)
25557 .sr(4)
25558 .m(6)
25559 .n(8)
25560 .k(k)
25561 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025562 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025563 }
25564 }
25565
25566 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, k_div_4_subtile) {
25567 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025568 for (uint32_t n = 1; n <= 8; n++) {
25569 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025570 GemmMicrokernelTester()
25571 .mr(6)
25572 .nr(8)
25573 .kr(1)
25574 .sr(4)
25575 .m(m)
25576 .n(n)
25577 .k(k)
25578 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025579 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025580 }
25581 }
25582 }
25583 }
25584
25585 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8) {
25586 for (uint32_t n = 9; n < 16; n++) {
25587 for (size_t k = 1; k <= 20; k += 5) {
25588 GemmMicrokernelTester()
25589 .mr(6)
25590 .nr(8)
25591 .kr(1)
25592 .sr(4)
25593 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025594 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025595 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025596 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025597 }
25598 }
25599 }
25600
25601 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
25602 for (uint32_t n = 9; n < 16; n++) {
25603 for (size_t k = 1; k <= 20; k += 5) {
25604 GemmMicrokernelTester()
25605 .mr(6)
25606 .nr(8)
25607 .kr(1)
25608 .sr(4)
25609 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025610 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025611 .k(k)
25612 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025613 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025614 }
25615 }
25616 }
25617
25618 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_strided_a) {
25619 for (uint32_t n = 9; n < 16; n++) {
25620 for (size_t k = 1; k <= 20; k += 5) {
25621 GemmMicrokernelTester()
25622 .mr(6)
25623 .nr(8)
25624 .kr(1)
25625 .sr(4)
25626 .m(6)
25627 .n(n)
25628 .k(k)
25629 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025630 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025631 }
25632 }
25633 }
25634
25635 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_subtile) {
25636 for (uint32_t n = 9; n < 16; n++) {
25637 for (size_t k = 1; k <= 20; k += 5) {
25638 for (uint32_t m = 1; m <= 6; m++) {
25639 GemmMicrokernelTester()
25640 .mr(6)
25641 .nr(8)
25642 .kr(1)
25643 .sr(4)
25644 .m(m)
25645 .n(n)
25646 .k(k)
25647 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025648 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025649 }
25650 }
25651 }
25652 }
25653
25654 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, n_div_8) {
25655 for (uint32_t n = 16; n <= 24; n += 8) {
25656 for (size_t k = 1; k <= 20; k += 5) {
25657 GemmMicrokernelTester()
25658 .mr(6)
25659 .nr(8)
25660 .kr(1)
25661 .sr(4)
25662 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025663 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025664 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025665 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025666 }
25667 }
25668 }
25669
25670 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
25671 for (uint32_t n = 16; n <= 24; n += 8) {
25672 for (size_t k = 1; k <= 20; k += 5) {
25673 GemmMicrokernelTester()
25674 .mr(6)
25675 .nr(8)
25676 .kr(1)
25677 .sr(4)
25678 .m(6)
25679 .n(n)
25680 .k(k)
25681 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025682 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025683 }
25684 }
25685 }
25686
25687 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_strided_a) {
25688 for (uint32_t n = 16; n <= 24; n += 8) {
25689 for (size_t k = 1; k <= 20; k += 5) {
25690 GemmMicrokernelTester()
25691 .mr(6)
25692 .nr(8)
25693 .kr(1)
25694 .sr(4)
25695 .m(6)
25696 .n(n)
25697 .k(k)
25698 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025699 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025700 }
25701 }
25702 }
25703
25704 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_subtile) {
25705 for (uint32_t n = 16; n <= 24; n += 8) {
25706 for (size_t k = 1; k <= 20; k += 5) {
25707 for (uint32_t m = 1; m <= 6; m++) {
25708 GemmMicrokernelTester()
25709 .mr(6)
25710 .nr(8)
25711 .kr(1)
25712 .sr(4)
25713 .m(m)
25714 .n(n)
25715 .k(k)
25716 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025717 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025718 }
25719 }
25720 }
25721 }
25722
25723 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, strided_cm_subtile) {
25724 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025725 for (uint32_t n = 1; n <= 8; n++) {
25726 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025727 GemmMicrokernelTester()
25728 .mr(6)
25729 .nr(8)
25730 .kr(1)
25731 .sr(4)
25732 .m(m)
25733 .n(n)
25734 .k(k)
25735 .cm_stride(11)
25736 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025737 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025738 }
25739 }
25740 }
25741 }
25742
25743 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, qmin) {
25744 GemmMicrokernelTester()
25745 .mr(6)
25746 .nr(8)
25747 .kr(1)
25748 .sr(4)
25749 .m(6)
25750 .n(8)
25751 .k(4)
25752 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025753 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025754 }
25755
25756 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, qmax) {
25757 GemmMicrokernelTester()
25758 .mr(6)
25759 .nr(8)
25760 .kr(1)
25761 .sr(4)
25762 .m(6)
25763 .n(8)
25764 .k(4)
25765 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025766 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025767 }
25768
25769 TEST(F32_GEMM_MINMAX_6X8S4__WASMSIMD_X86, strided_cm) {
25770 GemmMicrokernelTester()
25771 .mr(6)
25772 .nr(8)
25773 .kr(1)
25774 .sr(4)
25775 .m(6)
25776 .n(8)
25777 .k(4)
25778 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025779 .Test(xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025780 }
Marat Dukhan4c617792021-12-21 15:47:58 -080025781#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025782
25783
Marat Dukhan4c617792021-12-21 15:47:58 -080025784#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhane39e6462020-07-09 01:33:36 -070025785 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_eq_4) {
25786 GemmMicrokernelTester()
25787 .mr(4)
25788 .nr(2)
25789 .kr(4)
25790 .sr(1)
25791 .m(4)
25792 .n(2)
25793 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025794 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025795 }
25796
25797 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, strided_cn) {
25798 GemmMicrokernelTester()
25799 .mr(4)
25800 .nr(2)
25801 .kr(4)
25802 .sr(1)
25803 .m(4)
25804 .n(2)
25805 .k(4)
25806 .cn_stride(5)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025807 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025808 }
25809
25810 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_eq_4_strided_a) {
25811 GemmMicrokernelTester()
25812 .mr(4)
25813 .nr(2)
25814 .kr(4)
25815 .sr(1)
25816 .m(4)
25817 .n(2)
25818 .k(4)
25819 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025820 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025821 }
25822
25823 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025824 for (uint32_t n = 1; n <= 2; n++) {
25825 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhane39e6462020-07-09 01:33:36 -070025826 GemmMicrokernelTester()
25827 .mr(4)
25828 .nr(2)
25829 .kr(4)
25830 .sr(1)
25831 .m(m)
25832 .n(n)
25833 .k(4)
25834 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025835 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025836 }
25837 }
25838 }
25839
25840 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_eq_4_subtile_m) {
25841 for (uint32_t m = 1; m <= 4; m++) {
25842 GemmMicrokernelTester()
25843 .mr(4)
25844 .nr(2)
25845 .kr(4)
25846 .sr(1)
25847 .m(m)
25848 .n(2)
25849 .k(4)
25850 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025851 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025852 }
25853 }
25854
25855 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_eq_4_subtile_n) {
25856 for (uint32_t n = 1; n <= 2; n++) {
25857 GemmMicrokernelTester()
25858 .mr(4)
25859 .nr(2)
25860 .kr(4)
25861 .sr(1)
25862 .m(4)
25863 .n(n)
25864 .k(4)
25865 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025866 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025867 }
25868 }
25869
25870 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_lt_4) {
25871 for (size_t k = 1; k < 4; k++) {
25872 GemmMicrokernelTester()
25873 .mr(4)
25874 .nr(2)
25875 .kr(4)
25876 .sr(1)
25877 .m(4)
25878 .n(2)
25879 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025880 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025881 }
25882 }
25883
25884 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_lt_4_strided_a) {
25885 for (size_t k = 1; k < 4; k++) {
25886 GemmMicrokernelTester()
25887 .mr(4)
25888 .nr(2)
25889 .kr(4)
25890 .sr(1)
25891 .m(4)
25892 .n(2)
25893 .k(k)
25894 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025895 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025896 }
25897 }
25898
25899 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_lt_4_subtile) {
25900 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025901 for (uint32_t n = 1; n <= 2; n++) {
25902 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhane39e6462020-07-09 01:33:36 -070025903 GemmMicrokernelTester()
25904 .mr(4)
25905 .nr(2)
25906 .kr(4)
25907 .sr(1)
25908 .m(m)
25909 .n(n)
25910 .k(k)
25911 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025912 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025913 }
25914 }
25915 }
25916 }
25917
25918 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_gt_4) {
25919 for (size_t k = 5; k < 8; k++) {
25920 GemmMicrokernelTester()
25921 .mr(4)
25922 .nr(2)
25923 .kr(4)
25924 .sr(1)
25925 .m(4)
25926 .n(2)
25927 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025928 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025929 }
25930 }
25931
25932 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_gt_4_strided_a) {
25933 for (size_t k = 5; k < 8; k++) {
25934 GemmMicrokernelTester()
25935 .mr(4)
25936 .nr(2)
25937 .kr(4)
25938 .sr(1)
25939 .m(4)
25940 .n(2)
25941 .k(k)
25942 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025943 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025944 }
25945 }
25946
25947 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_gt_4_subtile) {
25948 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025949 for (uint32_t n = 1; n <= 2; n++) {
25950 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhane39e6462020-07-09 01:33:36 -070025951 GemmMicrokernelTester()
25952 .mr(4)
25953 .nr(2)
25954 .kr(4)
25955 .sr(1)
25956 .m(m)
25957 .n(n)
25958 .k(k)
25959 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025960 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025961 }
25962 }
25963 }
25964 }
25965
25966 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_div_4) {
25967 for (size_t k = 8; k <= 40; k += 4) {
25968 GemmMicrokernelTester()
25969 .mr(4)
25970 .nr(2)
25971 .kr(4)
25972 .sr(1)
25973 .m(4)
25974 .n(2)
25975 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025976 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025977 }
25978 }
25979
25980 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_div_4_strided_a) {
25981 for (size_t k = 8; k <= 40; k += 4) {
25982 GemmMicrokernelTester()
25983 .mr(4)
25984 .nr(2)
25985 .kr(4)
25986 .sr(1)
25987 .m(4)
25988 .n(2)
25989 .k(k)
25990 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025991 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070025992 }
25993 }
25994
25995 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_div_4_subtile) {
25996 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025997 for (uint32_t n = 1; n <= 2; n++) {
25998 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhane39e6462020-07-09 01:33:36 -070025999 GemmMicrokernelTester()
26000 .mr(4)
26001 .nr(2)
26002 .kr(4)
26003 .sr(1)
26004 .m(m)
26005 .n(n)
26006 .k(k)
26007 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026008 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026009 }
26010 }
26011 }
26012 }
26013
26014 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_gt_2) {
26015 for (uint32_t n = 3; n < 4; n++) {
26016 for (size_t k = 1; k <= 20; k += 5) {
26017 GemmMicrokernelTester()
26018 .mr(4)
26019 .nr(2)
26020 .kr(4)
26021 .sr(1)
26022 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026023 .n(n)
Marat Dukhane39e6462020-07-09 01:33:36 -070026024 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026025 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026026 }
26027 }
26028 }
26029
26030 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_gt_2_strided_cn) {
26031 for (uint32_t n = 3; n < 4; n++) {
26032 for (size_t k = 1; k <= 20; k += 5) {
26033 GemmMicrokernelTester()
26034 .mr(4)
26035 .nr(2)
26036 .kr(4)
26037 .sr(1)
26038 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026039 .n(n)
Marat Dukhane39e6462020-07-09 01:33:36 -070026040 .k(k)
26041 .cn_stride(5)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026042 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026043 }
26044 }
26045 }
26046
26047 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_gt_2_strided_a) {
26048 for (uint32_t n = 3; n < 4; n++) {
26049 for (size_t k = 1; k <= 20; k += 5) {
26050 GemmMicrokernelTester()
26051 .mr(4)
26052 .nr(2)
26053 .kr(4)
26054 .sr(1)
26055 .m(4)
26056 .n(n)
26057 .k(k)
26058 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026059 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026060 }
26061 }
26062 }
26063
26064 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_gt_2_subtile) {
26065 for (uint32_t n = 3; n < 4; n++) {
26066 for (size_t k = 1; k <= 20; k += 5) {
26067 for (uint32_t m = 1; m <= 4; m++) {
26068 GemmMicrokernelTester()
26069 .mr(4)
26070 .nr(2)
26071 .kr(4)
26072 .sr(1)
26073 .m(m)
26074 .n(n)
26075 .k(k)
26076 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026077 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026078 }
26079 }
26080 }
26081 }
26082
26083 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_div_2) {
26084 for (uint32_t n = 4; n <= 6; n += 2) {
26085 for (size_t k = 1; k <= 20; k += 5) {
26086 GemmMicrokernelTester()
26087 .mr(4)
26088 .nr(2)
26089 .kr(4)
26090 .sr(1)
26091 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026092 .n(n)
Marat Dukhane39e6462020-07-09 01:33:36 -070026093 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026094 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026095 }
26096 }
26097 }
26098
26099 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_div_2_strided_cn) {
26100 for (uint32_t n = 4; n <= 6; n += 2) {
26101 for (size_t k = 1; k <= 20; k += 5) {
26102 GemmMicrokernelTester()
26103 .mr(4)
26104 .nr(2)
26105 .kr(4)
26106 .sr(1)
26107 .m(4)
26108 .n(n)
26109 .k(k)
26110 .cn_stride(5)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026111 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026112 }
26113 }
26114 }
26115
26116 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_div_2_strided_a) {
26117 for (uint32_t n = 4; n <= 6; n += 2) {
26118 for (size_t k = 1; k <= 20; k += 5) {
26119 GemmMicrokernelTester()
26120 .mr(4)
26121 .nr(2)
26122 .kr(4)
26123 .sr(1)
26124 .m(4)
26125 .n(n)
26126 .k(k)
26127 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026128 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026129 }
26130 }
26131 }
26132
26133 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_div_2_subtile) {
26134 for (uint32_t n = 4; n <= 6; n += 2) {
26135 for (size_t k = 1; k <= 20; k += 5) {
26136 for (uint32_t m = 1; m <= 4; m++) {
26137 GemmMicrokernelTester()
26138 .mr(4)
26139 .nr(2)
26140 .kr(4)
26141 .sr(1)
26142 .m(m)
26143 .n(n)
26144 .k(k)
26145 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026146 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026147 }
26148 }
26149 }
26150 }
26151
26152 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, strided_cm_subtile) {
26153 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026154 for (uint32_t n = 1; n <= 2; n++) {
26155 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhane39e6462020-07-09 01:33:36 -070026156 GemmMicrokernelTester()
26157 .mr(4)
26158 .nr(2)
26159 .kr(4)
26160 .sr(1)
26161 .m(m)
26162 .n(n)
26163 .k(k)
26164 .cm_stride(5)
26165 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026166 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026167 }
26168 }
26169 }
26170 }
26171
26172 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, qmin) {
26173 GemmMicrokernelTester()
26174 .mr(4)
26175 .nr(2)
26176 .kr(4)
26177 .sr(1)
26178 .m(4)
26179 .n(2)
26180 .k(4)
26181 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026182 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026183 }
26184
26185 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, qmax) {
26186 GemmMicrokernelTester()
26187 .mr(4)
26188 .nr(2)
26189 .kr(4)
26190 .sr(1)
26191 .m(4)
26192 .n(2)
26193 .k(4)
26194 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026195 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026196 }
26197
26198 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_ARM, strided_cm) {
26199 GemmMicrokernelTester()
26200 .mr(4)
26201 .nr(2)
26202 .kr(4)
26203 .sr(1)
26204 .m(4)
26205 .n(2)
26206 .k(4)
26207 .cm_stride(5)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026208 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026209 }
Marat Dukhan4c617792021-12-21 15:47:58 -080026210#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhane39e6462020-07-09 01:33:36 -070026211
26212
Marat Dukhan4c617792021-12-21 15:47:58 -080026213#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhane39e6462020-07-09 01:33:36 -070026214 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_eq_4) {
26215 GemmMicrokernelTester()
26216 .mr(4)
26217 .nr(2)
26218 .kr(4)
26219 .sr(1)
26220 .m(4)
26221 .n(2)
26222 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026223 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026224 }
26225
26226 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, strided_cn) {
26227 GemmMicrokernelTester()
26228 .mr(4)
26229 .nr(2)
26230 .kr(4)
26231 .sr(1)
26232 .m(4)
26233 .n(2)
26234 .k(4)
26235 .cn_stride(5)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026236 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026237 }
26238
26239 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_eq_4_strided_a) {
26240 GemmMicrokernelTester()
26241 .mr(4)
26242 .nr(2)
26243 .kr(4)
26244 .sr(1)
26245 .m(4)
26246 .n(2)
26247 .k(4)
26248 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026249 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026250 }
26251
26252 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026253 for (uint32_t n = 1; n <= 2; n++) {
26254 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhane39e6462020-07-09 01:33:36 -070026255 GemmMicrokernelTester()
26256 .mr(4)
26257 .nr(2)
26258 .kr(4)
26259 .sr(1)
26260 .m(m)
26261 .n(n)
26262 .k(4)
26263 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026264 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026265 }
26266 }
26267 }
26268
26269 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_eq_4_subtile_m) {
26270 for (uint32_t m = 1; m <= 4; m++) {
26271 GemmMicrokernelTester()
26272 .mr(4)
26273 .nr(2)
26274 .kr(4)
26275 .sr(1)
26276 .m(m)
26277 .n(2)
26278 .k(4)
26279 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026280 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026281 }
26282 }
26283
26284 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_eq_4_subtile_n) {
26285 for (uint32_t n = 1; n <= 2; n++) {
26286 GemmMicrokernelTester()
26287 .mr(4)
26288 .nr(2)
26289 .kr(4)
26290 .sr(1)
26291 .m(4)
26292 .n(n)
26293 .k(4)
26294 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026295 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026296 }
26297 }
26298
26299 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_lt_4) {
26300 for (size_t k = 1; k < 4; k++) {
26301 GemmMicrokernelTester()
26302 .mr(4)
26303 .nr(2)
26304 .kr(4)
26305 .sr(1)
26306 .m(4)
26307 .n(2)
26308 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026309 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026310 }
26311 }
26312
26313 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_lt_4_strided_a) {
26314 for (size_t k = 1; k < 4; k++) {
26315 GemmMicrokernelTester()
26316 .mr(4)
26317 .nr(2)
26318 .kr(4)
26319 .sr(1)
26320 .m(4)
26321 .n(2)
26322 .k(k)
26323 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026324 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026325 }
26326 }
26327
26328 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_lt_4_subtile) {
26329 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026330 for (uint32_t n = 1; n <= 2; n++) {
26331 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhane39e6462020-07-09 01:33:36 -070026332 GemmMicrokernelTester()
26333 .mr(4)
26334 .nr(2)
26335 .kr(4)
26336 .sr(1)
26337 .m(m)
26338 .n(n)
26339 .k(k)
26340 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026341 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026342 }
26343 }
26344 }
26345 }
26346
26347 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_gt_4) {
26348 for (size_t k = 5; k < 8; k++) {
26349 GemmMicrokernelTester()
26350 .mr(4)
26351 .nr(2)
26352 .kr(4)
26353 .sr(1)
26354 .m(4)
26355 .n(2)
26356 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026357 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026358 }
26359 }
26360
26361 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_gt_4_strided_a) {
26362 for (size_t k = 5; k < 8; k++) {
26363 GemmMicrokernelTester()
26364 .mr(4)
26365 .nr(2)
26366 .kr(4)
26367 .sr(1)
26368 .m(4)
26369 .n(2)
26370 .k(k)
26371 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026372 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026373 }
26374 }
26375
26376 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_gt_4_subtile) {
26377 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026378 for (uint32_t n = 1; n <= 2; n++) {
26379 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhane39e6462020-07-09 01:33:36 -070026380 GemmMicrokernelTester()
26381 .mr(4)
26382 .nr(2)
26383 .kr(4)
26384 .sr(1)
26385 .m(m)
26386 .n(n)
26387 .k(k)
26388 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026389 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026390 }
26391 }
26392 }
26393 }
26394
26395 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_div_4) {
26396 for (size_t k = 8; k <= 40; k += 4) {
26397 GemmMicrokernelTester()
26398 .mr(4)
26399 .nr(2)
26400 .kr(4)
26401 .sr(1)
26402 .m(4)
26403 .n(2)
26404 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026405 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026406 }
26407 }
26408
26409 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_div_4_strided_a) {
26410 for (size_t k = 8; k <= 40; k += 4) {
26411 GemmMicrokernelTester()
26412 .mr(4)
26413 .nr(2)
26414 .kr(4)
26415 .sr(1)
26416 .m(4)
26417 .n(2)
26418 .k(k)
26419 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026420 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026421 }
26422 }
26423
26424 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, k_div_4_subtile) {
26425 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026426 for (uint32_t n = 1; n <= 2; n++) {
26427 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhane39e6462020-07-09 01:33:36 -070026428 GemmMicrokernelTester()
26429 .mr(4)
26430 .nr(2)
26431 .kr(4)
26432 .sr(1)
26433 .m(m)
26434 .n(n)
26435 .k(k)
26436 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026437 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026438 }
26439 }
26440 }
26441 }
26442
26443 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, n_gt_2) {
26444 for (uint32_t n = 3; n < 4; n++) {
26445 for (size_t k = 1; k <= 20; k += 5) {
26446 GemmMicrokernelTester()
26447 .mr(4)
26448 .nr(2)
26449 .kr(4)
26450 .sr(1)
26451 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026452 .n(n)
Marat Dukhane39e6462020-07-09 01:33:36 -070026453 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026454 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026455 }
26456 }
26457 }
26458
26459 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, n_gt_2_strided_cn) {
26460 for (uint32_t n = 3; n < 4; n++) {
26461 for (size_t k = 1; k <= 20; k += 5) {
26462 GemmMicrokernelTester()
26463 .mr(4)
26464 .nr(2)
26465 .kr(4)
26466 .sr(1)
26467 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026468 .n(n)
Marat Dukhane39e6462020-07-09 01:33:36 -070026469 .k(k)
26470 .cn_stride(5)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026471 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026472 }
26473 }
26474 }
26475
26476 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, n_gt_2_strided_a) {
26477 for (uint32_t n = 3; n < 4; n++) {
26478 for (size_t k = 1; k <= 20; k += 5) {
26479 GemmMicrokernelTester()
26480 .mr(4)
26481 .nr(2)
26482 .kr(4)
26483 .sr(1)
26484 .m(4)
26485 .n(n)
26486 .k(k)
26487 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026488 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026489 }
26490 }
26491 }
26492
26493 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, n_gt_2_subtile) {
26494 for (uint32_t n = 3; n < 4; n++) {
26495 for (size_t k = 1; k <= 20; k += 5) {
26496 for (uint32_t m = 1; m <= 4; m++) {
26497 GemmMicrokernelTester()
26498 .mr(4)
26499 .nr(2)
26500 .kr(4)
26501 .sr(1)
26502 .m(m)
26503 .n(n)
26504 .k(k)
26505 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026506 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026507 }
26508 }
26509 }
26510 }
26511
26512 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, n_div_2) {
26513 for (uint32_t n = 4; n <= 6; n += 2) {
26514 for (size_t k = 1; k <= 20; k += 5) {
26515 GemmMicrokernelTester()
26516 .mr(4)
26517 .nr(2)
26518 .kr(4)
26519 .sr(1)
26520 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026521 .n(n)
Marat Dukhane39e6462020-07-09 01:33:36 -070026522 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026523 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026524 }
26525 }
26526 }
26527
26528 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, n_div_2_strided_cn) {
26529 for (uint32_t n = 4; n <= 6; n += 2) {
26530 for (size_t k = 1; k <= 20; k += 5) {
26531 GemmMicrokernelTester()
26532 .mr(4)
26533 .nr(2)
26534 .kr(4)
26535 .sr(1)
26536 .m(4)
26537 .n(n)
26538 .k(k)
26539 .cn_stride(5)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026540 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026541 }
26542 }
26543 }
26544
26545 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, n_div_2_strided_a) {
26546 for (uint32_t n = 4; n <= 6; n += 2) {
26547 for (size_t k = 1; k <= 20; k += 5) {
26548 GemmMicrokernelTester()
26549 .mr(4)
26550 .nr(2)
26551 .kr(4)
26552 .sr(1)
26553 .m(4)
26554 .n(n)
26555 .k(k)
26556 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026557 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026558 }
26559 }
26560 }
26561
26562 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, n_div_2_subtile) {
26563 for (uint32_t n = 4; n <= 6; n += 2) {
26564 for (size_t k = 1; k <= 20; k += 5) {
26565 for (uint32_t m = 1; m <= 4; m++) {
26566 GemmMicrokernelTester()
26567 .mr(4)
26568 .nr(2)
26569 .kr(4)
26570 .sr(1)
26571 .m(m)
26572 .n(n)
26573 .k(k)
26574 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026575 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026576 }
26577 }
26578 }
26579 }
26580
26581 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, strided_cm_subtile) {
26582 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026583 for (uint32_t n = 1; n <= 2; n++) {
26584 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhane39e6462020-07-09 01:33:36 -070026585 GemmMicrokernelTester()
26586 .mr(4)
26587 .nr(2)
26588 .kr(4)
26589 .sr(1)
26590 .m(m)
26591 .n(n)
26592 .k(k)
26593 .cm_stride(5)
26594 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026595 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026596 }
26597 }
26598 }
26599 }
26600
26601 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, qmin) {
26602 GemmMicrokernelTester()
26603 .mr(4)
26604 .nr(2)
26605 .kr(4)
26606 .sr(1)
26607 .m(4)
26608 .n(2)
26609 .k(4)
26610 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026611 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026612 }
26613
26614 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, qmax) {
26615 GemmMicrokernelTester()
26616 .mr(4)
26617 .nr(2)
26618 .kr(4)
26619 .sr(1)
26620 .m(4)
26621 .n(2)
26622 .k(4)
26623 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026624 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026625 }
26626
26627 TEST(F32_GEMM_MINMAX_4X2C4__WASMSIMD_X86, strided_cm) {
26628 GemmMicrokernelTester()
26629 .mr(4)
26630 .nr(2)
26631 .kr(4)
26632 .sr(1)
26633 .m(4)
26634 .n(2)
26635 .k(4)
26636 .cm_stride(5)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026637 .Test(xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhane39e6462020-07-09 01:33:36 -070026638 }
Marat Dukhan4c617792021-12-21 15:47:58 -080026639#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhane39e6462020-07-09 01:33:36 -070026640
26641
Marat Dukhan4c617792021-12-21 15:47:58 -080026642#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhande06f492020-04-09 00:19:31 -070026643 TEST(F32_GEMM_MINMAX_2X4__WASM, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026644 GemmMicrokernelTester()
26645 .mr(2)
26646 .nr(4)
26647 .kr(1)
26648 .sr(1)
26649 .m(2)
26650 .n(4)
26651 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026652 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026653 }
26654
Marat Dukhande06f492020-04-09 00:19:31 -070026655 TEST(F32_GEMM_MINMAX_2X4__WASM, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026656 GemmMicrokernelTester()
26657 .mr(2)
26658 .nr(4)
26659 .kr(1)
26660 .sr(1)
26661 .m(2)
26662 .n(4)
26663 .k(1)
26664 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026665 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026666 }
26667
Marat Dukhande06f492020-04-09 00:19:31 -070026668 TEST(F32_GEMM_MINMAX_2X4__WASM, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026669 GemmMicrokernelTester()
26670 .mr(2)
26671 .nr(4)
26672 .kr(1)
26673 .sr(1)
26674 .m(2)
26675 .n(4)
26676 .k(1)
26677 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026678 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026679 }
26680
Marat Dukhande06f492020-04-09 00:19:31 -070026681 TEST(F32_GEMM_MINMAX_2X4__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026682 for (uint32_t n = 1; n <= 4; n++) {
26683 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026684 GemmMicrokernelTester()
26685 .mr(2)
26686 .nr(4)
26687 .kr(1)
26688 .sr(1)
26689 .m(m)
26690 .n(n)
26691 .k(1)
26692 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026693 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026694 }
26695 }
26696 }
26697
Marat Dukhande06f492020-04-09 00:19:31 -070026698 TEST(F32_GEMM_MINMAX_2X4__WASM, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026699 for (uint32_t m = 1; m <= 2; m++) {
26700 GemmMicrokernelTester()
26701 .mr(2)
26702 .nr(4)
26703 .kr(1)
26704 .sr(1)
26705 .m(m)
26706 .n(4)
26707 .k(1)
26708 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026709 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026710 }
26711 }
26712
Marat Dukhande06f492020-04-09 00:19:31 -070026713 TEST(F32_GEMM_MINMAX_2X4__WASM, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026714 for (uint32_t n = 1; n <= 4; n++) {
26715 GemmMicrokernelTester()
26716 .mr(2)
26717 .nr(4)
26718 .kr(1)
26719 .sr(1)
26720 .m(2)
26721 .n(n)
26722 .k(1)
26723 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026724 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026725 }
26726 }
26727
Marat Dukhande06f492020-04-09 00:19:31 -070026728 TEST(F32_GEMM_MINMAX_2X4__WASM, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026729 for (size_t k = 2; k < 10; k++) {
26730 GemmMicrokernelTester()
26731 .mr(2)
26732 .nr(4)
26733 .kr(1)
26734 .sr(1)
26735 .m(2)
26736 .n(4)
26737 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026738 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026739 }
26740 }
26741
Marat Dukhande06f492020-04-09 00:19:31 -070026742 TEST(F32_GEMM_MINMAX_2X4__WASM, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026743 for (size_t k = 2; k < 10; k++) {
26744 GemmMicrokernelTester()
26745 .mr(2)
26746 .nr(4)
26747 .kr(1)
26748 .sr(1)
26749 .m(2)
26750 .n(4)
26751 .k(k)
26752 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026753 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026754 }
26755 }
26756
Marat Dukhande06f492020-04-09 00:19:31 -070026757 TEST(F32_GEMM_MINMAX_2X4__WASM, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026758 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026759 for (uint32_t n = 1; n <= 4; n++) {
26760 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026761 GemmMicrokernelTester()
26762 .mr(2)
26763 .nr(4)
26764 .kr(1)
26765 .sr(1)
26766 .m(m)
26767 .n(n)
26768 .k(k)
26769 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026770 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026771 }
26772 }
26773 }
26774 }
26775
Marat Dukhande06f492020-04-09 00:19:31 -070026776 TEST(F32_GEMM_MINMAX_2X4__WASM, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026777 for (uint32_t n = 5; n < 8; n++) {
26778 for (size_t k = 1; k <= 5; k += 2) {
26779 GemmMicrokernelTester()
26780 .mr(2)
26781 .nr(4)
26782 .kr(1)
26783 .sr(1)
26784 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026785 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070026786 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026787 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026788 }
26789 }
26790 }
26791
Marat Dukhande06f492020-04-09 00:19:31 -070026792 TEST(F32_GEMM_MINMAX_2X4__WASM, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026793 for (uint32_t n = 5; n < 8; n++) {
26794 for (size_t k = 1; k <= 5; k += 2) {
26795 GemmMicrokernelTester()
26796 .mr(2)
26797 .nr(4)
26798 .kr(1)
26799 .sr(1)
26800 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026801 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070026802 .k(k)
26803 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026804 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026805 }
26806 }
26807 }
26808
Marat Dukhande06f492020-04-09 00:19:31 -070026809 TEST(F32_GEMM_MINMAX_2X4__WASM, n_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026810 for (uint32_t n = 5; n < 8; n++) {
26811 for (size_t k = 1; k <= 5; k += 2) {
26812 GemmMicrokernelTester()
26813 .mr(2)
26814 .nr(4)
26815 .kr(1)
26816 .sr(1)
26817 .m(2)
26818 .n(n)
26819 .k(k)
26820 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026821 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026822 }
26823 }
26824 }
26825
Marat Dukhande06f492020-04-09 00:19:31 -070026826 TEST(F32_GEMM_MINMAX_2X4__WASM, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026827 for (uint32_t n = 5; n < 8; n++) {
26828 for (size_t k = 1; k <= 5; k += 2) {
26829 for (uint32_t m = 1; m <= 2; m++) {
26830 GemmMicrokernelTester()
26831 .mr(2)
26832 .nr(4)
26833 .kr(1)
26834 .sr(1)
26835 .m(m)
26836 .n(n)
26837 .k(k)
26838 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026839 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026840 }
26841 }
26842 }
26843 }
26844
Marat Dukhande06f492020-04-09 00:19:31 -070026845 TEST(F32_GEMM_MINMAX_2X4__WASM, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026846 for (uint32_t n = 8; n <= 12; n += 4) {
26847 for (size_t k = 1; k <= 5; k += 2) {
26848 GemmMicrokernelTester()
26849 .mr(2)
26850 .nr(4)
26851 .kr(1)
26852 .sr(1)
26853 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026854 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070026855 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026856 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026857 }
26858 }
26859 }
26860
Marat Dukhande06f492020-04-09 00:19:31 -070026861 TEST(F32_GEMM_MINMAX_2X4__WASM, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026862 for (uint32_t n = 8; n <= 12; n += 4) {
26863 for (size_t k = 1; k <= 5; k += 2) {
26864 GemmMicrokernelTester()
26865 .mr(2)
26866 .nr(4)
26867 .kr(1)
26868 .sr(1)
26869 .m(2)
26870 .n(n)
26871 .k(k)
26872 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026873 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026874 }
26875 }
26876 }
26877
Marat Dukhande06f492020-04-09 00:19:31 -070026878 TEST(F32_GEMM_MINMAX_2X4__WASM, n_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026879 for (uint32_t n = 8; n <= 12; n += 4) {
26880 for (size_t k = 1; k <= 5; k += 2) {
26881 GemmMicrokernelTester()
26882 .mr(2)
26883 .nr(4)
26884 .kr(1)
26885 .sr(1)
26886 .m(2)
26887 .n(n)
26888 .k(k)
26889 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026890 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026891 }
26892 }
26893 }
26894
Marat Dukhande06f492020-04-09 00:19:31 -070026895 TEST(F32_GEMM_MINMAX_2X4__WASM, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026896 for (uint32_t n = 8; n <= 12; n += 4) {
26897 for (size_t k = 1; k <= 5; k += 2) {
26898 for (uint32_t m = 1; m <= 2; m++) {
26899 GemmMicrokernelTester()
26900 .mr(2)
26901 .nr(4)
26902 .kr(1)
26903 .sr(1)
26904 .m(m)
26905 .n(n)
26906 .k(k)
26907 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026908 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026909 }
26910 }
26911 }
26912 }
26913
Marat Dukhande06f492020-04-09 00:19:31 -070026914 TEST(F32_GEMM_MINMAX_2X4__WASM, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026915 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026916 for (uint32_t n = 1; n <= 4; n++) {
26917 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026918 GemmMicrokernelTester()
26919 .mr(2)
26920 .nr(4)
26921 .kr(1)
26922 .sr(1)
26923 .m(m)
26924 .n(n)
26925 .k(k)
26926 .cm_stride(7)
26927 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026928 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026929 }
26930 }
26931 }
26932 }
26933
Marat Dukhande06f492020-04-09 00:19:31 -070026934 TEST(F32_GEMM_MINMAX_2X4__WASM, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026935 GemmMicrokernelTester()
26936 .mr(2)
26937 .nr(4)
26938 .kr(1)
26939 .sr(1)
26940 .m(2)
26941 .n(4)
26942 .k(1)
26943 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026944 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026945 }
26946
Marat Dukhande06f492020-04-09 00:19:31 -070026947 TEST(F32_GEMM_MINMAX_2X4__WASM, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026948 GemmMicrokernelTester()
26949 .mr(2)
26950 .nr(4)
26951 .kr(1)
26952 .sr(1)
26953 .m(2)
26954 .n(4)
26955 .k(1)
26956 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026957 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026958 }
26959
Marat Dukhande06f492020-04-09 00:19:31 -070026960 TEST(F32_GEMM_MINMAX_2X4__WASM, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026961 GemmMicrokernelTester()
26962 .mr(2)
26963 .nr(4)
26964 .kr(1)
26965 .sr(1)
26966 .m(2)
26967 .n(4)
26968 .k(1)
26969 .cm_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026970 .Test(xnn_f32_gemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026971 }
Marat Dukhan4c617792021-12-21 15:47:58 -080026972#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1c587112020-04-08 20:04:28 -070026973
26974
Marat Dukhan4c617792021-12-21 15:47:58 -080026975#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhande06f492020-04-09 00:19:31 -070026976 TEST(F32_GEMM_MINMAX_4X4__WASM, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026977 GemmMicrokernelTester()
26978 .mr(4)
26979 .nr(4)
26980 .kr(1)
26981 .sr(1)
26982 .m(4)
26983 .n(4)
26984 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026985 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026986 }
26987
Marat Dukhande06f492020-04-09 00:19:31 -070026988 TEST(F32_GEMM_MINMAX_4X4__WASM, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070026989 GemmMicrokernelTester()
26990 .mr(4)
26991 .nr(4)
26992 .kr(1)
26993 .sr(1)
26994 .m(4)
26995 .n(4)
26996 .k(1)
26997 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070026998 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070026999 }
27000
Marat Dukhande06f492020-04-09 00:19:31 -070027001 TEST(F32_GEMM_MINMAX_4X4__WASM, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027002 GemmMicrokernelTester()
27003 .mr(4)
27004 .nr(4)
27005 .kr(1)
27006 .sr(1)
27007 .m(4)
27008 .n(4)
27009 .k(1)
27010 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027011 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027012 }
27013
Marat Dukhande06f492020-04-09 00:19:31 -070027014 TEST(F32_GEMM_MINMAX_4X4__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027015 for (uint32_t n = 1; n <= 4; n++) {
27016 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027017 GemmMicrokernelTester()
27018 .mr(4)
27019 .nr(4)
27020 .kr(1)
27021 .sr(1)
27022 .m(m)
27023 .n(n)
27024 .k(1)
27025 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027026 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027027 }
27028 }
27029 }
27030
Marat Dukhande06f492020-04-09 00:19:31 -070027031 TEST(F32_GEMM_MINMAX_4X4__WASM, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027032 for (uint32_t m = 1; m <= 4; m++) {
27033 GemmMicrokernelTester()
27034 .mr(4)
27035 .nr(4)
27036 .kr(1)
27037 .sr(1)
27038 .m(m)
27039 .n(4)
27040 .k(1)
27041 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027042 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027043 }
27044 }
27045
Marat Dukhande06f492020-04-09 00:19:31 -070027046 TEST(F32_GEMM_MINMAX_4X4__WASM, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027047 for (uint32_t n = 1; n <= 4; n++) {
27048 GemmMicrokernelTester()
27049 .mr(4)
27050 .nr(4)
27051 .kr(1)
27052 .sr(1)
27053 .m(4)
27054 .n(n)
27055 .k(1)
27056 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027057 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027058 }
27059 }
27060
Marat Dukhande06f492020-04-09 00:19:31 -070027061 TEST(F32_GEMM_MINMAX_4X4__WASM, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027062 for (size_t k = 2; k < 10; k++) {
27063 GemmMicrokernelTester()
27064 .mr(4)
27065 .nr(4)
27066 .kr(1)
27067 .sr(1)
27068 .m(4)
27069 .n(4)
27070 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027071 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027072 }
27073 }
27074
Marat Dukhande06f492020-04-09 00:19:31 -070027075 TEST(F32_GEMM_MINMAX_4X4__WASM, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027076 for (size_t k = 2; k < 10; k++) {
27077 GemmMicrokernelTester()
27078 .mr(4)
27079 .nr(4)
27080 .kr(1)
27081 .sr(1)
27082 .m(4)
27083 .n(4)
27084 .k(k)
27085 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027086 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027087 }
27088 }
27089
Marat Dukhande06f492020-04-09 00:19:31 -070027090 TEST(F32_GEMM_MINMAX_4X4__WASM, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027091 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027092 for (uint32_t n = 1; n <= 4; n++) {
27093 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027094 GemmMicrokernelTester()
27095 .mr(4)
27096 .nr(4)
27097 .kr(1)
27098 .sr(1)
27099 .m(m)
27100 .n(n)
27101 .k(k)
27102 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027103 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027104 }
27105 }
27106 }
27107 }
27108
Marat Dukhande06f492020-04-09 00:19:31 -070027109 TEST(F32_GEMM_MINMAX_4X4__WASM, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027110 for (uint32_t n = 5; n < 8; n++) {
27111 for (size_t k = 1; k <= 5; k += 2) {
27112 GemmMicrokernelTester()
27113 .mr(4)
27114 .nr(4)
27115 .kr(1)
27116 .sr(1)
27117 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027118 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070027119 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027120 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027121 }
27122 }
27123 }
27124
Marat Dukhande06f492020-04-09 00:19:31 -070027125 TEST(F32_GEMM_MINMAX_4X4__WASM, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027126 for (uint32_t n = 5; n < 8; n++) {
27127 for (size_t k = 1; k <= 5; k += 2) {
27128 GemmMicrokernelTester()
27129 .mr(4)
27130 .nr(4)
27131 .kr(1)
27132 .sr(1)
27133 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027134 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070027135 .k(k)
27136 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027137 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027138 }
27139 }
27140 }
27141
Marat Dukhande06f492020-04-09 00:19:31 -070027142 TEST(F32_GEMM_MINMAX_4X4__WASM, n_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027143 for (uint32_t n = 5; n < 8; n++) {
27144 for (size_t k = 1; k <= 5; k += 2) {
27145 GemmMicrokernelTester()
27146 .mr(4)
27147 .nr(4)
27148 .kr(1)
27149 .sr(1)
27150 .m(4)
27151 .n(n)
27152 .k(k)
27153 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027154 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027155 }
27156 }
27157 }
27158
Marat Dukhande06f492020-04-09 00:19:31 -070027159 TEST(F32_GEMM_MINMAX_4X4__WASM, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027160 for (uint32_t n = 5; n < 8; n++) {
27161 for (size_t k = 1; k <= 5; k += 2) {
27162 for (uint32_t m = 1; m <= 4; m++) {
27163 GemmMicrokernelTester()
27164 .mr(4)
27165 .nr(4)
27166 .kr(1)
27167 .sr(1)
27168 .m(m)
27169 .n(n)
27170 .k(k)
27171 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027172 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027173 }
27174 }
27175 }
27176 }
27177
Marat Dukhande06f492020-04-09 00:19:31 -070027178 TEST(F32_GEMM_MINMAX_4X4__WASM, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027179 for (uint32_t n = 8; n <= 12; n += 4) {
27180 for (size_t k = 1; k <= 5; k += 2) {
27181 GemmMicrokernelTester()
27182 .mr(4)
27183 .nr(4)
27184 .kr(1)
27185 .sr(1)
27186 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027187 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070027188 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027189 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027190 }
27191 }
27192 }
27193
Marat Dukhande06f492020-04-09 00:19:31 -070027194 TEST(F32_GEMM_MINMAX_4X4__WASM, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027195 for (uint32_t n = 8; n <= 12; n += 4) {
27196 for (size_t k = 1; k <= 5; k += 2) {
27197 GemmMicrokernelTester()
27198 .mr(4)
27199 .nr(4)
27200 .kr(1)
27201 .sr(1)
27202 .m(4)
27203 .n(n)
27204 .k(k)
27205 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027206 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027207 }
27208 }
27209 }
27210
Marat Dukhande06f492020-04-09 00:19:31 -070027211 TEST(F32_GEMM_MINMAX_4X4__WASM, n_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027212 for (uint32_t n = 8; n <= 12; n += 4) {
27213 for (size_t k = 1; k <= 5; k += 2) {
27214 GemmMicrokernelTester()
27215 .mr(4)
27216 .nr(4)
27217 .kr(1)
27218 .sr(1)
27219 .m(4)
27220 .n(n)
27221 .k(k)
27222 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027223 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027224 }
27225 }
27226 }
27227
Marat Dukhande06f492020-04-09 00:19:31 -070027228 TEST(F32_GEMM_MINMAX_4X4__WASM, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027229 for (uint32_t n = 8; n <= 12; n += 4) {
27230 for (size_t k = 1; k <= 5; k += 2) {
27231 for (uint32_t m = 1; m <= 4; m++) {
27232 GemmMicrokernelTester()
27233 .mr(4)
27234 .nr(4)
27235 .kr(1)
27236 .sr(1)
27237 .m(m)
27238 .n(n)
27239 .k(k)
27240 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027241 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027242 }
27243 }
27244 }
27245 }
27246
Marat Dukhande06f492020-04-09 00:19:31 -070027247 TEST(F32_GEMM_MINMAX_4X4__WASM, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027248 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027249 for (uint32_t n = 1; n <= 4; n++) {
27250 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027251 GemmMicrokernelTester()
27252 .mr(4)
27253 .nr(4)
27254 .kr(1)
27255 .sr(1)
27256 .m(m)
27257 .n(n)
27258 .k(k)
27259 .cm_stride(7)
27260 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027261 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027262 }
27263 }
27264 }
27265 }
27266
Marat Dukhande06f492020-04-09 00:19:31 -070027267 TEST(F32_GEMM_MINMAX_4X4__WASM, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027268 GemmMicrokernelTester()
27269 .mr(4)
27270 .nr(4)
27271 .kr(1)
27272 .sr(1)
27273 .m(4)
27274 .n(4)
27275 .k(1)
27276 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027277 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027278 }
27279
Marat Dukhande06f492020-04-09 00:19:31 -070027280 TEST(F32_GEMM_MINMAX_4X4__WASM, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027281 GemmMicrokernelTester()
27282 .mr(4)
27283 .nr(4)
27284 .kr(1)
27285 .sr(1)
27286 .m(4)
27287 .n(4)
27288 .k(1)
27289 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027290 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027291 }
27292
Marat Dukhande06f492020-04-09 00:19:31 -070027293 TEST(F32_GEMM_MINMAX_4X4__WASM, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027294 GemmMicrokernelTester()
27295 .mr(4)
27296 .nr(4)
27297 .kr(1)
27298 .sr(1)
27299 .m(4)
27300 .n(4)
27301 .k(1)
27302 .cm_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027303 .Test(xnn_f32_gemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027304 }
Marat Dukhan4c617792021-12-21 15:47:58 -080027305#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1c587112020-04-08 20:04:28 -070027306
27307
Marat Dukhan4c617792021-12-21 15:47:58 -080027308#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhande06f492020-04-09 00:19:31 -070027309 TEST(F32_GEMM_MINMAX_4X2__WASM, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027310 GemmMicrokernelTester()
27311 .mr(4)
27312 .nr(2)
27313 .kr(1)
27314 .sr(1)
27315 .m(4)
27316 .n(2)
27317 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027318 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027319 }
27320
Marat Dukhande06f492020-04-09 00:19:31 -070027321 TEST(F32_GEMM_MINMAX_4X2__WASM, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027322 GemmMicrokernelTester()
27323 .mr(4)
27324 .nr(2)
27325 .kr(1)
27326 .sr(1)
27327 .m(4)
27328 .n(2)
27329 .k(1)
27330 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027331 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027332 }
27333
Marat Dukhande06f492020-04-09 00:19:31 -070027334 TEST(F32_GEMM_MINMAX_4X2__WASM, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027335 GemmMicrokernelTester()
27336 .mr(4)
27337 .nr(2)
27338 .kr(1)
27339 .sr(1)
27340 .m(4)
27341 .n(2)
27342 .k(1)
27343 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027344 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027345 }
27346
Marat Dukhande06f492020-04-09 00:19:31 -070027347 TEST(F32_GEMM_MINMAX_4X2__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027348 for (uint32_t n = 1; n <= 2; n++) {
27349 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027350 GemmMicrokernelTester()
27351 .mr(4)
27352 .nr(2)
27353 .kr(1)
27354 .sr(1)
27355 .m(m)
27356 .n(n)
27357 .k(1)
27358 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027359 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027360 }
27361 }
27362 }
27363
Marat Dukhande06f492020-04-09 00:19:31 -070027364 TEST(F32_GEMM_MINMAX_4X2__WASM, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027365 for (uint32_t m = 1; m <= 4; m++) {
27366 GemmMicrokernelTester()
27367 .mr(4)
27368 .nr(2)
27369 .kr(1)
27370 .sr(1)
27371 .m(m)
27372 .n(2)
27373 .k(1)
27374 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027375 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027376 }
27377 }
27378
Marat Dukhande06f492020-04-09 00:19:31 -070027379 TEST(F32_GEMM_MINMAX_4X2__WASM, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027380 for (uint32_t n = 1; n <= 2; n++) {
27381 GemmMicrokernelTester()
27382 .mr(4)
27383 .nr(2)
27384 .kr(1)
27385 .sr(1)
27386 .m(4)
27387 .n(n)
27388 .k(1)
27389 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027390 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027391 }
27392 }
27393
Marat Dukhande06f492020-04-09 00:19:31 -070027394 TEST(F32_GEMM_MINMAX_4X2__WASM, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027395 for (size_t k = 2; k < 10; k++) {
27396 GemmMicrokernelTester()
27397 .mr(4)
27398 .nr(2)
27399 .kr(1)
27400 .sr(1)
27401 .m(4)
27402 .n(2)
27403 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027404 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027405 }
27406 }
27407
Marat Dukhande06f492020-04-09 00:19:31 -070027408 TEST(F32_GEMM_MINMAX_4X2__WASM, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027409 for (size_t k = 2; k < 10; k++) {
27410 GemmMicrokernelTester()
27411 .mr(4)
27412 .nr(2)
27413 .kr(1)
27414 .sr(1)
27415 .m(4)
27416 .n(2)
27417 .k(k)
27418 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027419 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027420 }
27421 }
27422
Marat Dukhande06f492020-04-09 00:19:31 -070027423 TEST(F32_GEMM_MINMAX_4X2__WASM, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027424 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027425 for (uint32_t n = 1; n <= 2; n++) {
27426 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027427 GemmMicrokernelTester()
27428 .mr(4)
27429 .nr(2)
27430 .kr(1)
27431 .sr(1)
27432 .m(m)
27433 .n(n)
27434 .k(k)
27435 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027436 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027437 }
27438 }
27439 }
27440 }
27441
Marat Dukhande06f492020-04-09 00:19:31 -070027442 TEST(F32_GEMM_MINMAX_4X2__WASM, n_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027443 for (uint32_t n = 3; n < 4; n++) {
27444 for (size_t k = 1; k <= 5; k += 2) {
27445 GemmMicrokernelTester()
27446 .mr(4)
27447 .nr(2)
27448 .kr(1)
27449 .sr(1)
27450 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027451 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070027452 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027453 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027454 }
27455 }
27456 }
27457
Marat Dukhande06f492020-04-09 00:19:31 -070027458 TEST(F32_GEMM_MINMAX_4X2__WASM, n_gt_2_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027459 for (uint32_t n = 3; n < 4; n++) {
27460 for (size_t k = 1; k <= 5; k += 2) {
27461 GemmMicrokernelTester()
27462 .mr(4)
27463 .nr(2)
27464 .kr(1)
27465 .sr(1)
27466 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027467 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070027468 .k(k)
27469 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027470 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027471 }
27472 }
27473 }
27474
Marat Dukhande06f492020-04-09 00:19:31 -070027475 TEST(F32_GEMM_MINMAX_4X2__WASM, n_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027476 for (uint32_t n = 3; n < 4; n++) {
27477 for (size_t k = 1; k <= 5; k += 2) {
27478 GemmMicrokernelTester()
27479 .mr(4)
27480 .nr(2)
27481 .kr(1)
27482 .sr(1)
27483 .m(4)
27484 .n(n)
27485 .k(k)
27486 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027487 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027488 }
27489 }
27490 }
27491
Marat Dukhande06f492020-04-09 00:19:31 -070027492 TEST(F32_GEMM_MINMAX_4X2__WASM, n_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027493 for (uint32_t n = 3; n < 4; n++) {
27494 for (size_t k = 1; k <= 5; k += 2) {
27495 for (uint32_t m = 1; m <= 4; m++) {
27496 GemmMicrokernelTester()
27497 .mr(4)
27498 .nr(2)
27499 .kr(1)
27500 .sr(1)
27501 .m(m)
27502 .n(n)
27503 .k(k)
27504 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027505 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027506 }
27507 }
27508 }
27509 }
27510
Marat Dukhande06f492020-04-09 00:19:31 -070027511 TEST(F32_GEMM_MINMAX_4X2__WASM, n_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027512 for (uint32_t n = 4; n <= 6; n += 2) {
27513 for (size_t k = 1; k <= 5; k += 2) {
27514 GemmMicrokernelTester()
27515 .mr(4)
27516 .nr(2)
27517 .kr(1)
27518 .sr(1)
27519 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027520 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070027521 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027522 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027523 }
27524 }
27525 }
27526
Marat Dukhande06f492020-04-09 00:19:31 -070027527 TEST(F32_GEMM_MINMAX_4X2__WASM, n_div_2_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027528 for (uint32_t n = 4; n <= 6; n += 2) {
27529 for (size_t k = 1; k <= 5; k += 2) {
27530 GemmMicrokernelTester()
27531 .mr(4)
27532 .nr(2)
27533 .kr(1)
27534 .sr(1)
27535 .m(4)
27536 .n(n)
27537 .k(k)
27538 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027539 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027540 }
27541 }
27542 }
27543
Marat Dukhande06f492020-04-09 00:19:31 -070027544 TEST(F32_GEMM_MINMAX_4X2__WASM, n_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027545 for (uint32_t n = 4; n <= 6; n += 2) {
27546 for (size_t k = 1; k <= 5; k += 2) {
27547 GemmMicrokernelTester()
27548 .mr(4)
27549 .nr(2)
27550 .kr(1)
27551 .sr(1)
27552 .m(4)
27553 .n(n)
27554 .k(k)
27555 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027556 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027557 }
27558 }
27559 }
27560
Marat Dukhande06f492020-04-09 00:19:31 -070027561 TEST(F32_GEMM_MINMAX_4X2__WASM, n_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027562 for (uint32_t n = 4; n <= 6; n += 2) {
27563 for (size_t k = 1; k <= 5; k += 2) {
27564 for (uint32_t m = 1; m <= 4; m++) {
27565 GemmMicrokernelTester()
27566 .mr(4)
27567 .nr(2)
27568 .kr(1)
27569 .sr(1)
27570 .m(m)
27571 .n(n)
27572 .k(k)
27573 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027574 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027575 }
27576 }
27577 }
27578 }
27579
Marat Dukhande06f492020-04-09 00:19:31 -070027580 TEST(F32_GEMM_MINMAX_4X2__WASM, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027581 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027582 for (uint32_t n = 1; n <= 2; n++) {
27583 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027584 GemmMicrokernelTester()
27585 .mr(4)
27586 .nr(2)
27587 .kr(1)
27588 .sr(1)
27589 .m(m)
27590 .n(n)
27591 .k(k)
27592 .cm_stride(5)
27593 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027594 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027595 }
27596 }
27597 }
27598 }
27599
Marat Dukhande06f492020-04-09 00:19:31 -070027600 TEST(F32_GEMM_MINMAX_4X2__WASM, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027601 GemmMicrokernelTester()
27602 .mr(4)
27603 .nr(2)
27604 .kr(1)
27605 .sr(1)
27606 .m(4)
27607 .n(2)
27608 .k(1)
27609 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027610 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027611 }
27612
Marat Dukhande06f492020-04-09 00:19:31 -070027613 TEST(F32_GEMM_MINMAX_4X2__WASM, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027614 GemmMicrokernelTester()
27615 .mr(4)
27616 .nr(2)
27617 .kr(1)
27618 .sr(1)
27619 .m(4)
27620 .n(2)
27621 .k(1)
27622 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027623 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027624 }
27625
Marat Dukhande06f492020-04-09 00:19:31 -070027626 TEST(F32_GEMM_MINMAX_4X2__WASM, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027627 GemmMicrokernelTester()
27628 .mr(4)
27629 .nr(2)
27630 .kr(1)
27631 .sr(1)
27632 .m(4)
27633 .n(2)
27634 .k(1)
27635 .cm_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027636 .Test(xnn_f32_gemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027637 }
Marat Dukhan4c617792021-12-21 15:47:58 -080027638#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1c587112020-04-08 20:04:28 -070027639
27640
Marat Dukhande06f492020-04-09 00:19:31 -070027641TEST(F32_GEMM_MINMAX_1X4__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027642 GemmMicrokernelTester()
27643 .mr(1)
27644 .nr(4)
27645 .kr(1)
27646 .sr(1)
27647 .m(1)
27648 .n(4)
27649 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027650 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027651}
27652
Marat Dukhande06f492020-04-09 00:19:31 -070027653TEST(F32_GEMM_MINMAX_1X4__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027654 GemmMicrokernelTester()
27655 .mr(1)
27656 .nr(4)
27657 .kr(1)
27658 .sr(1)
27659 .m(1)
27660 .n(4)
27661 .k(1)
27662 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027663 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027664}
27665
Marat Dukhande06f492020-04-09 00:19:31 -070027666TEST(F32_GEMM_MINMAX_1X4__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027667 GemmMicrokernelTester()
27668 .mr(1)
27669 .nr(4)
27670 .kr(1)
27671 .sr(1)
27672 .m(1)
27673 .n(4)
27674 .k(1)
27675 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027676 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027677}
27678
Marat Dukhande06f492020-04-09 00:19:31 -070027679TEST(F32_GEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027680 for (uint32_t n = 1; n <= 4; n++) {
27681 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027682 GemmMicrokernelTester()
27683 .mr(1)
27684 .nr(4)
27685 .kr(1)
27686 .sr(1)
27687 .m(m)
27688 .n(n)
27689 .k(1)
27690 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027691 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027692 }
27693 }
27694}
27695
Marat Dukhande06f492020-04-09 00:19:31 -070027696TEST(F32_GEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027697 for (uint32_t m = 1; m <= 1; m++) {
27698 GemmMicrokernelTester()
27699 .mr(1)
27700 .nr(4)
27701 .kr(1)
27702 .sr(1)
27703 .m(m)
27704 .n(4)
27705 .k(1)
27706 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027707 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027708 }
27709}
27710
Marat Dukhande06f492020-04-09 00:19:31 -070027711TEST(F32_GEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027712 for (uint32_t n = 1; n <= 4; n++) {
27713 GemmMicrokernelTester()
27714 .mr(1)
27715 .nr(4)
27716 .kr(1)
27717 .sr(1)
27718 .m(1)
27719 .n(n)
27720 .k(1)
27721 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027722 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027723 }
27724}
27725
Marat Dukhande06f492020-04-09 00:19:31 -070027726TEST(F32_GEMM_MINMAX_1X4__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027727 for (size_t k = 2; k < 10; k++) {
27728 GemmMicrokernelTester()
27729 .mr(1)
27730 .nr(4)
27731 .kr(1)
27732 .sr(1)
27733 .m(1)
27734 .n(4)
27735 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027736 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027737 }
27738}
27739
Marat Dukhande06f492020-04-09 00:19:31 -070027740TEST(F32_GEMM_MINMAX_1X4__SCALAR, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027741 for (size_t k = 2; k < 10; k++) {
27742 GemmMicrokernelTester()
27743 .mr(1)
27744 .nr(4)
27745 .kr(1)
27746 .sr(1)
27747 .m(1)
27748 .n(4)
27749 .k(k)
27750 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027751 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027752 }
27753}
27754
Marat Dukhande06f492020-04-09 00:19:31 -070027755TEST(F32_GEMM_MINMAX_1X4__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027756 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027757 for (uint32_t n = 1; n <= 4; n++) {
27758 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027759 GemmMicrokernelTester()
27760 .mr(1)
27761 .nr(4)
27762 .kr(1)
27763 .sr(1)
27764 .m(m)
27765 .n(n)
27766 .k(k)
27767 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027768 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027769 }
27770 }
27771 }
27772}
27773
Marat Dukhande06f492020-04-09 00:19:31 -070027774TEST(F32_GEMM_MINMAX_1X4__SCALAR, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027775 for (uint32_t n = 5; n < 8; n++) {
27776 for (size_t k = 1; k <= 5; k += 2) {
27777 GemmMicrokernelTester()
27778 .mr(1)
27779 .nr(4)
27780 .kr(1)
27781 .sr(1)
27782 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027783 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070027784 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027785 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027786 }
27787 }
27788}
27789
Marat Dukhande06f492020-04-09 00:19:31 -070027790TEST(F32_GEMM_MINMAX_1X4__SCALAR, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027791 for (uint32_t n = 5; n < 8; n++) {
27792 for (size_t k = 1; k <= 5; k += 2) {
27793 GemmMicrokernelTester()
27794 .mr(1)
27795 .nr(4)
27796 .kr(1)
27797 .sr(1)
27798 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027799 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070027800 .k(k)
27801 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027802 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027803 }
27804 }
27805}
27806
Marat Dukhande06f492020-04-09 00:19:31 -070027807TEST(F32_GEMM_MINMAX_1X4__SCALAR, n_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027808 for (uint32_t n = 5; n < 8; n++) {
27809 for (size_t k = 1; k <= 5; k += 2) {
27810 GemmMicrokernelTester()
27811 .mr(1)
27812 .nr(4)
27813 .kr(1)
27814 .sr(1)
27815 .m(1)
27816 .n(n)
27817 .k(k)
27818 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027819 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027820 }
27821 }
27822}
27823
Marat Dukhande06f492020-04-09 00:19:31 -070027824TEST(F32_GEMM_MINMAX_1X4__SCALAR, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027825 for (uint32_t n = 5; n < 8; n++) {
27826 for (size_t k = 1; k <= 5; k += 2) {
27827 for (uint32_t m = 1; m <= 1; m++) {
27828 GemmMicrokernelTester()
27829 .mr(1)
27830 .nr(4)
27831 .kr(1)
27832 .sr(1)
27833 .m(m)
27834 .n(n)
27835 .k(k)
27836 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027837 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027838 }
27839 }
27840 }
27841}
27842
Marat Dukhande06f492020-04-09 00:19:31 -070027843TEST(F32_GEMM_MINMAX_1X4__SCALAR, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027844 for (uint32_t n = 8; n <= 12; n += 4) {
27845 for (size_t k = 1; k <= 5; k += 2) {
27846 GemmMicrokernelTester()
27847 .mr(1)
27848 .nr(4)
27849 .kr(1)
27850 .sr(1)
27851 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027852 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070027853 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027854 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027855 }
27856 }
27857}
27858
Marat Dukhande06f492020-04-09 00:19:31 -070027859TEST(F32_GEMM_MINMAX_1X4__SCALAR, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027860 for (uint32_t n = 8; n <= 12; n += 4) {
27861 for (size_t k = 1; k <= 5; k += 2) {
27862 GemmMicrokernelTester()
27863 .mr(1)
27864 .nr(4)
27865 .kr(1)
27866 .sr(1)
27867 .m(1)
27868 .n(n)
27869 .k(k)
27870 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027871 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027872 }
27873 }
27874}
27875
Marat Dukhande06f492020-04-09 00:19:31 -070027876TEST(F32_GEMM_MINMAX_1X4__SCALAR, n_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027877 for (uint32_t n = 8; n <= 12; n += 4) {
27878 for (size_t k = 1; k <= 5; k += 2) {
27879 GemmMicrokernelTester()
27880 .mr(1)
27881 .nr(4)
27882 .kr(1)
27883 .sr(1)
27884 .m(1)
27885 .n(n)
27886 .k(k)
27887 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027888 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027889 }
27890 }
27891}
27892
Marat Dukhande06f492020-04-09 00:19:31 -070027893TEST(F32_GEMM_MINMAX_1X4__SCALAR, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027894 for (uint32_t n = 8; n <= 12; n += 4) {
27895 for (size_t k = 1; k <= 5; k += 2) {
27896 for (uint32_t m = 1; m <= 1; m++) {
27897 GemmMicrokernelTester()
27898 .mr(1)
27899 .nr(4)
27900 .kr(1)
27901 .sr(1)
27902 .m(m)
27903 .n(n)
27904 .k(k)
27905 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027906 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027907 }
27908 }
27909 }
27910}
27911
Marat Dukhande06f492020-04-09 00:19:31 -070027912TEST(F32_GEMM_MINMAX_1X4__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027913 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027914 for (uint32_t n = 1; n <= 4; n++) {
27915 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027916 GemmMicrokernelTester()
27917 .mr(1)
27918 .nr(4)
27919 .kr(1)
27920 .sr(1)
27921 .m(m)
27922 .n(n)
27923 .k(k)
27924 .cm_stride(7)
27925 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027926 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027927 }
27928 }
27929 }
27930}
27931
Marat Dukhande06f492020-04-09 00:19:31 -070027932TEST(F32_GEMM_MINMAX_1X4__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027933 GemmMicrokernelTester()
27934 .mr(1)
27935 .nr(4)
27936 .kr(1)
27937 .sr(1)
27938 .m(1)
27939 .n(4)
27940 .k(1)
27941 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027942 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027943}
27944
Marat Dukhande06f492020-04-09 00:19:31 -070027945TEST(F32_GEMM_MINMAX_1X4__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027946 GemmMicrokernelTester()
27947 .mr(1)
27948 .nr(4)
27949 .kr(1)
27950 .sr(1)
27951 .m(1)
27952 .n(4)
27953 .k(1)
27954 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027955 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027956}
27957
Marat Dukhande06f492020-04-09 00:19:31 -070027958TEST(F32_GEMM_MINMAX_1X4__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027959 GemmMicrokernelTester()
27960 .mr(1)
27961 .nr(4)
27962 .kr(1)
27963 .sr(1)
27964 .m(1)
27965 .n(4)
27966 .k(1)
27967 .cm_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027968 .Test(xnn_f32_gemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027969}
27970
27971
Marat Dukhande06f492020-04-09 00:19:31 -070027972TEST(F32_GEMM_MINMAX_4X4__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027973 GemmMicrokernelTester()
27974 .mr(4)
27975 .nr(4)
27976 .kr(1)
27977 .sr(1)
27978 .m(4)
27979 .n(4)
27980 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027981 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027982}
27983
Marat Dukhande06f492020-04-09 00:19:31 -070027984TEST(F32_GEMM_MINMAX_4X4__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027985 GemmMicrokernelTester()
27986 .mr(4)
27987 .nr(4)
27988 .kr(1)
27989 .sr(1)
27990 .m(4)
27991 .n(4)
27992 .k(1)
27993 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070027994 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070027995}
27996
Marat Dukhande06f492020-04-09 00:19:31 -070027997TEST(F32_GEMM_MINMAX_4X4__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027998 GemmMicrokernelTester()
27999 .mr(4)
28000 .nr(4)
28001 .kr(1)
28002 .sr(1)
28003 .m(4)
28004 .n(4)
28005 .k(1)
28006 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028007 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028008}
28009
Marat Dukhande06f492020-04-09 00:19:31 -070028010TEST(F32_GEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028011 for (uint32_t n = 1; n <= 4; n++) {
28012 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028013 GemmMicrokernelTester()
28014 .mr(4)
28015 .nr(4)
28016 .kr(1)
28017 .sr(1)
28018 .m(m)
28019 .n(n)
28020 .k(1)
28021 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028022 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028023 }
28024 }
28025}
28026
Marat Dukhande06f492020-04-09 00:19:31 -070028027TEST(F32_GEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028028 for (uint32_t m = 1; m <= 4; m++) {
28029 GemmMicrokernelTester()
28030 .mr(4)
28031 .nr(4)
28032 .kr(1)
28033 .sr(1)
28034 .m(m)
28035 .n(4)
28036 .k(1)
28037 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028038 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028039 }
28040}
28041
Marat Dukhande06f492020-04-09 00:19:31 -070028042TEST(F32_GEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028043 for (uint32_t n = 1; n <= 4; n++) {
28044 GemmMicrokernelTester()
28045 .mr(4)
28046 .nr(4)
28047 .kr(1)
28048 .sr(1)
28049 .m(4)
28050 .n(n)
28051 .k(1)
28052 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028053 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028054 }
28055}
28056
Marat Dukhande06f492020-04-09 00:19:31 -070028057TEST(F32_GEMM_MINMAX_4X4__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028058 for (size_t k = 2; k < 10; k++) {
28059 GemmMicrokernelTester()
28060 .mr(4)
28061 .nr(4)
28062 .kr(1)
28063 .sr(1)
28064 .m(4)
28065 .n(4)
28066 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028067 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028068 }
28069}
28070
Marat Dukhande06f492020-04-09 00:19:31 -070028071TEST(F32_GEMM_MINMAX_4X4__SCALAR, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028072 for (size_t k = 2; k < 10; k++) {
28073 GemmMicrokernelTester()
28074 .mr(4)
28075 .nr(4)
28076 .kr(1)
28077 .sr(1)
28078 .m(4)
28079 .n(4)
28080 .k(k)
28081 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028082 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028083 }
28084}
28085
Marat Dukhande06f492020-04-09 00:19:31 -070028086TEST(F32_GEMM_MINMAX_4X4__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028087 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028088 for (uint32_t n = 1; n <= 4; n++) {
28089 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028090 GemmMicrokernelTester()
28091 .mr(4)
28092 .nr(4)
28093 .kr(1)
28094 .sr(1)
28095 .m(m)
28096 .n(n)
28097 .k(k)
28098 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028099 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028100 }
28101 }
28102 }
28103}
28104
Marat Dukhande06f492020-04-09 00:19:31 -070028105TEST(F32_GEMM_MINMAX_4X4__SCALAR, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028106 for (uint32_t n = 5; n < 8; n++) {
28107 for (size_t k = 1; k <= 5; k += 2) {
28108 GemmMicrokernelTester()
28109 .mr(4)
28110 .nr(4)
28111 .kr(1)
28112 .sr(1)
28113 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028114 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028115 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028116 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028117 }
28118 }
28119}
28120
Marat Dukhande06f492020-04-09 00:19:31 -070028121TEST(F32_GEMM_MINMAX_4X4__SCALAR, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028122 for (uint32_t n = 5; n < 8; n++) {
28123 for (size_t k = 1; k <= 5; k += 2) {
28124 GemmMicrokernelTester()
28125 .mr(4)
28126 .nr(4)
28127 .kr(1)
28128 .sr(1)
28129 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028130 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028131 .k(k)
28132 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028133 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028134 }
28135 }
28136}
28137
Marat Dukhande06f492020-04-09 00:19:31 -070028138TEST(F32_GEMM_MINMAX_4X4__SCALAR, n_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028139 for (uint32_t n = 5; n < 8; n++) {
28140 for (size_t k = 1; k <= 5; k += 2) {
28141 GemmMicrokernelTester()
28142 .mr(4)
28143 .nr(4)
28144 .kr(1)
28145 .sr(1)
28146 .m(4)
28147 .n(n)
28148 .k(k)
28149 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028150 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028151 }
28152 }
28153}
28154
Marat Dukhande06f492020-04-09 00:19:31 -070028155TEST(F32_GEMM_MINMAX_4X4__SCALAR, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028156 for (uint32_t n = 5; n < 8; n++) {
28157 for (size_t k = 1; k <= 5; k += 2) {
28158 for (uint32_t m = 1; m <= 4; m++) {
28159 GemmMicrokernelTester()
28160 .mr(4)
28161 .nr(4)
28162 .kr(1)
28163 .sr(1)
28164 .m(m)
28165 .n(n)
28166 .k(k)
28167 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028168 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028169 }
28170 }
28171 }
28172}
28173
Marat Dukhande06f492020-04-09 00:19:31 -070028174TEST(F32_GEMM_MINMAX_4X4__SCALAR, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028175 for (uint32_t n = 8; n <= 12; n += 4) {
28176 for (size_t k = 1; k <= 5; k += 2) {
28177 GemmMicrokernelTester()
28178 .mr(4)
28179 .nr(4)
28180 .kr(1)
28181 .sr(1)
28182 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028183 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028184 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028185 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028186 }
28187 }
28188}
28189
Marat Dukhande06f492020-04-09 00:19:31 -070028190TEST(F32_GEMM_MINMAX_4X4__SCALAR, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028191 for (uint32_t n = 8; n <= 12; n += 4) {
28192 for (size_t k = 1; k <= 5; k += 2) {
28193 GemmMicrokernelTester()
28194 .mr(4)
28195 .nr(4)
28196 .kr(1)
28197 .sr(1)
28198 .m(4)
28199 .n(n)
28200 .k(k)
28201 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028202 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028203 }
28204 }
28205}
28206
Marat Dukhande06f492020-04-09 00:19:31 -070028207TEST(F32_GEMM_MINMAX_4X4__SCALAR, n_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028208 for (uint32_t n = 8; n <= 12; n += 4) {
28209 for (size_t k = 1; k <= 5; k += 2) {
28210 GemmMicrokernelTester()
28211 .mr(4)
28212 .nr(4)
28213 .kr(1)
28214 .sr(1)
28215 .m(4)
28216 .n(n)
28217 .k(k)
28218 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028219 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028220 }
28221 }
28222}
28223
Marat Dukhande06f492020-04-09 00:19:31 -070028224TEST(F32_GEMM_MINMAX_4X4__SCALAR, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028225 for (uint32_t n = 8; n <= 12; n += 4) {
28226 for (size_t k = 1; k <= 5; k += 2) {
28227 for (uint32_t m = 1; m <= 4; m++) {
28228 GemmMicrokernelTester()
28229 .mr(4)
28230 .nr(4)
28231 .kr(1)
28232 .sr(1)
28233 .m(m)
28234 .n(n)
28235 .k(k)
28236 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028237 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028238 }
28239 }
28240 }
28241}
28242
Marat Dukhande06f492020-04-09 00:19:31 -070028243TEST(F32_GEMM_MINMAX_4X4__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028244 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028245 for (uint32_t n = 1; n <= 4; n++) {
28246 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028247 GemmMicrokernelTester()
28248 .mr(4)
28249 .nr(4)
28250 .kr(1)
28251 .sr(1)
28252 .m(m)
28253 .n(n)
28254 .k(k)
28255 .cm_stride(7)
28256 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028257 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028258 }
28259 }
28260 }
28261}
28262
Marat Dukhande06f492020-04-09 00:19:31 -070028263TEST(F32_GEMM_MINMAX_4X4__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028264 GemmMicrokernelTester()
28265 .mr(4)
28266 .nr(4)
28267 .kr(1)
28268 .sr(1)
28269 .m(4)
28270 .n(4)
28271 .k(1)
28272 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028273 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028274}
28275
Marat Dukhande06f492020-04-09 00:19:31 -070028276TEST(F32_GEMM_MINMAX_4X4__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028277 GemmMicrokernelTester()
28278 .mr(4)
28279 .nr(4)
28280 .kr(1)
28281 .sr(1)
28282 .m(4)
28283 .n(4)
28284 .k(1)
28285 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028286 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028287}
28288
Marat Dukhande06f492020-04-09 00:19:31 -070028289TEST(F32_GEMM_MINMAX_4X4__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028290 GemmMicrokernelTester()
28291 .mr(4)
28292 .nr(4)
28293 .kr(1)
28294 .sr(1)
28295 .m(4)
28296 .n(4)
28297 .k(1)
28298 .cm_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028299 .Test(xnn_f32_gemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028300}
28301
28302
Marat Dukhande06f492020-04-09 00:19:31 -070028303TEST(F32_GEMM_MINMAX_4X2__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028304 GemmMicrokernelTester()
28305 .mr(4)
28306 .nr(2)
28307 .kr(1)
28308 .sr(1)
28309 .m(4)
28310 .n(2)
28311 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028312 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028313}
28314
Marat Dukhande06f492020-04-09 00:19:31 -070028315TEST(F32_GEMM_MINMAX_4X2__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028316 GemmMicrokernelTester()
28317 .mr(4)
28318 .nr(2)
28319 .kr(1)
28320 .sr(1)
28321 .m(4)
28322 .n(2)
28323 .k(1)
28324 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028325 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028326}
28327
Marat Dukhande06f492020-04-09 00:19:31 -070028328TEST(F32_GEMM_MINMAX_4X2__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028329 GemmMicrokernelTester()
28330 .mr(4)
28331 .nr(2)
28332 .kr(1)
28333 .sr(1)
28334 .m(4)
28335 .n(2)
28336 .k(1)
28337 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028338 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028339}
28340
Marat Dukhande06f492020-04-09 00:19:31 -070028341TEST(F32_GEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028342 for (uint32_t n = 1; n <= 2; n++) {
28343 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028344 GemmMicrokernelTester()
28345 .mr(4)
28346 .nr(2)
28347 .kr(1)
28348 .sr(1)
28349 .m(m)
28350 .n(n)
28351 .k(1)
28352 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028353 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028354 }
28355 }
28356}
28357
Marat Dukhande06f492020-04-09 00:19:31 -070028358TEST(F32_GEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028359 for (uint32_t m = 1; m <= 4; m++) {
28360 GemmMicrokernelTester()
28361 .mr(4)
28362 .nr(2)
28363 .kr(1)
28364 .sr(1)
28365 .m(m)
28366 .n(2)
28367 .k(1)
28368 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028369 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028370 }
28371}
28372
Marat Dukhande06f492020-04-09 00:19:31 -070028373TEST(F32_GEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028374 for (uint32_t n = 1; n <= 2; n++) {
28375 GemmMicrokernelTester()
28376 .mr(4)
28377 .nr(2)
28378 .kr(1)
28379 .sr(1)
28380 .m(4)
28381 .n(n)
28382 .k(1)
28383 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028384 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028385 }
28386}
28387
Marat Dukhande06f492020-04-09 00:19:31 -070028388TEST(F32_GEMM_MINMAX_4X2__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028389 for (size_t k = 2; k < 10; k++) {
28390 GemmMicrokernelTester()
28391 .mr(4)
28392 .nr(2)
28393 .kr(1)
28394 .sr(1)
28395 .m(4)
28396 .n(2)
28397 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028398 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028399 }
28400}
28401
Marat Dukhande06f492020-04-09 00:19:31 -070028402TEST(F32_GEMM_MINMAX_4X2__SCALAR, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028403 for (size_t k = 2; k < 10; k++) {
28404 GemmMicrokernelTester()
28405 .mr(4)
28406 .nr(2)
28407 .kr(1)
28408 .sr(1)
28409 .m(4)
28410 .n(2)
28411 .k(k)
28412 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028413 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028414 }
28415}
28416
Marat Dukhande06f492020-04-09 00:19:31 -070028417TEST(F32_GEMM_MINMAX_4X2__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028418 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028419 for (uint32_t n = 1; n <= 2; n++) {
28420 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028421 GemmMicrokernelTester()
28422 .mr(4)
28423 .nr(2)
28424 .kr(1)
28425 .sr(1)
28426 .m(m)
28427 .n(n)
28428 .k(k)
28429 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028430 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028431 }
28432 }
28433 }
28434}
28435
Marat Dukhande06f492020-04-09 00:19:31 -070028436TEST(F32_GEMM_MINMAX_4X2__SCALAR, n_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028437 for (uint32_t n = 3; n < 4; n++) {
28438 for (size_t k = 1; k <= 5; k += 2) {
28439 GemmMicrokernelTester()
28440 .mr(4)
28441 .nr(2)
28442 .kr(1)
28443 .sr(1)
28444 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028445 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028446 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028447 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028448 }
28449 }
28450}
28451
Marat Dukhande06f492020-04-09 00:19:31 -070028452TEST(F32_GEMM_MINMAX_4X2__SCALAR, n_gt_2_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028453 for (uint32_t n = 3; n < 4; n++) {
28454 for (size_t k = 1; k <= 5; k += 2) {
28455 GemmMicrokernelTester()
28456 .mr(4)
28457 .nr(2)
28458 .kr(1)
28459 .sr(1)
28460 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028461 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028462 .k(k)
28463 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028464 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028465 }
28466 }
28467}
28468
Marat Dukhande06f492020-04-09 00:19:31 -070028469TEST(F32_GEMM_MINMAX_4X2__SCALAR, n_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028470 for (uint32_t n = 3; n < 4; n++) {
28471 for (size_t k = 1; k <= 5; k += 2) {
28472 GemmMicrokernelTester()
28473 .mr(4)
28474 .nr(2)
28475 .kr(1)
28476 .sr(1)
28477 .m(4)
28478 .n(n)
28479 .k(k)
28480 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028481 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028482 }
28483 }
28484}
28485
Marat Dukhande06f492020-04-09 00:19:31 -070028486TEST(F32_GEMM_MINMAX_4X2__SCALAR, n_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028487 for (uint32_t n = 3; n < 4; n++) {
28488 for (size_t k = 1; k <= 5; k += 2) {
28489 for (uint32_t m = 1; m <= 4; m++) {
28490 GemmMicrokernelTester()
28491 .mr(4)
28492 .nr(2)
28493 .kr(1)
28494 .sr(1)
28495 .m(m)
28496 .n(n)
28497 .k(k)
28498 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028499 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028500 }
28501 }
28502 }
28503}
28504
Marat Dukhande06f492020-04-09 00:19:31 -070028505TEST(F32_GEMM_MINMAX_4X2__SCALAR, n_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028506 for (uint32_t n = 4; n <= 6; n += 2) {
28507 for (size_t k = 1; k <= 5; k += 2) {
28508 GemmMicrokernelTester()
28509 .mr(4)
28510 .nr(2)
28511 .kr(1)
28512 .sr(1)
28513 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028514 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028515 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028516 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028517 }
28518 }
28519}
28520
Marat Dukhande06f492020-04-09 00:19:31 -070028521TEST(F32_GEMM_MINMAX_4X2__SCALAR, n_div_2_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028522 for (uint32_t n = 4; n <= 6; n += 2) {
28523 for (size_t k = 1; k <= 5; k += 2) {
28524 GemmMicrokernelTester()
28525 .mr(4)
28526 .nr(2)
28527 .kr(1)
28528 .sr(1)
28529 .m(4)
28530 .n(n)
28531 .k(k)
28532 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028533 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028534 }
28535 }
28536}
28537
Marat Dukhande06f492020-04-09 00:19:31 -070028538TEST(F32_GEMM_MINMAX_4X2__SCALAR, n_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028539 for (uint32_t n = 4; n <= 6; n += 2) {
28540 for (size_t k = 1; k <= 5; k += 2) {
28541 GemmMicrokernelTester()
28542 .mr(4)
28543 .nr(2)
28544 .kr(1)
28545 .sr(1)
28546 .m(4)
28547 .n(n)
28548 .k(k)
28549 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028550 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028551 }
28552 }
28553}
28554
Marat Dukhande06f492020-04-09 00:19:31 -070028555TEST(F32_GEMM_MINMAX_4X2__SCALAR, n_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028556 for (uint32_t n = 4; n <= 6; n += 2) {
28557 for (size_t k = 1; k <= 5; k += 2) {
28558 for (uint32_t m = 1; m <= 4; m++) {
28559 GemmMicrokernelTester()
28560 .mr(4)
28561 .nr(2)
28562 .kr(1)
28563 .sr(1)
28564 .m(m)
28565 .n(n)
28566 .k(k)
28567 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028568 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028569 }
28570 }
28571 }
28572}
28573
Marat Dukhande06f492020-04-09 00:19:31 -070028574TEST(F32_GEMM_MINMAX_4X2__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028575 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028576 for (uint32_t n = 1; n <= 2; n++) {
28577 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028578 GemmMicrokernelTester()
28579 .mr(4)
28580 .nr(2)
28581 .kr(1)
28582 .sr(1)
28583 .m(m)
28584 .n(n)
28585 .k(k)
28586 .cm_stride(5)
28587 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028588 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028589 }
28590 }
28591 }
28592}
28593
Marat Dukhande06f492020-04-09 00:19:31 -070028594TEST(F32_GEMM_MINMAX_4X2__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028595 GemmMicrokernelTester()
28596 .mr(4)
28597 .nr(2)
28598 .kr(1)
28599 .sr(1)
28600 .m(4)
28601 .n(2)
28602 .k(1)
28603 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028604 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028605}
28606
Marat Dukhande06f492020-04-09 00:19:31 -070028607TEST(F32_GEMM_MINMAX_4X2__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028608 GemmMicrokernelTester()
28609 .mr(4)
28610 .nr(2)
28611 .kr(1)
28612 .sr(1)
28613 .m(4)
28614 .n(2)
28615 .k(1)
28616 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028617 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028618}
28619
Marat Dukhande06f492020-04-09 00:19:31 -070028620TEST(F32_GEMM_MINMAX_4X2__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028621 GemmMicrokernelTester()
28622 .mr(4)
28623 .nr(2)
28624 .kr(1)
28625 .sr(1)
28626 .m(4)
28627 .n(2)
28628 .k(1)
28629 .cm_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028630 .Test(xnn_f32_gemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028631}
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028632
28633
28634#if XNN_ARCH_ARM && XNN_PLATFORM_JIT
28635 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4) {
28636 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028637 GemmMicrokernelTester()
28638 .mr(4)
28639 .nr(8)
28640 .kr(1)
28641 .sr(1)
28642 .m(4)
28643 .n(8)
28644 .k(4)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028645 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028646 }
28647
28648 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, strided_cn) {
28649 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028650 GemmMicrokernelTester()
28651 .mr(4)
28652 .nr(8)
28653 .kr(1)
28654 .sr(1)
28655 .m(4)
28656 .n(8)
28657 .k(4)
28658 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028659 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028660 }
28661
28662 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_strided_a) {
28663 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028664 GemmMicrokernelTester()
28665 .mr(4)
28666 .nr(8)
28667 .kr(1)
28668 .sr(1)
28669 .m(4)
28670 .n(8)
28671 .k(4)
28672 .a_stride(7)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028673 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028674 }
28675
28676 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile) {
28677 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080028678 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028679 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028680 GemmMicrokernelTester()
28681 .mr(4)
28682 .nr(8)
28683 .kr(1)
28684 .sr(1)
28685 .m(m)
28686 .n(n)
28687 .k(4)
28688 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028689 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028690 }
28691 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028692 }
28693
28694 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile_m) {
28695 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028696 for (uint32_t m = 1; m <= 4; m++) {
28697 GemmMicrokernelTester()
28698 .mr(4)
28699 .nr(8)
28700 .kr(1)
28701 .sr(1)
28702 .m(m)
28703 .n(8)
28704 .k(4)
28705 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028706 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028707 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028708 }
28709
28710 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile_n) {
28711 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028712 for (uint32_t n = 1; n <= 8; n++) {
28713 GemmMicrokernelTester()
28714 .mr(4)
28715 .nr(8)
28716 .kr(1)
28717 .sr(1)
28718 .m(4)
28719 .n(n)
28720 .k(4)
28721 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028722 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028723 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028724 }
28725
28726 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8) {
28727 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028728 GemmMicrokernelTester()
28729 .mr(4)
28730 .nr(8)
28731 .kr(1)
28732 .sr(1)
28733 .m(4)
28734 .n(8)
28735 .k(8)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028736 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028737 }
28738
28739 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8_strided_a) {
28740 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028741 GemmMicrokernelTester()
28742 .mr(4)
28743 .nr(8)
28744 .kr(1)
28745 .sr(1)
28746 .m(4)
28747 .n(8)
28748 .k(8)
28749 .a_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028750 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028751 }
28752
28753 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8_subtile) {
28754 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080028755 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028756 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028757 GemmMicrokernelTester()
28758 .mr(4)
28759 .nr(8)
28760 .kr(1)
28761 .sr(1)
28762 .m(m)
28763 .n(n)
28764 .k(8)
28765 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028766 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028767 }
28768 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028769 }
28770
28771 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8) {
28772 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028773 for (size_t k = 1; k < 8; k++) {
28774 GemmMicrokernelTester()
28775 .mr(4)
28776 .nr(8)
28777 .kr(1)
28778 .sr(1)
28779 .m(4)
28780 .n(8)
28781 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028782 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028783 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028784 }
28785
28786 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8_strided_a) {
28787 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028788 for (size_t k = 1; k < 8; k++) {
28789 GemmMicrokernelTester()
28790 .mr(4)
28791 .nr(8)
28792 .kr(1)
28793 .sr(1)
28794 .m(4)
28795 .n(8)
28796 .k(k)
28797 .a_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028798 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028799 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028800 }
28801
28802 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8_subtile) {
28803 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028804 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028805 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028806 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028807 GemmMicrokernelTester()
28808 .mr(4)
28809 .nr(8)
28810 .kr(1)
28811 .sr(1)
28812 .m(m)
28813 .n(n)
28814 .k(k)
28815 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028816 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028817 }
28818 }
28819 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028820 }
28821
28822 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8) {
28823 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028824 for (size_t k = 9; k < 16; k++) {
28825 GemmMicrokernelTester()
28826 .mr(4)
28827 .nr(8)
28828 .kr(1)
28829 .sr(1)
28830 .m(4)
28831 .n(8)
28832 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028833 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028834 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028835 }
28836
28837 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8_strided_a) {
28838 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028839 for (size_t k = 9; k < 16; k++) {
28840 GemmMicrokernelTester()
28841 .mr(4)
28842 .nr(8)
28843 .kr(1)
28844 .sr(1)
28845 .m(4)
28846 .n(8)
28847 .k(k)
28848 .a_stride(19)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028849 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028850 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028851 }
28852
28853 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8_subtile) {
28854 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028855 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028856 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028857 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028858 GemmMicrokernelTester()
28859 .mr(4)
28860 .nr(8)
28861 .kr(1)
28862 .sr(1)
28863 .m(m)
28864 .n(n)
28865 .k(k)
28866 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028867 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028868 }
28869 }
28870 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028871 }
28872
28873 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_div_4) {
28874 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028875 for (size_t k = 12; k <= 40; k += 4) {
28876 GemmMicrokernelTester()
28877 .mr(4)
28878 .nr(8)
28879 .kr(1)
28880 .sr(1)
28881 .m(4)
28882 .n(8)
28883 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028884 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028885 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028886 }
28887
28888 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_div_4_strided_a) {
28889 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028890 for (size_t k = 12; k <= 40; k += 4) {
28891 GemmMicrokernelTester()
28892 .mr(4)
28893 .nr(8)
28894 .kr(1)
28895 .sr(1)
28896 .m(4)
28897 .n(8)
28898 .k(k)
28899 .a_stride(43)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028900 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028901 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028902 }
28903
28904 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, k_div_4_subtile) {
28905 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028906 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028907 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028908 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028909 GemmMicrokernelTester()
28910 .mr(4)
28911 .nr(8)
28912 .kr(1)
28913 .sr(1)
28914 .m(m)
28915 .n(n)
28916 .k(k)
28917 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028918 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028919 }
28920 }
28921 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028922 }
28923
28924 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8) {
28925 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028926 for (uint32_t n = 9; n < 16; n++) {
28927 for (size_t k = 1; k <= 20; k += 5) {
28928 GemmMicrokernelTester()
28929 .mr(4)
28930 .nr(8)
28931 .kr(1)
28932 .sr(1)
28933 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028934 .n(n)
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028935 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028936 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028937 }
28938 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028939 }
28940
28941 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_strided_cn) {
28942 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028943 for (uint32_t n = 9; n < 16; n++) {
28944 for (size_t k = 1; k <= 20; k += 5) {
28945 GemmMicrokernelTester()
28946 .mr(4)
28947 .nr(8)
28948 .kr(1)
28949 .sr(1)
28950 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028951 .n(n)
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028952 .k(k)
28953 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028954 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028955 }
28956 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028957 }
28958
28959 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_strided_a) {
28960 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028961 for (uint32_t n = 9; n < 16; n++) {
28962 for (size_t k = 1; k <= 20; k += 5) {
28963 GemmMicrokernelTester()
28964 .mr(4)
28965 .nr(8)
28966 .kr(1)
28967 .sr(1)
28968 .m(4)
28969 .n(n)
28970 .k(k)
28971 .a_stride(23)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028972 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028973 }
28974 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028975 }
28976
28977 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_subtile) {
28978 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028979 for (uint32_t n = 9; n < 16; n++) {
28980 for (size_t k = 1; k <= 20; k += 5) {
28981 for (uint32_t m = 1; m <= 4; m++) {
28982 GemmMicrokernelTester()
28983 .mr(4)
28984 .nr(8)
28985 .kr(1)
28986 .sr(1)
28987 .m(m)
28988 .n(n)
28989 .k(k)
28990 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080028991 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028992 }
28993 }
28994 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028995 }
28996
28997 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, n_div_8) {
28998 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080028999 for (uint32_t n = 16; n <= 24; n += 8) {
29000 for (size_t k = 1; k <= 20; k += 5) {
29001 GemmMicrokernelTester()
29002 .mr(4)
29003 .nr(8)
29004 .kr(1)
29005 .sr(1)
29006 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029007 .n(n)
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029008 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029009 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029010 }
29011 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029012 }
29013
29014 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_strided_cn) {
29015 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029016 for (uint32_t n = 16; n <= 24; n += 8) {
29017 for (size_t k = 1; k <= 20; k += 5) {
29018 GemmMicrokernelTester()
29019 .mr(4)
29020 .nr(8)
29021 .kr(1)
29022 .sr(1)
29023 .m(4)
29024 .n(n)
29025 .k(k)
29026 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029027 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029028 }
29029 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029030 }
29031
29032 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_strided_a) {
29033 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029034 for (uint32_t n = 16; n <= 24; n += 8) {
29035 for (size_t k = 1; k <= 20; k += 5) {
29036 GemmMicrokernelTester()
29037 .mr(4)
29038 .nr(8)
29039 .kr(1)
29040 .sr(1)
29041 .m(4)
29042 .n(n)
29043 .k(k)
29044 .a_stride(23)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029045 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029046 }
29047 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029048 }
29049
29050 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_subtile) {
29051 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029052 for (uint32_t n = 16; n <= 24; n += 8) {
29053 for (size_t k = 1; k <= 20; k += 5) {
29054 for (uint32_t m = 1; m <= 4; m++) {
29055 GemmMicrokernelTester()
29056 .mr(4)
29057 .nr(8)
29058 .kr(1)
29059 .sr(1)
29060 .m(m)
29061 .n(n)
29062 .k(k)
29063 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029064 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029065 }
29066 }
29067 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029068 }
29069
29070 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, strided_cm_subtile) {
29071 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029072 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029073 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029074 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029075 GemmMicrokernelTester()
29076 .mr(4)
29077 .nr(8)
29078 .kr(1)
29079 .sr(1)
29080 .m(m)
29081 .n(n)
29082 .k(k)
29083 .cm_stride(11)
29084 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029085 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029086 }
29087 }
29088 }
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029089 }
29090
29091 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, qmin) {
29092 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029093 GemmMicrokernelTester()
29094 .mr(4)
29095 .nr(8)
29096 .kr(1)
29097 .sr(1)
29098 .m(4)
29099 .n(8)
29100 .k(4)
29101 .qmin(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029102 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029103 }
29104
29105 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, qmax) {
29106 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029107 GemmMicrokernelTester()
29108 .mr(4)
29109 .nr(8)
29110 .kr(1)
29111 .sr(1)
29112 .m(4)
29113 .n(8)
29114 .k(4)
29115 .qmax(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029116 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029117 }
29118
29119 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A55, strided_cm) {
29120 TEST_REQUIRES_ARM_NEON;
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029121 GemmMicrokernelTester()
29122 .mr(4)
29123 .nr(8)
29124 .kr(1)
29125 .sr(1)
29126 .m(4)
29127 .n(8)
29128 .k(4)
29129 .cm_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029130 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ngb43b47a2021-12-23 16:27:22 -080029131 }
29132#endif // XNN_ARCH_ARM && XNN_PLATFORM_JIT
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029133
29134
29135#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029136 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4) {
29137 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029138 GemmMicrokernelTester()
29139 .mr(4)
29140 .nr(8)
29141 .kr(1)
29142 .sr(1)
29143 .m(4)
29144 .n(8)
29145 .k(4)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029146 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029147 }
29148
29149 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, strided_cn) {
29150 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029151 GemmMicrokernelTester()
29152 .mr(4)
29153 .nr(8)
29154 .kr(1)
29155 .sr(1)
29156 .m(4)
29157 .n(8)
29158 .k(4)
29159 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029160 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029161 }
29162
29163 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_strided_a) {
29164 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029165 GemmMicrokernelTester()
29166 .mr(4)
29167 .nr(8)
29168 .kr(1)
29169 .sr(1)
29170 .m(4)
29171 .n(8)
29172 .k(4)
29173 .a_stride(7)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029174 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029175 }
29176
29177 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile) {
29178 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080029179 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029180 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029181 GemmMicrokernelTester()
29182 .mr(4)
29183 .nr(8)
29184 .kr(1)
29185 .sr(1)
29186 .m(m)
29187 .n(n)
29188 .k(4)
29189 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029190 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029191 }
29192 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029193 }
29194
29195 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile_m) {
29196 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029197 for (uint32_t m = 1; m <= 4; m++) {
29198 GemmMicrokernelTester()
29199 .mr(4)
29200 .nr(8)
29201 .kr(1)
29202 .sr(1)
29203 .m(m)
29204 .n(8)
29205 .k(4)
29206 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029207 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029208 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029209 }
29210
29211 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile_n) {
29212 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029213 for (uint32_t n = 1; n <= 8; n++) {
29214 GemmMicrokernelTester()
29215 .mr(4)
29216 .nr(8)
29217 .kr(1)
29218 .sr(1)
29219 .m(4)
29220 .n(n)
29221 .k(4)
29222 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029223 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029224 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029225 }
29226
29227 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8) {
29228 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029229 GemmMicrokernelTester()
29230 .mr(4)
29231 .nr(8)
29232 .kr(1)
29233 .sr(1)
29234 .m(4)
29235 .n(8)
29236 .k(8)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029237 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029238 }
29239
29240 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8_strided_a) {
29241 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029242 GemmMicrokernelTester()
29243 .mr(4)
29244 .nr(8)
29245 .kr(1)
29246 .sr(1)
29247 .m(4)
29248 .n(8)
29249 .k(8)
29250 .a_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029251 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029252 }
29253
29254 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8_subtile) {
29255 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080029256 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029257 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029258 GemmMicrokernelTester()
29259 .mr(4)
29260 .nr(8)
29261 .kr(1)
29262 .sr(1)
29263 .m(m)
29264 .n(n)
29265 .k(8)
29266 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029267 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029268 }
29269 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029270 }
29271
29272 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8) {
29273 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029274 for (size_t k = 1; k < 8; k++) {
29275 GemmMicrokernelTester()
29276 .mr(4)
29277 .nr(8)
29278 .kr(1)
29279 .sr(1)
29280 .m(4)
29281 .n(8)
29282 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029283 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029284 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029285 }
29286
29287 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8_strided_a) {
29288 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029289 for (size_t k = 1; k < 8; k++) {
29290 GemmMicrokernelTester()
29291 .mr(4)
29292 .nr(8)
29293 .kr(1)
29294 .sr(1)
29295 .m(4)
29296 .n(8)
29297 .k(k)
29298 .a_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029299 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029300 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029301 }
29302
29303 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8_subtile) {
29304 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029305 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029306 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029307 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029308 GemmMicrokernelTester()
29309 .mr(4)
29310 .nr(8)
29311 .kr(1)
29312 .sr(1)
29313 .m(m)
29314 .n(n)
29315 .k(k)
29316 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029317 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029318 }
29319 }
29320 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029321 }
29322
29323 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8) {
29324 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029325 for (size_t k = 9; k < 16; k++) {
29326 GemmMicrokernelTester()
29327 .mr(4)
29328 .nr(8)
29329 .kr(1)
29330 .sr(1)
29331 .m(4)
29332 .n(8)
29333 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029334 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029335 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029336 }
29337
29338 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8_strided_a) {
29339 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029340 for (size_t k = 9; k < 16; k++) {
29341 GemmMicrokernelTester()
29342 .mr(4)
29343 .nr(8)
29344 .kr(1)
29345 .sr(1)
29346 .m(4)
29347 .n(8)
29348 .k(k)
29349 .a_stride(19)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029350 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029351 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029352 }
29353
29354 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8_subtile) {
29355 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029356 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029357 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029358 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029359 GemmMicrokernelTester()
29360 .mr(4)
29361 .nr(8)
29362 .kr(1)
29363 .sr(1)
29364 .m(m)
29365 .n(n)
29366 .k(k)
29367 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029368 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029369 }
29370 }
29371 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029372 }
29373
29374 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_div_4) {
29375 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029376 for (size_t k = 12; k <= 40; k += 4) {
29377 GemmMicrokernelTester()
29378 .mr(4)
29379 .nr(8)
29380 .kr(1)
29381 .sr(1)
29382 .m(4)
29383 .n(8)
29384 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029385 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029386 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029387 }
29388
29389 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_div_4_strided_a) {
29390 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029391 for (size_t k = 12; k <= 40; k += 4) {
29392 GemmMicrokernelTester()
29393 .mr(4)
29394 .nr(8)
29395 .kr(1)
29396 .sr(1)
29397 .m(4)
29398 .n(8)
29399 .k(k)
29400 .a_stride(43)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029401 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029402 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029403 }
29404
29405 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, k_div_4_subtile) {
29406 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029407 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029408 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029409 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029410 GemmMicrokernelTester()
29411 .mr(4)
29412 .nr(8)
29413 .kr(1)
29414 .sr(1)
29415 .m(m)
29416 .n(n)
29417 .k(k)
29418 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029419 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029420 }
29421 }
29422 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029423 }
29424
29425 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8) {
29426 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029427 for (uint32_t n = 9; n < 16; n++) {
29428 for (size_t k = 1; k <= 20; k += 5) {
29429 GemmMicrokernelTester()
29430 .mr(4)
29431 .nr(8)
29432 .kr(1)
29433 .sr(1)
29434 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029435 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029436 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029437 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029438 }
29439 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029440 }
29441
29442 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_strided_cn) {
29443 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029444 for (uint32_t n = 9; n < 16; n++) {
29445 for (size_t k = 1; k <= 20; k += 5) {
29446 GemmMicrokernelTester()
29447 .mr(4)
29448 .nr(8)
29449 .kr(1)
29450 .sr(1)
29451 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029452 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029453 .k(k)
29454 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029455 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029456 }
29457 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029458 }
29459
29460 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_strided_a) {
29461 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029462 for (uint32_t n = 9; n < 16; n++) {
29463 for (size_t k = 1; k <= 20; k += 5) {
29464 GemmMicrokernelTester()
29465 .mr(4)
29466 .nr(8)
29467 .kr(1)
29468 .sr(1)
29469 .m(4)
29470 .n(n)
29471 .k(k)
29472 .a_stride(23)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029473 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029474 }
29475 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029476 }
29477
29478 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_subtile) {
29479 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029480 for (uint32_t n = 9; n < 16; n++) {
29481 for (size_t k = 1; k <= 20; k += 5) {
29482 for (uint32_t m = 1; m <= 4; m++) {
29483 GemmMicrokernelTester()
29484 .mr(4)
29485 .nr(8)
29486 .kr(1)
29487 .sr(1)
29488 .m(m)
29489 .n(n)
29490 .k(k)
29491 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029492 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029493 }
29494 }
29495 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029496 }
29497
29498 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, n_div_8) {
29499 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029500 for (uint32_t n = 16; n <= 24; n += 8) {
29501 for (size_t k = 1; k <= 20; k += 5) {
29502 GemmMicrokernelTester()
29503 .mr(4)
29504 .nr(8)
29505 .kr(1)
29506 .sr(1)
29507 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029508 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029509 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029510 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029511 }
29512 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029513 }
29514
29515 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_strided_cn) {
29516 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029517 for (uint32_t n = 16; n <= 24; n += 8) {
29518 for (size_t k = 1; k <= 20; k += 5) {
29519 GemmMicrokernelTester()
29520 .mr(4)
29521 .nr(8)
29522 .kr(1)
29523 .sr(1)
29524 .m(4)
29525 .n(n)
29526 .k(k)
29527 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029528 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029529 }
29530 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029531 }
29532
29533 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_strided_a) {
29534 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029535 for (uint32_t n = 16; n <= 24; n += 8) {
29536 for (size_t k = 1; k <= 20; k += 5) {
29537 GemmMicrokernelTester()
29538 .mr(4)
29539 .nr(8)
29540 .kr(1)
29541 .sr(1)
29542 .m(4)
29543 .n(n)
29544 .k(k)
29545 .a_stride(23)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029546 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029547 }
29548 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029549 }
29550
29551 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_subtile) {
29552 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029553 for (uint32_t n = 16; n <= 24; n += 8) {
29554 for (size_t k = 1; k <= 20; k += 5) {
29555 for (uint32_t m = 1; m <= 4; m++) {
29556 GemmMicrokernelTester()
29557 .mr(4)
29558 .nr(8)
29559 .kr(1)
29560 .sr(1)
29561 .m(m)
29562 .n(n)
29563 .k(k)
29564 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029565 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029566 }
29567 }
29568 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029569 }
29570
29571 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, strided_cm_subtile) {
29572 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029573 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029574 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029575 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029576 GemmMicrokernelTester()
29577 .mr(4)
29578 .nr(8)
29579 .kr(1)
29580 .sr(1)
29581 .m(m)
29582 .n(n)
29583 .k(k)
29584 .cm_stride(11)
29585 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029586 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029587 }
29588 }
29589 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029590 }
29591
29592 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, qmin) {
29593 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029594 GemmMicrokernelTester()
29595 .mr(4)
29596 .nr(8)
29597 .kr(1)
29598 .sr(1)
29599 .m(4)
29600 .n(8)
29601 .k(4)
29602 .qmin(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029603 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029604 }
29605
29606 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, qmax) {
29607 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029608 GemmMicrokernelTester()
29609 .mr(4)
29610 .nr(8)
29611 .kr(1)
29612 .sr(1)
29613 .m(4)
29614 .n(8)
29615 .k(4)
29616 .qmax(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029617 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029618 }
29619
29620 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A75, strided_cm) {
29621 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029622 GemmMicrokernelTester()
29623 .mr(4)
29624 .nr(8)
29625 .kr(1)
29626 .sr(1)
29627 .m(4)
29628 .n(8)
29629 .k(4)
29630 .cm_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029631 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029632 }
29633#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
29634
29635
29636#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
29637 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4) {
29638 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029639 GemmMicrokernelTester()
29640 .mr(4)
29641 .nr(8)
29642 .kr(1)
29643 .sr(1)
29644 .m(4)
29645 .n(8)
29646 .k(4)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029647 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029648 }
29649
29650 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cn) {
29651 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029652 GemmMicrokernelTester()
29653 .mr(4)
29654 .nr(8)
29655 .kr(1)
29656 .sr(1)
29657 .m(4)
29658 .n(8)
29659 .k(4)
29660 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029661 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029662 }
29663
29664 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_strided_a) {
29665 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029666 GemmMicrokernelTester()
29667 .mr(4)
29668 .nr(8)
29669 .kr(1)
29670 .sr(1)
29671 .m(4)
29672 .n(8)
29673 .k(4)
29674 .a_stride(7)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029675 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029676 }
29677
29678 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile) {
29679 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080029680 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029681 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029682 GemmMicrokernelTester()
29683 .mr(4)
29684 .nr(8)
29685 .kr(1)
29686 .sr(1)
29687 .m(m)
29688 .n(n)
29689 .k(4)
29690 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029691 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029692 }
29693 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029694 }
29695
29696 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile_m) {
29697 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029698 for (uint32_t m = 1; m <= 4; m++) {
29699 GemmMicrokernelTester()
29700 .mr(4)
29701 .nr(8)
29702 .kr(1)
29703 .sr(1)
29704 .m(m)
29705 .n(8)
29706 .k(4)
29707 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029708 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029709 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029710 }
29711
29712 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile_n) {
29713 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029714 for (uint32_t n = 1; n <= 8; n++) {
29715 GemmMicrokernelTester()
29716 .mr(4)
29717 .nr(8)
29718 .kr(1)
29719 .sr(1)
29720 .m(4)
29721 .n(n)
29722 .k(4)
29723 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029724 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029725 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029726 }
29727
29728 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_8) {
29729 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029730 GemmMicrokernelTester()
29731 .mr(4)
29732 .nr(8)
29733 .kr(1)
29734 .sr(1)
29735 .m(4)
29736 .n(8)
29737 .k(8)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029738 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029739 }
29740
29741 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_8_strided_a) {
29742 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029743 GemmMicrokernelTester()
29744 .mr(4)
29745 .nr(8)
29746 .kr(1)
29747 .sr(1)
29748 .m(4)
29749 .n(8)
29750 .k(8)
29751 .a_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029752 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029753 }
29754
29755 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_8_subtile) {
29756 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080029757 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029758 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029759 GemmMicrokernelTester()
29760 .mr(4)
29761 .nr(8)
29762 .kr(1)
29763 .sr(1)
29764 .m(m)
29765 .n(n)
29766 .k(8)
29767 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029768 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029769 }
29770 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029771 }
29772
29773 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_lt_8) {
29774 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029775 for (size_t k = 1; k < 8; k++) {
29776 GemmMicrokernelTester()
29777 .mr(4)
29778 .nr(8)
29779 .kr(1)
29780 .sr(1)
29781 .m(4)
29782 .n(8)
29783 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029784 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029785 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029786 }
29787
29788 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_lt_8_strided_a) {
29789 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029790 for (size_t k = 1; k < 8; k++) {
29791 GemmMicrokernelTester()
29792 .mr(4)
29793 .nr(8)
29794 .kr(1)
29795 .sr(1)
29796 .m(4)
29797 .n(8)
29798 .k(k)
29799 .a_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029800 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029801 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029802 }
29803
29804 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_lt_8_subtile) {
29805 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029806 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029807 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029808 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029809 GemmMicrokernelTester()
29810 .mr(4)
29811 .nr(8)
29812 .kr(1)
29813 .sr(1)
29814 .m(m)
29815 .n(n)
29816 .k(k)
29817 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029818 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029819 }
29820 }
29821 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029822 }
29823
29824 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_gt_8) {
29825 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029826 for (size_t k = 9; k < 16; k++) {
29827 GemmMicrokernelTester()
29828 .mr(4)
29829 .nr(8)
29830 .kr(1)
29831 .sr(1)
29832 .m(4)
29833 .n(8)
29834 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029835 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029836 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029837 }
29838
29839 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_gt_8_strided_a) {
29840 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029841 for (size_t k = 9; k < 16; k++) {
29842 GemmMicrokernelTester()
29843 .mr(4)
29844 .nr(8)
29845 .kr(1)
29846 .sr(1)
29847 .m(4)
29848 .n(8)
29849 .k(k)
29850 .a_stride(19)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029851 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029852 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029853 }
29854
29855 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_gt_8_subtile) {
29856 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029857 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029858 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029859 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029860 GemmMicrokernelTester()
29861 .mr(4)
29862 .nr(8)
29863 .kr(1)
29864 .sr(1)
29865 .m(m)
29866 .n(n)
29867 .k(k)
29868 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029869 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029870 }
29871 }
29872 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029873 }
29874
29875 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_div_4) {
29876 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029877 for (size_t k = 12; k <= 40; k += 4) {
29878 GemmMicrokernelTester()
29879 .mr(4)
29880 .nr(8)
29881 .kr(1)
29882 .sr(1)
29883 .m(4)
29884 .n(8)
29885 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029886 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029887 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029888 }
29889
29890 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_div_4_strided_a) {
29891 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029892 for (size_t k = 12; k <= 40; k += 4) {
29893 GemmMicrokernelTester()
29894 .mr(4)
29895 .nr(8)
29896 .kr(1)
29897 .sr(1)
29898 .m(4)
29899 .n(8)
29900 .k(k)
29901 .a_stride(43)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029902 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029903 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029904 }
29905
29906 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_div_4_subtile) {
29907 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029908 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029909 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029910 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029911 GemmMicrokernelTester()
29912 .mr(4)
29913 .nr(8)
29914 .kr(1)
29915 .sr(1)
29916 .m(m)
29917 .n(n)
29918 .k(k)
29919 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029920 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029921 }
29922 }
29923 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029924 }
29925
29926 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8) {
29927 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029928 for (uint32_t n = 9; n < 16; n++) {
29929 for (size_t k = 1; k <= 20; k += 5) {
29930 GemmMicrokernelTester()
29931 .mr(4)
29932 .nr(8)
29933 .kr(1)
29934 .sr(1)
29935 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029936 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029937 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029938 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029939 }
29940 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029941 }
29942
29943 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
29944 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029945 for (uint32_t n = 9; n < 16; n++) {
29946 for (size_t k = 1; k <= 20; k += 5) {
29947 GemmMicrokernelTester()
29948 .mr(4)
29949 .nr(8)
29950 .kr(1)
29951 .sr(1)
29952 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029953 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029954 .k(k)
29955 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029956 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029957 }
29958 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029959 }
29960
29961 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_strided_a) {
29962 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029963 for (uint32_t n = 9; n < 16; n++) {
29964 for (size_t k = 1; k <= 20; k += 5) {
29965 GemmMicrokernelTester()
29966 .mr(4)
29967 .nr(8)
29968 .kr(1)
29969 .sr(1)
29970 .m(4)
29971 .n(n)
29972 .k(k)
29973 .a_stride(23)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029974 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029975 }
29976 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029977 }
29978
29979 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_subtile) {
29980 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029981 for (uint32_t n = 9; n < 16; n++) {
29982 for (size_t k = 1; k <= 20; k += 5) {
29983 for (uint32_t m = 1; m <= 4; m++) {
29984 GemmMicrokernelTester()
29985 .mr(4)
29986 .nr(8)
29987 .kr(1)
29988 .sr(1)
29989 .m(m)
29990 .n(n)
29991 .k(k)
29992 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080029993 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029994 }
29995 }
29996 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080029997 }
29998
29999 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8) {
30000 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030001 for (uint32_t n = 16; n <= 24; n += 8) {
30002 for (size_t k = 1; k <= 20; k += 5) {
30003 GemmMicrokernelTester()
30004 .mr(4)
30005 .nr(8)
30006 .kr(1)
30007 .sr(1)
30008 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030009 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030010 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030011 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030012 }
30013 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030014 }
30015
30016 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_strided_cn) {
30017 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030018 for (uint32_t n = 16; n <= 24; n += 8) {
30019 for (size_t k = 1; k <= 20; k += 5) {
30020 GemmMicrokernelTester()
30021 .mr(4)
30022 .nr(8)
30023 .kr(1)
30024 .sr(1)
30025 .m(4)
30026 .n(n)
30027 .k(k)
30028 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030029 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030030 }
30031 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030032 }
30033
30034 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_strided_a) {
30035 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030036 for (uint32_t n = 16; n <= 24; n += 8) {
30037 for (size_t k = 1; k <= 20; k += 5) {
30038 GemmMicrokernelTester()
30039 .mr(4)
30040 .nr(8)
30041 .kr(1)
30042 .sr(1)
30043 .m(4)
30044 .n(n)
30045 .k(k)
30046 .a_stride(23)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030047 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030048 }
30049 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030050 }
30051
30052 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_subtile) {
30053 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030054 for (uint32_t n = 16; n <= 24; n += 8) {
30055 for (size_t k = 1; k <= 20; k += 5) {
30056 for (uint32_t m = 1; m <= 4; m++) {
30057 GemmMicrokernelTester()
30058 .mr(4)
30059 .nr(8)
30060 .kr(1)
30061 .sr(1)
30062 .m(m)
30063 .n(n)
30064 .k(k)
30065 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030066 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030067 }
30068 }
30069 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030070 }
30071
30072 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cm_subtile) {
30073 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030074 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030075 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030076 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030077 GemmMicrokernelTester()
30078 .mr(4)
30079 .nr(8)
30080 .kr(1)
30081 .sr(1)
30082 .m(m)
30083 .n(n)
30084 .k(k)
30085 .cm_stride(11)
30086 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030087 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030088 }
30089 }
30090 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030091 }
30092
30093 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, qmin) {
30094 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030095 GemmMicrokernelTester()
30096 .mr(4)
30097 .nr(8)
30098 .kr(1)
30099 .sr(1)
30100 .m(4)
30101 .n(8)
30102 .k(4)
30103 .qmin(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030104 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030105 }
30106
30107 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, qmax) {
30108 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030109 GemmMicrokernelTester()
30110 .mr(4)
30111 .nr(8)
30112 .kr(1)
30113 .sr(1)
30114 .m(4)
30115 .n(8)
30116 .k(4)
30117 .qmax(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030118 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030119 }
30120
30121 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cm) {
30122 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030123 GemmMicrokernelTester()
30124 .mr(4)
30125 .nr(8)
30126 .kr(1)
30127 .sr(1)
30128 .m(4)
30129 .n(8)
30130 .k(4)
30131 .cm_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030132 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030133 }
30134#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
30135
30136
30137#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030138 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2) {
30139 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030140 GemmMicrokernelTester()
30141 .mr(4)
30142 .nr(8)
30143 .kr(1)
30144 .sr(1)
30145 .m(4)
30146 .n(8)
30147 .k(2)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030148 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030149 }
30150
30151 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, strided_cn) {
30152 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030153 GemmMicrokernelTester()
30154 .mr(4)
30155 .nr(8)
30156 .kr(1)
30157 .sr(1)
30158 .m(4)
30159 .n(8)
30160 .k(2)
30161 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030162 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030163 }
30164
30165 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_strided_a) {
30166 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030167 GemmMicrokernelTester()
30168 .mr(4)
30169 .nr(8)
30170 .kr(1)
30171 .sr(1)
30172 .m(4)
30173 .n(8)
30174 .k(2)
30175 .a_stride(5)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030176 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030177 }
30178
30179 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile) {
30180 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080030181 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030182 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030183 GemmMicrokernelTester()
30184 .mr(4)
30185 .nr(8)
30186 .kr(1)
30187 .sr(1)
30188 .m(m)
30189 .n(n)
30190 .k(2)
30191 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030192 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030193 }
30194 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030195 }
30196
30197 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile_m) {
30198 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030199 for (uint32_t m = 1; m <= 4; m++) {
30200 GemmMicrokernelTester()
30201 .mr(4)
30202 .nr(8)
30203 .kr(1)
30204 .sr(1)
30205 .m(m)
30206 .n(8)
30207 .k(2)
30208 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030209 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030210 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030211 }
30212
30213 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile_n) {
30214 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030215 for (uint32_t n = 1; n <= 8; n++) {
30216 GemmMicrokernelTester()
30217 .mr(4)
30218 .nr(8)
30219 .kr(1)
30220 .sr(1)
30221 .m(4)
30222 .n(n)
30223 .k(2)
30224 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030225 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030226 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030227 }
30228
30229 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_lt_2) {
30230 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030231 for (size_t k = 1; k < 2; k++) {
30232 GemmMicrokernelTester()
30233 .mr(4)
30234 .nr(8)
30235 .kr(1)
30236 .sr(1)
30237 .m(4)
30238 .n(8)
30239 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030240 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030241 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030242 }
30243
30244 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_lt_2_strided_a) {
30245 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030246 for (size_t k = 1; k < 2; k++) {
30247 GemmMicrokernelTester()
30248 .mr(4)
30249 .nr(8)
30250 .kr(1)
30251 .sr(1)
30252 .m(4)
30253 .n(8)
30254 .k(k)
30255 .a_stride(5)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030256 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030257 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030258 }
30259
30260 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_lt_2_subtile) {
30261 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030262 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030263 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030264 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030265 GemmMicrokernelTester()
30266 .mr(4)
30267 .nr(8)
30268 .kr(1)
30269 .sr(1)
30270 .m(m)
30271 .n(n)
30272 .k(k)
30273 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030274 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030275 }
30276 }
30277 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030278 }
30279
30280 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_gt_2) {
30281 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030282 for (size_t k = 3; k < 4; k++) {
30283 GemmMicrokernelTester()
30284 .mr(4)
30285 .nr(8)
30286 .kr(1)
30287 .sr(1)
30288 .m(4)
30289 .n(8)
30290 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030291 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030292 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030293 }
30294
30295 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_gt_2_strided_a) {
30296 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030297 for (size_t k = 3; k < 4; k++) {
30298 GemmMicrokernelTester()
30299 .mr(4)
30300 .nr(8)
30301 .kr(1)
30302 .sr(1)
30303 .m(4)
30304 .n(8)
30305 .k(k)
30306 .a_stride(7)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030307 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030308 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030309 }
30310
30311 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_gt_2_subtile) {
30312 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030313 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030314 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030315 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030316 GemmMicrokernelTester()
30317 .mr(4)
30318 .nr(8)
30319 .kr(1)
30320 .sr(1)
30321 .m(m)
30322 .n(n)
30323 .k(k)
30324 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030325 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030326 }
30327 }
30328 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030329 }
30330
30331 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_div_2) {
30332 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030333 for (size_t k = 4; k <= 20; k += 2) {
30334 GemmMicrokernelTester()
30335 .mr(4)
30336 .nr(8)
30337 .kr(1)
30338 .sr(1)
30339 .m(4)
30340 .n(8)
30341 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030342 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030343 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030344 }
30345
30346 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_div_2_strided_a) {
30347 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030348 for (size_t k = 4; k <= 20; k += 2) {
30349 GemmMicrokernelTester()
30350 .mr(4)
30351 .nr(8)
30352 .kr(1)
30353 .sr(1)
30354 .m(4)
30355 .n(8)
30356 .k(k)
30357 .a_stride(23)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030358 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030359 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030360 }
30361
30362 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, k_div_2_subtile) {
30363 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030364 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030365 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030366 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030367 GemmMicrokernelTester()
30368 .mr(4)
30369 .nr(8)
30370 .kr(1)
30371 .sr(1)
30372 .m(m)
30373 .n(n)
30374 .k(k)
30375 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030376 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030377 }
30378 }
30379 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030380 }
30381
30382 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8) {
30383 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030384 for (uint32_t n = 9; n < 16; n++) {
30385 for (size_t k = 1; k <= 10; k += 3) {
30386 GemmMicrokernelTester()
30387 .mr(4)
30388 .nr(8)
30389 .kr(1)
30390 .sr(1)
30391 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030392 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030393 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030394 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030395 }
30396 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030397 }
30398
30399 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_strided_cn) {
30400 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030401 for (uint32_t n = 9; n < 16; n++) {
30402 for (size_t k = 1; k <= 10; k += 3) {
30403 GemmMicrokernelTester()
30404 .mr(4)
30405 .nr(8)
30406 .kr(1)
30407 .sr(1)
30408 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030409 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030410 .k(k)
30411 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030412 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030413 }
30414 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030415 }
30416
30417 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_strided_a) {
30418 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030419 for (uint32_t n = 9; n < 16; n++) {
30420 for (size_t k = 1; k <= 10; k += 3) {
30421 GemmMicrokernelTester()
30422 .mr(4)
30423 .nr(8)
30424 .kr(1)
30425 .sr(1)
30426 .m(4)
30427 .n(n)
30428 .k(k)
30429 .a_stride(13)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030430 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030431 }
30432 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030433 }
30434
30435 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_subtile) {
30436 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030437 for (uint32_t n = 9; n < 16; n++) {
30438 for (size_t k = 1; k <= 10; k += 3) {
30439 for (uint32_t m = 1; m <= 4; m++) {
30440 GemmMicrokernelTester()
30441 .mr(4)
30442 .nr(8)
30443 .kr(1)
30444 .sr(1)
30445 .m(m)
30446 .n(n)
30447 .k(k)
30448 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030449 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030450 }
30451 }
30452 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030453 }
30454
30455 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, n_div_8) {
30456 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030457 for (uint32_t n = 16; n <= 24; n += 8) {
30458 for (size_t k = 1; k <= 10; k += 3) {
30459 GemmMicrokernelTester()
30460 .mr(4)
30461 .nr(8)
30462 .kr(1)
30463 .sr(1)
30464 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030465 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030466 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030467 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030468 }
30469 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030470 }
30471
30472 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_strided_cn) {
30473 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030474 for (uint32_t n = 16; n <= 24; n += 8) {
30475 for (size_t k = 1; k <= 10; k += 3) {
30476 GemmMicrokernelTester()
30477 .mr(4)
30478 .nr(8)
30479 .kr(1)
30480 .sr(1)
30481 .m(4)
30482 .n(n)
30483 .k(k)
30484 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030485 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030486 }
30487 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030488 }
30489
30490 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_strided_a) {
30491 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030492 for (uint32_t n = 16; n <= 24; n += 8) {
30493 for (size_t k = 1; k <= 10; k += 3) {
30494 GemmMicrokernelTester()
30495 .mr(4)
30496 .nr(8)
30497 .kr(1)
30498 .sr(1)
30499 .m(4)
30500 .n(n)
30501 .k(k)
30502 .a_stride(13)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030503 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030504 }
30505 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030506 }
30507
30508 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_subtile) {
30509 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030510 for (uint32_t n = 16; n <= 24; n += 8) {
30511 for (size_t k = 1; k <= 10; k += 3) {
30512 for (uint32_t m = 1; m <= 4; m++) {
30513 GemmMicrokernelTester()
30514 .mr(4)
30515 .nr(8)
30516 .kr(1)
30517 .sr(1)
30518 .m(m)
30519 .n(n)
30520 .k(k)
30521 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030522 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030523 }
30524 }
30525 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030526 }
30527
30528 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, strided_cm_subtile) {
30529 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030530 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030531 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030532 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030533 GemmMicrokernelTester()
30534 .mr(4)
30535 .nr(8)
30536 .kr(1)
30537 .sr(1)
30538 .m(m)
30539 .n(n)
30540 .k(k)
30541 .cm_stride(11)
30542 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030543 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030544 }
30545 }
30546 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030547 }
30548
30549 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, qmin) {
30550 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030551 GemmMicrokernelTester()
30552 .mr(4)
30553 .nr(8)
30554 .kr(1)
30555 .sr(1)
30556 .m(4)
30557 .n(8)
30558 .k(2)
30559 .qmin(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030560 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030561 }
30562
30563 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, qmax) {
30564 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030565 GemmMicrokernelTester()
30566 .mr(4)
30567 .nr(8)
30568 .kr(1)
30569 .sr(1)
30570 .m(4)
30571 .n(8)
30572 .k(2)
30573 .qmax(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030574 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030575 }
30576
30577 TEST(GENERATE_F32_GEMM_4X8__AARCH32_NEON_CORTEX_A7, strided_cm) {
30578 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030579 GemmMicrokernelTester()
30580 .mr(4)
30581 .nr(8)
30582 .kr(1)
30583 .sr(1)
30584 .m(4)
30585 .n(8)
30586 .k(2)
30587 .cm_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030588 .Test(xnn_generate_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030589 }
30590#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
Zhi An Ngc2e2da82022-01-25 16:51:58 -080030591
30592
30593#if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
Zhi An Ngeb7256b2022-02-03 16:02:54 -080030594 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
30595 TEST_REQUIRES_ARM_NEON_FMA;
30596 GemmMicrokernelTester()
30597 .mr(1)
30598 .nr(8)
30599 .kr(1)
30600 .sr(1)
30601 .m(1)
30602 .n(8)
30603 .k(8)
30604 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30605 }
30606
30607 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
30608 TEST_REQUIRES_ARM_NEON_FMA;
30609 GemmMicrokernelTester()
30610 .mr(1)
30611 .nr(8)
30612 .kr(1)
30613 .sr(1)
30614 .m(1)
30615 .n(8)
30616 .k(8)
30617 .cn_stride(11)
30618 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30619 }
30620
30621 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_strided_a) {
30622 TEST_REQUIRES_ARM_NEON_FMA;
30623 GemmMicrokernelTester()
30624 .mr(1)
30625 .nr(8)
30626 .kr(1)
30627 .sr(1)
30628 .m(1)
30629 .n(8)
30630 .k(8)
30631 .a_stride(11)
30632 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30633 }
30634
30635 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
30636 TEST_REQUIRES_ARM_NEON_FMA;
30637 for (uint32_t n = 1; n <= 8; n++) {
30638 for (uint32_t m = 1; m <= 1; m++) {
30639 GemmMicrokernelTester()
30640 .mr(1)
30641 .nr(8)
30642 .kr(1)
30643 .sr(1)
30644 .m(m)
30645 .n(n)
30646 .k(8)
30647 .iterations(1)
30648 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30649 }
30650 }
30651 }
30652
30653 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
30654 TEST_REQUIRES_ARM_NEON_FMA;
30655 for (uint32_t m = 1; m <= 1; m++) {
30656 GemmMicrokernelTester()
30657 .mr(1)
30658 .nr(8)
30659 .kr(1)
30660 .sr(1)
30661 .m(m)
30662 .n(8)
30663 .k(8)
30664 .iterations(1)
30665 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30666 }
30667 }
30668
30669 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
30670 TEST_REQUIRES_ARM_NEON_FMA;
30671 for (uint32_t n = 1; n <= 8; n++) {
30672 GemmMicrokernelTester()
30673 .mr(1)
30674 .nr(8)
30675 .kr(1)
30676 .sr(1)
30677 .m(1)
30678 .n(n)
30679 .k(8)
30680 .iterations(1)
30681 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30682 }
30683 }
30684
30685 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
30686 TEST_REQUIRES_ARM_NEON_FMA;
30687 GemmMicrokernelTester()
30688 .mr(1)
30689 .nr(8)
30690 .kr(1)
30691 .sr(1)
30692 .m(1)
30693 .n(8)
30694 .k(16)
30695 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30696 }
30697
30698 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_strided_a) {
30699 TEST_REQUIRES_ARM_NEON_FMA;
30700 GemmMicrokernelTester()
30701 .mr(1)
30702 .nr(8)
30703 .kr(1)
30704 .sr(1)
30705 .m(1)
30706 .n(8)
30707 .k(16)
30708 .a_stride(19)
30709 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30710 }
30711
30712 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
30713 TEST_REQUIRES_ARM_NEON_FMA;
30714 for (uint32_t n = 1; n <= 8; n++) {
30715 for (uint32_t m = 1; m <= 1; m++) {
30716 GemmMicrokernelTester()
30717 .mr(1)
30718 .nr(8)
30719 .kr(1)
30720 .sr(1)
30721 .m(m)
30722 .n(n)
30723 .k(16)
30724 .iterations(1)
30725 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30726 }
30727 }
30728 }
30729
30730 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
30731 TEST_REQUIRES_ARM_NEON_FMA;
30732 for (size_t k = 1; k < 16; k++) {
30733 GemmMicrokernelTester()
30734 .mr(1)
30735 .nr(8)
30736 .kr(1)
30737 .sr(1)
30738 .m(1)
30739 .n(8)
30740 .k(k)
30741 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30742 }
30743 }
30744
30745 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_strided_a) {
30746 TEST_REQUIRES_ARM_NEON_FMA;
30747 for (size_t k = 1; k < 16; k++) {
30748 GemmMicrokernelTester()
30749 .mr(1)
30750 .nr(8)
30751 .kr(1)
30752 .sr(1)
30753 .m(1)
30754 .n(8)
30755 .k(k)
30756 .a_stride(19)
30757 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30758 }
30759 }
30760
30761 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
30762 TEST_REQUIRES_ARM_NEON_FMA;
30763 for (size_t k = 1; k < 16; k++) {
30764 for (uint32_t n = 1; n <= 8; n++) {
30765 for (uint32_t m = 1; m <= 1; m++) {
30766 GemmMicrokernelTester()
30767 .mr(1)
30768 .nr(8)
30769 .kr(1)
30770 .sr(1)
30771 .m(m)
30772 .n(n)
30773 .k(k)
30774 .iterations(1)
30775 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30776 }
30777 }
30778 }
30779 }
30780
30781 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
30782 TEST_REQUIRES_ARM_NEON_FMA;
30783 for (size_t k = 17; k < 32; k++) {
30784 GemmMicrokernelTester()
30785 .mr(1)
30786 .nr(8)
30787 .kr(1)
30788 .sr(1)
30789 .m(1)
30790 .n(8)
30791 .k(k)
30792 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30793 }
30794 }
30795
30796 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_strided_a) {
30797 TEST_REQUIRES_ARM_NEON_FMA;
30798 for (size_t k = 17; k < 32; k++) {
30799 GemmMicrokernelTester()
30800 .mr(1)
30801 .nr(8)
30802 .kr(1)
30803 .sr(1)
30804 .m(1)
30805 .n(8)
30806 .k(k)
30807 .a_stride(37)
30808 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30809 }
30810 }
30811
30812 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
30813 TEST_REQUIRES_ARM_NEON_FMA;
30814 for (size_t k = 17; k < 32; k++) {
30815 for (uint32_t n = 1; n <= 8; n++) {
30816 for (uint32_t m = 1; m <= 1; m++) {
30817 GemmMicrokernelTester()
30818 .mr(1)
30819 .nr(8)
30820 .kr(1)
30821 .sr(1)
30822 .m(m)
30823 .n(n)
30824 .k(k)
30825 .iterations(1)
30826 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30827 }
30828 }
30829 }
30830 }
30831
30832 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
30833 TEST_REQUIRES_ARM_NEON_FMA;
30834 for (size_t k = 24; k <= 80; k += 8) {
30835 GemmMicrokernelTester()
30836 .mr(1)
30837 .nr(8)
30838 .kr(1)
30839 .sr(1)
30840 .m(1)
30841 .n(8)
30842 .k(k)
30843 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30844 }
30845 }
30846
30847 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_strided_a) {
30848 TEST_REQUIRES_ARM_NEON_FMA;
30849 for (size_t k = 24; k <= 80; k += 8) {
30850 GemmMicrokernelTester()
30851 .mr(1)
30852 .nr(8)
30853 .kr(1)
30854 .sr(1)
30855 .m(1)
30856 .n(8)
30857 .k(k)
30858 .a_stride(83)
30859 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30860 }
30861 }
30862
30863 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
30864 TEST_REQUIRES_ARM_NEON_FMA;
30865 for (size_t k = 24; k <= 80; k += 8) {
30866 for (uint32_t n = 1; n <= 8; n++) {
30867 for (uint32_t m = 1; m <= 1; m++) {
30868 GemmMicrokernelTester()
30869 .mr(1)
30870 .nr(8)
30871 .kr(1)
30872 .sr(1)
30873 .m(m)
30874 .n(n)
30875 .k(k)
30876 .iterations(1)
30877 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30878 }
30879 }
30880 }
30881 }
30882
30883 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
30884 TEST_REQUIRES_ARM_NEON_FMA;
30885 for (uint32_t n = 9; n < 16; n++) {
30886 for (size_t k = 1; k <= 40; k += 9) {
30887 GemmMicrokernelTester()
30888 .mr(1)
30889 .nr(8)
30890 .kr(1)
30891 .sr(1)
30892 .m(1)
30893 .n(n)
30894 .k(k)
30895 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30896 }
30897 }
30898 }
30899
30900 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
30901 TEST_REQUIRES_ARM_NEON_FMA;
30902 for (uint32_t n = 9; n < 16; n++) {
30903 for (size_t k = 1; k <= 40; k += 9) {
30904 GemmMicrokernelTester()
30905 .mr(1)
30906 .nr(8)
30907 .kr(1)
30908 .sr(1)
30909 .m(1)
30910 .n(n)
30911 .k(k)
30912 .cn_stride(11)
30913 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30914 }
30915 }
30916 }
30917
30918 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_a) {
30919 TEST_REQUIRES_ARM_NEON_FMA;
30920 for (uint32_t n = 9; n < 16; n++) {
30921 for (size_t k = 1; k <= 40; k += 9) {
30922 GemmMicrokernelTester()
30923 .mr(1)
30924 .nr(8)
30925 .kr(1)
30926 .sr(1)
30927 .m(1)
30928 .n(n)
30929 .k(k)
30930 .a_stride(43)
30931 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30932 }
30933 }
30934 }
30935
30936 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
30937 TEST_REQUIRES_ARM_NEON_FMA;
30938 for (uint32_t n = 9; n < 16; n++) {
30939 for (size_t k = 1; k <= 40; k += 9) {
30940 for (uint32_t m = 1; m <= 1; m++) {
30941 GemmMicrokernelTester()
30942 .mr(1)
30943 .nr(8)
30944 .kr(1)
30945 .sr(1)
30946 .m(m)
30947 .n(n)
30948 .k(k)
30949 .iterations(1)
30950 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30951 }
30952 }
30953 }
30954 }
30955
30956 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
30957 TEST_REQUIRES_ARM_NEON_FMA;
30958 for (uint32_t n = 16; n <= 24; n += 8) {
30959 for (size_t k = 1; k <= 40; k += 9) {
30960 GemmMicrokernelTester()
30961 .mr(1)
30962 .nr(8)
30963 .kr(1)
30964 .sr(1)
30965 .m(1)
30966 .n(n)
30967 .k(k)
30968 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30969 }
30970 }
30971 }
30972
30973 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
30974 TEST_REQUIRES_ARM_NEON_FMA;
30975 for (uint32_t n = 16; n <= 24; n += 8) {
30976 for (size_t k = 1; k <= 40; k += 9) {
30977 GemmMicrokernelTester()
30978 .mr(1)
30979 .nr(8)
30980 .kr(1)
30981 .sr(1)
30982 .m(1)
30983 .n(n)
30984 .k(k)
30985 .cn_stride(11)
30986 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
30987 }
30988 }
30989 }
30990
30991 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_a) {
30992 TEST_REQUIRES_ARM_NEON_FMA;
30993 for (uint32_t n = 16; n <= 24; n += 8) {
30994 for (size_t k = 1; k <= 40; k += 9) {
30995 GemmMicrokernelTester()
30996 .mr(1)
30997 .nr(8)
30998 .kr(1)
30999 .sr(1)
31000 .m(1)
31001 .n(n)
31002 .k(k)
31003 .a_stride(43)
31004 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
31005 }
31006 }
31007 }
31008
31009 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
31010 TEST_REQUIRES_ARM_NEON_FMA;
31011 for (uint32_t n = 16; n <= 24; n += 8) {
31012 for (size_t k = 1; k <= 40; k += 9) {
31013 for (uint32_t m = 1; m <= 1; m++) {
31014 GemmMicrokernelTester()
31015 .mr(1)
31016 .nr(8)
31017 .kr(1)
31018 .sr(1)
31019 .m(m)
31020 .n(n)
31021 .k(k)
31022 .iterations(1)
31023 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
31024 }
31025 }
31026 }
31027 }
31028
31029 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
31030 TEST_REQUIRES_ARM_NEON_FMA;
31031 for (size_t k = 1; k <= 40; k += 9) {
31032 for (uint32_t n = 1; n <= 8; n++) {
31033 for (uint32_t m = 1; m <= 1; m++) {
31034 GemmMicrokernelTester()
31035 .mr(1)
31036 .nr(8)
31037 .kr(1)
31038 .sr(1)
31039 .m(m)
31040 .n(n)
31041 .k(k)
31042 .cm_stride(11)
31043 .iterations(1)
31044 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
31045 }
31046 }
31047 }
31048 }
31049
31050 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
31051 TEST_REQUIRES_ARM_NEON_FMA;
31052 GemmMicrokernelTester()
31053 .mr(1)
31054 .nr(8)
31055 .kr(1)
31056 .sr(1)
31057 .m(1)
31058 .n(8)
31059 .k(8)
31060 .qmin(128)
31061 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
31062 }
31063
31064 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
31065 TEST_REQUIRES_ARM_NEON_FMA;
31066 GemmMicrokernelTester()
31067 .mr(1)
31068 .nr(8)
31069 .kr(1)
31070 .sr(1)
31071 .m(1)
31072 .n(8)
31073 .k(8)
31074 .qmax(128)
31075 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
31076 }
31077
31078 TEST(GENERATE_F32_GEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
31079 TEST_REQUIRES_ARM_NEON_FMA;
31080 GemmMicrokernelTester()
31081 .mr(1)
31082 .nr(8)
31083 .kr(1)
31084 .sr(1)
31085 .m(1)
31086 .n(8)
31087 .k(8)
31088 .cm_stride(11)
31089 .Test(xnn_generate_f32_gemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
31090 }
31091#endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
31092
31093
31094#if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
Zhi An Ngc2e2da82022-01-25 16:51:58 -080031095 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
31096 TEST_REQUIRES_ARM_NEON_FMA;
31097 GemmMicrokernelTester()
31098 .mr(6)
31099 .nr(8)
31100 .kr(1)
31101 .sr(1)
31102 .m(6)
31103 .n(8)
31104 .k(8)
31105 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31106 }
31107
31108 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
31109 TEST_REQUIRES_ARM_NEON_FMA;
31110 GemmMicrokernelTester()
31111 .mr(6)
31112 .nr(8)
31113 .kr(1)
31114 .sr(1)
31115 .m(6)
31116 .n(8)
31117 .k(8)
31118 .cn_stride(11)
31119 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31120 }
31121
31122 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_strided_a) {
31123 TEST_REQUIRES_ARM_NEON_FMA;
31124 GemmMicrokernelTester()
31125 .mr(6)
31126 .nr(8)
31127 .kr(1)
31128 .sr(1)
31129 .m(6)
31130 .n(8)
31131 .k(8)
31132 .a_stride(11)
31133 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31134 }
31135
31136 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
31137 TEST_REQUIRES_ARM_NEON_FMA;
31138 for (uint32_t n = 1; n <= 8; n++) {
31139 for (uint32_t m = 1; m <= 6; m++) {
31140 GemmMicrokernelTester()
31141 .mr(6)
31142 .nr(8)
31143 .kr(1)
31144 .sr(1)
31145 .m(m)
31146 .n(n)
31147 .k(8)
31148 .iterations(1)
31149 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31150 }
31151 }
31152 }
31153
31154 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
31155 TEST_REQUIRES_ARM_NEON_FMA;
31156 for (uint32_t m = 1; m <= 6; m++) {
31157 GemmMicrokernelTester()
31158 .mr(6)
31159 .nr(8)
31160 .kr(1)
31161 .sr(1)
31162 .m(m)
31163 .n(8)
31164 .k(8)
31165 .iterations(1)
31166 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31167 }
31168 }
31169
31170 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
31171 TEST_REQUIRES_ARM_NEON_FMA;
31172 for (uint32_t n = 1; n <= 8; n++) {
31173 GemmMicrokernelTester()
31174 .mr(6)
31175 .nr(8)
31176 .kr(1)
31177 .sr(1)
31178 .m(6)
31179 .n(n)
31180 .k(8)
31181 .iterations(1)
31182 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31183 }
31184 }
31185
31186 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
31187 TEST_REQUIRES_ARM_NEON_FMA;
31188 GemmMicrokernelTester()
31189 .mr(6)
31190 .nr(8)
31191 .kr(1)
31192 .sr(1)
31193 .m(6)
31194 .n(8)
31195 .k(16)
31196 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31197 }
31198
31199 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_strided_a) {
31200 TEST_REQUIRES_ARM_NEON_FMA;
31201 GemmMicrokernelTester()
31202 .mr(6)
31203 .nr(8)
31204 .kr(1)
31205 .sr(1)
31206 .m(6)
31207 .n(8)
31208 .k(16)
31209 .a_stride(19)
31210 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31211 }
31212
31213 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
31214 TEST_REQUIRES_ARM_NEON_FMA;
31215 for (uint32_t n = 1; n <= 8; n++) {
31216 for (uint32_t m = 1; m <= 6; m++) {
31217 GemmMicrokernelTester()
31218 .mr(6)
31219 .nr(8)
31220 .kr(1)
31221 .sr(1)
31222 .m(m)
31223 .n(n)
31224 .k(16)
31225 .iterations(1)
31226 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31227 }
31228 }
31229 }
31230
31231 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
31232 TEST_REQUIRES_ARM_NEON_FMA;
31233 for (size_t k = 1; k < 16; k++) {
31234 GemmMicrokernelTester()
31235 .mr(6)
31236 .nr(8)
31237 .kr(1)
31238 .sr(1)
31239 .m(6)
31240 .n(8)
31241 .k(k)
31242 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31243 }
31244 }
31245
31246 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_strided_a) {
31247 TEST_REQUIRES_ARM_NEON_FMA;
31248 for (size_t k = 1; k < 16; k++) {
31249 GemmMicrokernelTester()
31250 .mr(6)
31251 .nr(8)
31252 .kr(1)
31253 .sr(1)
31254 .m(6)
31255 .n(8)
31256 .k(k)
31257 .a_stride(19)
31258 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31259 }
31260 }
31261
31262 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
31263 TEST_REQUIRES_ARM_NEON_FMA;
31264 for (size_t k = 1; k < 16; k++) {
31265 for (uint32_t n = 1; n <= 8; n++) {
31266 for (uint32_t m = 1; m <= 6; m++) {
31267 GemmMicrokernelTester()
31268 .mr(6)
31269 .nr(8)
31270 .kr(1)
31271 .sr(1)
31272 .m(m)
31273 .n(n)
31274 .k(k)
31275 .iterations(1)
31276 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31277 }
31278 }
31279 }
31280 }
31281
31282 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
31283 TEST_REQUIRES_ARM_NEON_FMA;
31284 for (size_t k = 17; k < 32; k++) {
31285 GemmMicrokernelTester()
31286 .mr(6)
31287 .nr(8)
31288 .kr(1)
31289 .sr(1)
31290 .m(6)
31291 .n(8)
31292 .k(k)
31293 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31294 }
31295 }
31296
31297 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_strided_a) {
31298 TEST_REQUIRES_ARM_NEON_FMA;
31299 for (size_t k = 17; k < 32; k++) {
31300 GemmMicrokernelTester()
31301 .mr(6)
31302 .nr(8)
31303 .kr(1)
31304 .sr(1)
31305 .m(6)
31306 .n(8)
31307 .k(k)
31308 .a_stride(37)
31309 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31310 }
31311 }
31312
31313 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
31314 TEST_REQUIRES_ARM_NEON_FMA;
31315 for (size_t k = 17; k < 32; k++) {
31316 for (uint32_t n = 1; n <= 8; n++) {
31317 for (uint32_t m = 1; m <= 6; m++) {
31318 GemmMicrokernelTester()
31319 .mr(6)
31320 .nr(8)
31321 .kr(1)
31322 .sr(1)
31323 .m(m)
31324 .n(n)
31325 .k(k)
31326 .iterations(1)
31327 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31328 }
31329 }
31330 }
31331 }
31332
31333 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
31334 TEST_REQUIRES_ARM_NEON_FMA;
31335 for (size_t k = 24; k <= 80; k += 8) {
31336 GemmMicrokernelTester()
31337 .mr(6)
31338 .nr(8)
31339 .kr(1)
31340 .sr(1)
31341 .m(6)
31342 .n(8)
31343 .k(k)
31344 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31345 }
31346 }
31347
31348 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_strided_a) {
31349 TEST_REQUIRES_ARM_NEON_FMA;
31350 for (size_t k = 24; k <= 80; k += 8) {
31351 GemmMicrokernelTester()
31352 .mr(6)
31353 .nr(8)
31354 .kr(1)
31355 .sr(1)
31356 .m(6)
31357 .n(8)
31358 .k(k)
31359 .a_stride(83)
31360 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31361 }
31362 }
31363
31364 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
31365 TEST_REQUIRES_ARM_NEON_FMA;
31366 for (size_t k = 24; k <= 80; k += 8) {
31367 for (uint32_t n = 1; n <= 8; n++) {
31368 for (uint32_t m = 1; m <= 6; m++) {
31369 GemmMicrokernelTester()
31370 .mr(6)
31371 .nr(8)
31372 .kr(1)
31373 .sr(1)
31374 .m(m)
31375 .n(n)
31376 .k(k)
31377 .iterations(1)
31378 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31379 }
31380 }
31381 }
31382 }
31383
31384 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
31385 TEST_REQUIRES_ARM_NEON_FMA;
31386 for (uint32_t n = 9; n < 16; n++) {
31387 for (size_t k = 1; k <= 40; k += 9) {
31388 GemmMicrokernelTester()
31389 .mr(6)
31390 .nr(8)
31391 .kr(1)
31392 .sr(1)
31393 .m(6)
31394 .n(n)
31395 .k(k)
31396 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31397 }
31398 }
31399 }
31400
31401 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
31402 TEST_REQUIRES_ARM_NEON_FMA;
31403 for (uint32_t n = 9; n < 16; n++) {
31404 for (size_t k = 1; k <= 40; k += 9) {
31405 GemmMicrokernelTester()
31406 .mr(6)
31407 .nr(8)
31408 .kr(1)
31409 .sr(1)
31410 .m(6)
31411 .n(n)
31412 .k(k)
31413 .cn_stride(11)
31414 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31415 }
31416 }
31417 }
31418
31419 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_a) {
31420 TEST_REQUIRES_ARM_NEON_FMA;
31421 for (uint32_t n = 9; n < 16; n++) {
31422 for (size_t k = 1; k <= 40; k += 9) {
31423 GemmMicrokernelTester()
31424 .mr(6)
31425 .nr(8)
31426 .kr(1)
31427 .sr(1)
31428 .m(6)
31429 .n(n)
31430 .k(k)
31431 .a_stride(43)
31432 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31433 }
31434 }
31435 }
31436
31437 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
31438 TEST_REQUIRES_ARM_NEON_FMA;
31439 for (uint32_t n = 9; n < 16; n++) {
31440 for (size_t k = 1; k <= 40; k += 9) {
31441 for (uint32_t m = 1; m <= 6; m++) {
31442 GemmMicrokernelTester()
31443 .mr(6)
31444 .nr(8)
31445 .kr(1)
31446 .sr(1)
31447 .m(m)
31448 .n(n)
31449 .k(k)
31450 .iterations(1)
31451 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31452 }
31453 }
31454 }
31455 }
31456
31457 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
31458 TEST_REQUIRES_ARM_NEON_FMA;
31459 for (uint32_t n = 16; n <= 24; n += 8) {
31460 for (size_t k = 1; k <= 40; k += 9) {
31461 GemmMicrokernelTester()
31462 .mr(6)
31463 .nr(8)
31464 .kr(1)
31465 .sr(1)
31466 .m(6)
31467 .n(n)
31468 .k(k)
31469 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31470 }
31471 }
31472 }
31473
31474 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
31475 TEST_REQUIRES_ARM_NEON_FMA;
31476 for (uint32_t n = 16; n <= 24; n += 8) {
31477 for (size_t k = 1; k <= 40; k += 9) {
31478 GemmMicrokernelTester()
31479 .mr(6)
31480 .nr(8)
31481 .kr(1)
31482 .sr(1)
31483 .m(6)
31484 .n(n)
31485 .k(k)
31486 .cn_stride(11)
31487 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31488 }
31489 }
31490 }
31491
31492 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_a) {
31493 TEST_REQUIRES_ARM_NEON_FMA;
31494 for (uint32_t n = 16; n <= 24; n += 8) {
31495 for (size_t k = 1; k <= 40; k += 9) {
31496 GemmMicrokernelTester()
31497 .mr(6)
31498 .nr(8)
31499 .kr(1)
31500 .sr(1)
31501 .m(6)
31502 .n(n)
31503 .k(k)
31504 .a_stride(43)
31505 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31506 }
31507 }
31508 }
31509
31510 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
31511 TEST_REQUIRES_ARM_NEON_FMA;
31512 for (uint32_t n = 16; n <= 24; n += 8) {
31513 for (size_t k = 1; k <= 40; k += 9) {
31514 for (uint32_t m = 1; m <= 6; m++) {
31515 GemmMicrokernelTester()
31516 .mr(6)
31517 .nr(8)
31518 .kr(1)
31519 .sr(1)
31520 .m(m)
31521 .n(n)
31522 .k(k)
31523 .iterations(1)
31524 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31525 }
31526 }
31527 }
31528 }
31529
31530 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
31531 TEST_REQUIRES_ARM_NEON_FMA;
31532 for (size_t k = 1; k <= 40; k += 9) {
31533 for (uint32_t n = 1; n <= 8; n++) {
31534 for (uint32_t m = 1; m <= 6; m++) {
31535 GemmMicrokernelTester()
31536 .mr(6)
31537 .nr(8)
31538 .kr(1)
31539 .sr(1)
31540 .m(m)
31541 .n(n)
31542 .k(k)
31543 .cm_stride(11)
31544 .iterations(1)
31545 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31546 }
31547 }
31548 }
31549 }
31550
31551 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
31552 TEST_REQUIRES_ARM_NEON_FMA;
31553 GemmMicrokernelTester()
31554 .mr(6)
31555 .nr(8)
31556 .kr(1)
31557 .sr(1)
31558 .m(6)
31559 .n(8)
31560 .k(8)
31561 .qmin(128)
31562 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31563 }
31564
31565 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
31566 TEST_REQUIRES_ARM_NEON_FMA;
31567 GemmMicrokernelTester()
31568 .mr(6)
31569 .nr(8)
31570 .kr(1)
31571 .sr(1)
31572 .m(6)
31573 .n(8)
31574 .k(8)
31575 .qmax(128)
31576 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31577 }
31578
31579 TEST(GENERATE_F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
31580 TEST_REQUIRES_ARM_NEON_FMA;
31581 GemmMicrokernelTester()
31582 .mr(6)
31583 .nr(8)
31584 .kr(1)
31585 .sr(1)
31586 .m(6)
31587 .n(8)
31588 .k(8)
31589 .cm_stride(11)
31590 .Test(xnn_generate_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
31591 }
31592#endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT