blob: a09290ed4c1b9f838f84c062fd6038d357b01cd0 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-gemm.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/allocator.h>
17#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
26#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_eq_4) {
28 GemmMicrokernelTester()
29 .mr(1)
30 .nr(8)
31 .kr(1)
32 .sr(1)
33 .m(1)
34 .n(8)
35 .k(4)
36 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
37 }
38
39 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, strided_cn) {
40 GemmMicrokernelTester()
41 .mr(1)
42 .nr(8)
43 .kr(1)
44 .sr(1)
45 .m(1)
46 .n(8)
47 .k(4)
48 .cn_stride(11)
49 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
50 }
51
52 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
53 GemmMicrokernelTester()
54 .mr(1)
55 .nr(8)
56 .kr(1)
57 .sr(1)
58 .m(1)
59 .n(8)
60 .k(4)
61 .a_stride(7)
62 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
63 }
64
65 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080066 for (uint32_t n = 1; n <= 8; n++) {
67 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080068 GemmMicrokernelTester()
69 .mr(1)
70 .nr(8)
71 .kr(1)
72 .sr(1)
73 .m(m)
74 .n(n)
75 .k(4)
76 .iterations(1)
77 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
78 }
79 }
80 }
81
82 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
83 for (uint32_t m = 1; m <= 1; m++) {
84 GemmMicrokernelTester()
85 .mr(1)
86 .nr(8)
87 .kr(1)
88 .sr(1)
89 .m(m)
90 .n(8)
91 .k(4)
92 .iterations(1)
93 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
94 }
95 }
96
97 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
98 for (uint32_t n = 1; n <= 8; n++) {
99 GemmMicrokernelTester()
100 .mr(1)
101 .nr(8)
102 .kr(1)
103 .sr(1)
104 .m(1)
105 .n(n)
106 .k(4)
107 .iterations(1)
108 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
109 }
110 }
111
112 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_lt_4) {
113 for (size_t k = 1; k < 4; k++) {
114 GemmMicrokernelTester()
115 .mr(1)
116 .nr(8)
117 .kr(1)
118 .sr(1)
119 .m(1)
120 .n(8)
121 .k(k)
122 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
123 }
124 }
125
126 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
127 for (size_t k = 1; k < 4; k++) {
128 GemmMicrokernelTester()
129 .mr(1)
130 .nr(8)
131 .kr(1)
132 .sr(1)
133 .m(1)
134 .n(8)
135 .k(k)
136 .a_stride(7)
137 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
138 }
139 }
140
141 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
142 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800143 for (uint32_t n = 1; n <= 8; n++) {
144 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800145 GemmMicrokernelTester()
146 .mr(1)
147 .nr(8)
148 .kr(1)
149 .sr(1)
150 .m(m)
151 .n(n)
152 .k(k)
153 .iterations(1)
154 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
155 }
156 }
157 }
158 }
159
160 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_gt_4) {
161 for (size_t k = 5; k < 8; k++) {
162 GemmMicrokernelTester()
163 .mr(1)
164 .nr(8)
165 .kr(1)
166 .sr(1)
167 .m(1)
168 .n(8)
169 .k(k)
170 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
171 }
172 }
173
174 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
175 for (size_t k = 5; k < 8; k++) {
176 GemmMicrokernelTester()
177 .mr(1)
178 .nr(8)
179 .kr(1)
180 .sr(1)
181 .m(1)
182 .n(8)
183 .k(k)
184 .a_stride(11)
185 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
186 }
187 }
188
189 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
190 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800191 for (uint32_t n = 1; n <= 8; n++) {
192 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800193 GemmMicrokernelTester()
194 .mr(1)
195 .nr(8)
196 .kr(1)
197 .sr(1)
198 .m(m)
199 .n(n)
200 .k(k)
201 .iterations(1)
202 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
203 }
204 }
205 }
206 }
207
208 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_div_4) {
209 for (size_t k = 8; k <= 40; k += 4) {
210 GemmMicrokernelTester()
211 .mr(1)
212 .nr(8)
213 .kr(1)
214 .sr(1)
215 .m(1)
216 .n(8)
217 .k(k)
218 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
219 }
220 }
221
222 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
223 for (size_t k = 8; k <= 40; k += 4) {
224 GemmMicrokernelTester()
225 .mr(1)
226 .nr(8)
227 .kr(1)
228 .sr(1)
229 .m(1)
230 .n(8)
231 .k(k)
232 .a_stride(43)
233 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
234 }
235 }
236
237 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, k_div_4_subtile) {
238 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800239 for (uint32_t n = 1; n <= 8; n++) {
240 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800241 GemmMicrokernelTester()
242 .mr(1)
243 .nr(8)
244 .kr(1)
245 .sr(1)
246 .m(m)
247 .n(n)
248 .k(k)
249 .iterations(1)
250 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
251 }
252 }
253 }
254 }
255
256 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_gt_8) {
257 for (uint32_t n = 9; n < 16; n++) {
258 for (size_t k = 1; k <= 20; k += 5) {
259 GemmMicrokernelTester()
260 .mr(1)
261 .nr(8)
262 .kr(1)
263 .sr(1)
264 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800265 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800266 .k(k)
267 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
268 }
269 }
270 }
271
272 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
273 for (uint32_t n = 9; n < 16; n++) {
274 for (size_t k = 1; k <= 20; k += 5) {
275 GemmMicrokernelTester()
276 .mr(1)
277 .nr(8)
278 .kr(1)
279 .sr(1)
280 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800281 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800282 .k(k)
283 .cn_stride(11)
284 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
285 }
286 }
287 }
288
289 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
290 for (uint32_t n = 9; n < 16; n++) {
291 for (size_t k = 1; k <= 20; k += 5) {
292 GemmMicrokernelTester()
293 .mr(1)
294 .nr(8)
295 .kr(1)
296 .sr(1)
297 .m(1)
298 .n(n)
299 .k(k)
300 .a_stride(23)
301 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
302 }
303 }
304 }
305
306 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
307 for (uint32_t n = 9; n < 16; n++) {
308 for (size_t k = 1; k <= 20; k += 5) {
309 for (uint32_t m = 1; m <= 1; m++) {
310 GemmMicrokernelTester()
311 .mr(1)
312 .nr(8)
313 .kr(1)
314 .sr(1)
315 .m(m)
316 .n(n)
317 .k(k)
318 .iterations(1)
319 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
320 }
321 }
322 }
323 }
324
325 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_div_8) {
326 for (uint32_t n = 16; n <= 24; n += 8) {
327 for (size_t k = 1; k <= 20; k += 5) {
328 GemmMicrokernelTester()
329 .mr(1)
330 .nr(8)
331 .kr(1)
332 .sr(1)
333 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800334 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800335 .k(k)
336 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
337 }
338 }
339 }
340
341 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
342 for (uint32_t n = 16; n <= 24; n += 8) {
343 for (size_t k = 1; k <= 20; k += 5) {
344 GemmMicrokernelTester()
345 .mr(1)
346 .nr(8)
347 .kr(1)
348 .sr(1)
349 .m(1)
350 .n(n)
351 .k(k)
352 .cn_stride(11)
353 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
354 }
355 }
356 }
357
358 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
359 for (uint32_t n = 16; n <= 24; n += 8) {
360 for (size_t k = 1; k <= 20; k += 5) {
361 GemmMicrokernelTester()
362 .mr(1)
363 .nr(8)
364 .kr(1)
365 .sr(1)
366 .m(1)
367 .n(n)
368 .k(k)
369 .a_stride(23)
370 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
371 }
372 }
373 }
374
375 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, n_div_8_subtile) {
376 for (uint32_t n = 16; n <= 24; n += 8) {
377 for (size_t k = 1; k <= 20; k += 5) {
378 for (uint32_t m = 1; m <= 1; m++) {
379 GemmMicrokernelTester()
380 .mr(1)
381 .nr(8)
382 .kr(1)
383 .sr(1)
384 .m(m)
385 .n(n)
386 .k(k)
387 .iterations(1)
388 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
389 }
390 }
391 }
392 }
393
394 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, strided_cm_subtile) {
395 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800396 for (uint32_t n = 1; n <= 8; n++) {
397 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800398 GemmMicrokernelTester()
399 .mr(1)
400 .nr(8)
401 .kr(1)
402 .sr(1)
403 .m(m)
404 .n(n)
405 .k(k)
406 .cm_stride(11)
407 .iterations(1)
408 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
409 }
410 }
411 }
412 }
413
414 TEST(F32_GEMM_1X8__WASMSIMD_SPLAT, strided_cm) {
415 GemmMicrokernelTester()
416 .mr(1)
417 .nr(8)
418 .kr(1)
419 .sr(1)
420 .m(1)
421 .n(8)
422 .k(4)
423 .cm_stride(11)
424 .Test(xnn_f32_gemm_ukernel_1x8__wasmsimd_splat);
425 }
426#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
427
428
429#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
430 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_eq_4) {
431 GemmMicrokernelTester()
432 .mr(4)
433 .nr(8)
434 .kr(1)
435 .sr(1)
436 .m(4)
437 .n(8)
438 .k(4)
439 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
440 }
441
442 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, strided_cn) {
443 GemmMicrokernelTester()
444 .mr(4)
445 .nr(8)
446 .kr(1)
447 .sr(1)
448 .m(4)
449 .n(8)
450 .k(4)
451 .cn_stride(11)
452 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
453 }
454
455 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
456 GemmMicrokernelTester()
457 .mr(4)
458 .nr(8)
459 .kr(1)
460 .sr(1)
461 .m(4)
462 .n(8)
463 .k(4)
464 .a_stride(7)
465 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
466 }
467
468 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800469 for (uint32_t n = 1; n <= 8; n++) {
470 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800471 GemmMicrokernelTester()
472 .mr(4)
473 .nr(8)
474 .kr(1)
475 .sr(1)
476 .m(m)
477 .n(n)
478 .k(4)
479 .iterations(1)
480 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
481 }
482 }
483 }
484
485 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
486 for (uint32_t m = 1; m <= 4; m++) {
487 GemmMicrokernelTester()
488 .mr(4)
489 .nr(8)
490 .kr(1)
491 .sr(1)
492 .m(m)
493 .n(8)
494 .k(4)
495 .iterations(1)
496 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
497 }
498 }
499
500 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
501 for (uint32_t n = 1; n <= 8; n++) {
502 GemmMicrokernelTester()
503 .mr(4)
504 .nr(8)
505 .kr(1)
506 .sr(1)
507 .m(4)
508 .n(n)
509 .k(4)
510 .iterations(1)
511 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
512 }
513 }
514
515 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_lt_4) {
516 for (size_t k = 1; k < 4; k++) {
517 GemmMicrokernelTester()
518 .mr(4)
519 .nr(8)
520 .kr(1)
521 .sr(1)
522 .m(4)
523 .n(8)
524 .k(k)
525 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
526 }
527 }
528
529 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
530 for (size_t k = 1; k < 4; k++) {
531 GemmMicrokernelTester()
532 .mr(4)
533 .nr(8)
534 .kr(1)
535 .sr(1)
536 .m(4)
537 .n(8)
538 .k(k)
539 .a_stride(7)
540 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
541 }
542 }
543
544 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
545 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800546 for (uint32_t n = 1; n <= 8; n++) {
547 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800548 GemmMicrokernelTester()
549 .mr(4)
550 .nr(8)
551 .kr(1)
552 .sr(1)
553 .m(m)
554 .n(n)
555 .k(k)
556 .iterations(1)
557 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
558 }
559 }
560 }
561 }
562
563 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_gt_4) {
564 for (size_t k = 5; k < 8; k++) {
565 GemmMicrokernelTester()
566 .mr(4)
567 .nr(8)
568 .kr(1)
569 .sr(1)
570 .m(4)
571 .n(8)
572 .k(k)
573 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
574 }
575 }
576
577 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
578 for (size_t k = 5; k < 8; k++) {
579 GemmMicrokernelTester()
580 .mr(4)
581 .nr(8)
582 .kr(1)
583 .sr(1)
584 .m(4)
585 .n(8)
586 .k(k)
587 .a_stride(11)
588 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
589 }
590 }
591
592 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
593 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800594 for (uint32_t n = 1; n <= 8; n++) {
595 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800596 GemmMicrokernelTester()
597 .mr(4)
598 .nr(8)
599 .kr(1)
600 .sr(1)
601 .m(m)
602 .n(n)
603 .k(k)
604 .iterations(1)
605 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
606 }
607 }
608 }
609 }
610
611 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_div_4) {
612 for (size_t k = 8; k <= 40; k += 4) {
613 GemmMicrokernelTester()
614 .mr(4)
615 .nr(8)
616 .kr(1)
617 .sr(1)
618 .m(4)
619 .n(8)
620 .k(k)
621 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
622 }
623 }
624
625 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
626 for (size_t k = 8; k <= 40; k += 4) {
627 GemmMicrokernelTester()
628 .mr(4)
629 .nr(8)
630 .kr(1)
631 .sr(1)
632 .m(4)
633 .n(8)
634 .k(k)
635 .a_stride(43)
636 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
637 }
638 }
639
640 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, k_div_4_subtile) {
641 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800642 for (uint32_t n = 1; n <= 8; n++) {
643 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800644 GemmMicrokernelTester()
645 .mr(4)
646 .nr(8)
647 .kr(1)
648 .sr(1)
649 .m(m)
650 .n(n)
651 .k(k)
652 .iterations(1)
653 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
654 }
655 }
656 }
657 }
658
659 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_gt_8) {
660 for (uint32_t n = 9; n < 16; n++) {
661 for (size_t k = 1; k <= 20; k += 5) {
662 GemmMicrokernelTester()
663 .mr(4)
664 .nr(8)
665 .kr(1)
666 .sr(1)
667 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800668 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800669 .k(k)
670 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
671 }
672 }
673 }
674
675 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
676 for (uint32_t n = 9; n < 16; n++) {
677 for (size_t k = 1; k <= 20; k += 5) {
678 GemmMicrokernelTester()
679 .mr(4)
680 .nr(8)
681 .kr(1)
682 .sr(1)
683 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800684 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800685 .k(k)
686 .cn_stride(11)
687 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
688 }
689 }
690 }
691
692 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
693 for (uint32_t n = 9; n < 16; n++) {
694 for (size_t k = 1; k <= 20; k += 5) {
695 GemmMicrokernelTester()
696 .mr(4)
697 .nr(8)
698 .kr(1)
699 .sr(1)
700 .m(4)
701 .n(n)
702 .k(k)
703 .a_stride(23)
704 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
705 }
706 }
707 }
708
709 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
710 for (uint32_t n = 9; n < 16; n++) {
711 for (size_t k = 1; k <= 20; k += 5) {
712 for (uint32_t m = 1; m <= 4; m++) {
713 GemmMicrokernelTester()
714 .mr(4)
715 .nr(8)
716 .kr(1)
717 .sr(1)
718 .m(m)
719 .n(n)
720 .k(k)
721 .iterations(1)
722 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
723 }
724 }
725 }
726 }
727
728 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_div_8) {
729 for (uint32_t n = 16; n <= 24; n += 8) {
730 for (size_t k = 1; k <= 20; k += 5) {
731 GemmMicrokernelTester()
732 .mr(4)
733 .nr(8)
734 .kr(1)
735 .sr(1)
736 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800737 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800738 .k(k)
739 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
740 }
741 }
742 }
743
744 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
745 for (uint32_t n = 16; n <= 24; n += 8) {
746 for (size_t k = 1; k <= 20; k += 5) {
747 GemmMicrokernelTester()
748 .mr(4)
749 .nr(8)
750 .kr(1)
751 .sr(1)
752 .m(4)
753 .n(n)
754 .k(k)
755 .cn_stride(11)
756 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
757 }
758 }
759 }
760
761 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
762 for (uint32_t n = 16; n <= 24; n += 8) {
763 for (size_t k = 1; k <= 20; k += 5) {
764 GemmMicrokernelTester()
765 .mr(4)
766 .nr(8)
767 .kr(1)
768 .sr(1)
769 .m(4)
770 .n(n)
771 .k(k)
772 .a_stride(23)
773 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
774 }
775 }
776 }
777
778 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, n_div_8_subtile) {
779 for (uint32_t n = 16; n <= 24; n += 8) {
780 for (size_t k = 1; k <= 20; k += 5) {
781 for (uint32_t m = 1; m <= 4; m++) {
782 GemmMicrokernelTester()
783 .mr(4)
784 .nr(8)
785 .kr(1)
786 .sr(1)
787 .m(m)
788 .n(n)
789 .k(k)
790 .iterations(1)
791 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
792 }
793 }
794 }
795 }
796
797 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, strided_cm_subtile) {
798 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800799 for (uint32_t n = 1; n <= 8; n++) {
800 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800801 GemmMicrokernelTester()
802 .mr(4)
803 .nr(8)
804 .kr(1)
805 .sr(1)
806 .m(m)
807 .n(n)
808 .k(k)
809 .cm_stride(11)
810 .iterations(1)
811 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
812 }
813 }
814 }
815 }
816
817 TEST(F32_GEMM_4X8__WASMSIMD_SPLAT, strided_cm) {
818 GemmMicrokernelTester()
819 .mr(4)
820 .nr(8)
821 .kr(1)
822 .sr(1)
823 .m(4)
824 .n(8)
825 .k(4)
826 .cm_stride(11)
827 .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_splat);
828 }
829#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
830
831
832#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
833 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_eq_4) {
834 GemmMicrokernelTester()
835 .mr(5)
836 .nr(8)
837 .kr(1)
838 .sr(1)
839 .m(5)
840 .n(8)
841 .k(4)
842 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
843 }
844
845 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, strided_cn) {
846 GemmMicrokernelTester()
847 .mr(5)
848 .nr(8)
849 .kr(1)
850 .sr(1)
851 .m(5)
852 .n(8)
853 .k(4)
854 .cn_stride(11)
855 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
856 }
857
858 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
859 GemmMicrokernelTester()
860 .mr(5)
861 .nr(8)
862 .kr(1)
863 .sr(1)
864 .m(5)
865 .n(8)
866 .k(4)
867 .a_stride(7)
868 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
869 }
870
871 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800872 for (uint32_t n = 1; n <= 8; n++) {
873 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800874 GemmMicrokernelTester()
875 .mr(5)
876 .nr(8)
877 .kr(1)
878 .sr(1)
879 .m(m)
880 .n(n)
881 .k(4)
882 .iterations(1)
883 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
884 }
885 }
886 }
887
888 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
889 for (uint32_t m = 1; m <= 5; m++) {
890 GemmMicrokernelTester()
891 .mr(5)
892 .nr(8)
893 .kr(1)
894 .sr(1)
895 .m(m)
896 .n(8)
897 .k(4)
898 .iterations(1)
899 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
900 }
901 }
902
903 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
904 for (uint32_t n = 1; n <= 8; n++) {
905 GemmMicrokernelTester()
906 .mr(5)
907 .nr(8)
908 .kr(1)
909 .sr(1)
910 .m(5)
911 .n(n)
912 .k(4)
913 .iterations(1)
914 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
915 }
916 }
917
918 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_lt_4) {
919 for (size_t k = 1; k < 4; k++) {
920 GemmMicrokernelTester()
921 .mr(5)
922 .nr(8)
923 .kr(1)
924 .sr(1)
925 .m(5)
926 .n(8)
927 .k(k)
928 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
929 }
930 }
931
932 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
933 for (size_t k = 1; k < 4; k++) {
934 GemmMicrokernelTester()
935 .mr(5)
936 .nr(8)
937 .kr(1)
938 .sr(1)
939 .m(5)
940 .n(8)
941 .k(k)
942 .a_stride(7)
943 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
944 }
945 }
946
947 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
948 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800949 for (uint32_t n = 1; n <= 8; n++) {
950 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800951 GemmMicrokernelTester()
952 .mr(5)
953 .nr(8)
954 .kr(1)
955 .sr(1)
956 .m(m)
957 .n(n)
958 .k(k)
959 .iterations(1)
960 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
961 }
962 }
963 }
964 }
965
966 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_gt_4) {
967 for (size_t k = 5; k < 8; k++) {
968 GemmMicrokernelTester()
969 .mr(5)
970 .nr(8)
971 .kr(1)
972 .sr(1)
973 .m(5)
974 .n(8)
975 .k(k)
976 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
977 }
978 }
979
980 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
981 for (size_t k = 5; k < 8; k++) {
982 GemmMicrokernelTester()
983 .mr(5)
984 .nr(8)
985 .kr(1)
986 .sr(1)
987 .m(5)
988 .n(8)
989 .k(k)
990 .a_stride(11)
991 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
992 }
993 }
994
995 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
996 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800997 for (uint32_t n = 1; n <= 8; n++) {
998 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800999 GemmMicrokernelTester()
1000 .mr(5)
1001 .nr(8)
1002 .kr(1)
1003 .sr(1)
1004 .m(m)
1005 .n(n)
1006 .k(k)
1007 .iterations(1)
1008 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1009 }
1010 }
1011 }
1012 }
1013
1014 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_div_4) {
1015 for (size_t k = 8; k <= 40; k += 4) {
1016 GemmMicrokernelTester()
1017 .mr(5)
1018 .nr(8)
1019 .kr(1)
1020 .sr(1)
1021 .m(5)
1022 .n(8)
1023 .k(k)
1024 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1025 }
1026 }
1027
1028 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
1029 for (size_t k = 8; k <= 40; k += 4) {
1030 GemmMicrokernelTester()
1031 .mr(5)
1032 .nr(8)
1033 .kr(1)
1034 .sr(1)
1035 .m(5)
1036 .n(8)
1037 .k(k)
1038 .a_stride(43)
1039 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1040 }
1041 }
1042
1043 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, k_div_4_subtile) {
1044 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001045 for (uint32_t n = 1; n <= 8; n++) {
1046 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001047 GemmMicrokernelTester()
1048 .mr(5)
1049 .nr(8)
1050 .kr(1)
1051 .sr(1)
1052 .m(m)
1053 .n(n)
1054 .k(k)
1055 .iterations(1)
1056 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1057 }
1058 }
1059 }
1060 }
1061
1062 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_gt_8) {
1063 for (uint32_t n = 9; n < 16; n++) {
1064 for (size_t k = 1; k <= 20; k += 5) {
1065 GemmMicrokernelTester()
1066 .mr(5)
1067 .nr(8)
1068 .kr(1)
1069 .sr(1)
1070 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001071 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001072 .k(k)
1073 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1074 }
1075 }
1076 }
1077
1078 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
1079 for (uint32_t n = 9; n < 16; n++) {
1080 for (size_t k = 1; k <= 20; k += 5) {
1081 GemmMicrokernelTester()
1082 .mr(5)
1083 .nr(8)
1084 .kr(1)
1085 .sr(1)
1086 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001087 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001088 .k(k)
1089 .cn_stride(11)
1090 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1091 }
1092 }
1093 }
1094
1095 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
1096 for (uint32_t n = 9; n < 16; n++) {
1097 for (size_t k = 1; k <= 20; k += 5) {
1098 GemmMicrokernelTester()
1099 .mr(5)
1100 .nr(8)
1101 .kr(1)
1102 .sr(1)
1103 .m(5)
1104 .n(n)
1105 .k(k)
1106 .a_stride(23)
1107 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1108 }
1109 }
1110 }
1111
1112 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
1113 for (uint32_t n = 9; n < 16; n++) {
1114 for (size_t k = 1; k <= 20; k += 5) {
1115 for (uint32_t m = 1; m <= 5; m++) {
1116 GemmMicrokernelTester()
1117 .mr(5)
1118 .nr(8)
1119 .kr(1)
1120 .sr(1)
1121 .m(m)
1122 .n(n)
1123 .k(k)
1124 .iterations(1)
1125 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1126 }
1127 }
1128 }
1129 }
1130
1131 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_div_8) {
1132 for (uint32_t n = 16; n <= 24; n += 8) {
1133 for (size_t k = 1; k <= 20; k += 5) {
1134 GemmMicrokernelTester()
1135 .mr(5)
1136 .nr(8)
1137 .kr(1)
1138 .sr(1)
1139 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001140 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001141 .k(k)
1142 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1143 }
1144 }
1145 }
1146
1147 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
1148 for (uint32_t n = 16; n <= 24; n += 8) {
1149 for (size_t k = 1; k <= 20; k += 5) {
1150 GemmMicrokernelTester()
1151 .mr(5)
1152 .nr(8)
1153 .kr(1)
1154 .sr(1)
1155 .m(5)
1156 .n(n)
1157 .k(k)
1158 .cn_stride(11)
1159 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1160 }
1161 }
1162 }
1163
1164 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
1165 for (uint32_t n = 16; n <= 24; n += 8) {
1166 for (size_t k = 1; k <= 20; k += 5) {
1167 GemmMicrokernelTester()
1168 .mr(5)
1169 .nr(8)
1170 .kr(1)
1171 .sr(1)
1172 .m(5)
1173 .n(n)
1174 .k(k)
1175 .a_stride(23)
1176 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1177 }
1178 }
1179 }
1180
1181 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, n_div_8_subtile) {
1182 for (uint32_t n = 16; n <= 24; n += 8) {
1183 for (size_t k = 1; k <= 20; k += 5) {
1184 for (uint32_t m = 1; m <= 5; m++) {
1185 GemmMicrokernelTester()
1186 .mr(5)
1187 .nr(8)
1188 .kr(1)
1189 .sr(1)
1190 .m(m)
1191 .n(n)
1192 .k(k)
1193 .iterations(1)
1194 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1195 }
1196 }
1197 }
1198 }
1199
1200 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, strided_cm_subtile) {
1201 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001202 for (uint32_t n = 1; n <= 8; n++) {
1203 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001204 GemmMicrokernelTester()
1205 .mr(5)
1206 .nr(8)
1207 .kr(1)
1208 .sr(1)
1209 .m(m)
1210 .n(n)
1211 .k(k)
1212 .cm_stride(11)
1213 .iterations(1)
1214 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1215 }
1216 }
1217 }
1218 }
1219
1220 TEST(F32_GEMM_5X8__WASMSIMD_SPLAT, strided_cm) {
1221 GemmMicrokernelTester()
1222 .mr(5)
1223 .nr(8)
1224 .kr(1)
1225 .sr(1)
1226 .m(5)
1227 .n(8)
1228 .k(4)
1229 .cm_stride(11)
1230 .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_splat);
1231 }
1232#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1233
1234
1235#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1236 TEST(F32_GEMM_1X4__WASM, k_eq_1) {
1237 GemmMicrokernelTester()
1238 .mr(1)
1239 .nr(4)
1240 .kr(1)
1241 .sr(1)
1242 .m(1)
1243 .n(4)
1244 .k(1)
1245 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1246 }
1247
1248 TEST(F32_GEMM_1X4__WASM, strided_cn) {
1249 GemmMicrokernelTester()
1250 .mr(1)
1251 .nr(4)
1252 .kr(1)
1253 .sr(1)
1254 .m(1)
1255 .n(4)
1256 .k(1)
1257 .cn_stride(7)
1258 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1259 }
1260
1261 TEST(F32_GEMM_1X4__WASM, k_eq_1_strided_a) {
1262 GemmMicrokernelTester()
1263 .mr(1)
1264 .nr(4)
1265 .kr(1)
1266 .sr(1)
1267 .m(1)
1268 .n(4)
1269 .k(1)
1270 .a_stride(3)
1271 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1272 }
1273
1274 TEST(F32_GEMM_1X4__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001275 for (uint32_t n = 1; n <= 4; n++) {
1276 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001277 GemmMicrokernelTester()
1278 .mr(1)
1279 .nr(4)
1280 .kr(1)
1281 .sr(1)
1282 .m(m)
1283 .n(n)
1284 .k(1)
1285 .iterations(1)
1286 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1287 }
1288 }
1289 }
1290
1291 TEST(F32_GEMM_1X4__WASM, k_eq_1_subtile_m) {
1292 for (uint32_t m = 1; m <= 1; m++) {
1293 GemmMicrokernelTester()
1294 .mr(1)
1295 .nr(4)
1296 .kr(1)
1297 .sr(1)
1298 .m(m)
1299 .n(4)
1300 .k(1)
1301 .iterations(1)
1302 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1303 }
1304 }
1305
1306 TEST(F32_GEMM_1X4__WASM, k_eq_1_subtile_n) {
1307 for (uint32_t n = 1; n <= 4; n++) {
1308 GemmMicrokernelTester()
1309 .mr(1)
1310 .nr(4)
1311 .kr(1)
1312 .sr(1)
1313 .m(1)
1314 .n(n)
1315 .k(1)
1316 .iterations(1)
1317 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1318 }
1319 }
1320
1321 TEST(F32_GEMM_1X4__WASM, k_gt_1) {
1322 for (size_t k = 2; k < 10; k++) {
1323 GemmMicrokernelTester()
1324 .mr(1)
1325 .nr(4)
1326 .kr(1)
1327 .sr(1)
1328 .m(1)
1329 .n(4)
1330 .k(k)
1331 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1332 }
1333 }
1334
1335 TEST(F32_GEMM_1X4__WASM, k_gt_1_strided_a) {
1336 for (size_t k = 2; k < 10; k++) {
1337 GemmMicrokernelTester()
1338 .mr(1)
1339 .nr(4)
1340 .kr(1)
1341 .sr(1)
1342 .m(1)
1343 .n(4)
1344 .k(k)
1345 .a_stride(11)
1346 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1347 }
1348 }
1349
1350 TEST(F32_GEMM_1X4__WASM, k_gt_1_subtile) {
1351 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001352 for (uint32_t n = 1; n <= 4; n++) {
1353 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001354 GemmMicrokernelTester()
1355 .mr(1)
1356 .nr(4)
1357 .kr(1)
1358 .sr(1)
1359 .m(m)
1360 .n(n)
1361 .k(k)
1362 .iterations(1)
1363 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1364 }
1365 }
1366 }
1367 }
1368
1369 TEST(F32_GEMM_1X4__WASM, n_gt_4) {
1370 for (uint32_t n = 5; n < 8; n++) {
1371 for (size_t k = 1; k <= 5; k += 2) {
1372 GemmMicrokernelTester()
1373 .mr(1)
1374 .nr(4)
1375 .kr(1)
1376 .sr(1)
1377 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001378 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001379 .k(k)
1380 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1381 }
1382 }
1383 }
1384
1385 TEST(F32_GEMM_1X4__WASM, n_gt_4_strided_cn) {
1386 for (uint32_t n = 5; n < 8; n++) {
1387 for (size_t k = 1; k <= 5; k += 2) {
1388 GemmMicrokernelTester()
1389 .mr(1)
1390 .nr(4)
1391 .kr(1)
1392 .sr(1)
1393 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001394 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001395 .k(k)
1396 .cn_stride(7)
1397 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1398 }
1399 }
1400 }
1401
1402 TEST(F32_GEMM_1X4__WASM, n_gt_4_strided_a) {
1403 for (uint32_t n = 5; n < 8; n++) {
1404 for (size_t k = 1; k <= 5; k += 2) {
1405 GemmMicrokernelTester()
1406 .mr(1)
1407 .nr(4)
1408 .kr(1)
1409 .sr(1)
1410 .m(1)
1411 .n(n)
1412 .k(k)
1413 .a_stride(7)
1414 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1415 }
1416 }
1417 }
1418
1419 TEST(F32_GEMM_1X4__WASM, n_gt_4_subtile) {
1420 for (uint32_t n = 5; n < 8; n++) {
1421 for (size_t k = 1; k <= 5; k += 2) {
1422 for (uint32_t m = 1; m <= 1; m++) {
1423 GemmMicrokernelTester()
1424 .mr(1)
1425 .nr(4)
1426 .kr(1)
1427 .sr(1)
1428 .m(m)
1429 .n(n)
1430 .k(k)
1431 .iterations(1)
1432 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1433 }
1434 }
1435 }
1436 }
1437
1438 TEST(F32_GEMM_1X4__WASM, n_div_4) {
1439 for (uint32_t n = 8; n <= 12; n += 4) {
1440 for (size_t k = 1; k <= 5; k += 2) {
1441 GemmMicrokernelTester()
1442 .mr(1)
1443 .nr(4)
1444 .kr(1)
1445 .sr(1)
1446 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001447 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001448 .k(k)
1449 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1450 }
1451 }
1452 }
1453
1454 TEST(F32_GEMM_1X4__WASM, n_div_4_strided_cn) {
1455 for (uint32_t n = 8; n <= 12; n += 4) {
1456 for (size_t k = 1; k <= 5; k += 2) {
1457 GemmMicrokernelTester()
1458 .mr(1)
1459 .nr(4)
1460 .kr(1)
1461 .sr(1)
1462 .m(1)
1463 .n(n)
1464 .k(k)
1465 .cn_stride(7)
1466 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1467 }
1468 }
1469 }
1470
1471 TEST(F32_GEMM_1X4__WASM, n_div_4_strided_a) {
1472 for (uint32_t n = 8; n <= 12; n += 4) {
1473 for (size_t k = 1; k <= 5; k += 2) {
1474 GemmMicrokernelTester()
1475 .mr(1)
1476 .nr(4)
1477 .kr(1)
1478 .sr(1)
1479 .m(1)
1480 .n(n)
1481 .k(k)
1482 .a_stride(7)
1483 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1484 }
1485 }
1486 }
1487
1488 TEST(F32_GEMM_1X4__WASM, n_div_4_subtile) {
1489 for (uint32_t n = 8; n <= 12; n += 4) {
1490 for (size_t k = 1; k <= 5; k += 2) {
1491 for (uint32_t m = 1; m <= 1; m++) {
1492 GemmMicrokernelTester()
1493 .mr(1)
1494 .nr(4)
1495 .kr(1)
1496 .sr(1)
1497 .m(m)
1498 .n(n)
1499 .k(k)
1500 .iterations(1)
1501 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1502 }
1503 }
1504 }
1505 }
1506
1507 TEST(F32_GEMM_1X4__WASM, strided_cm_subtile) {
1508 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001509 for (uint32_t n = 1; n <= 4; n++) {
1510 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001511 GemmMicrokernelTester()
1512 .mr(1)
1513 .nr(4)
1514 .kr(1)
1515 .sr(1)
1516 .m(m)
1517 .n(n)
1518 .k(k)
1519 .cm_stride(7)
1520 .iterations(1)
1521 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1522 }
1523 }
1524 }
1525 }
1526
1527 TEST(F32_GEMM_1X4__WASM, strided_cm) {
1528 GemmMicrokernelTester()
1529 .mr(1)
1530 .nr(4)
1531 .kr(1)
1532 .sr(1)
1533 .m(1)
1534 .n(4)
1535 .k(1)
1536 .cm_stride(7)
1537 .Test(xnn_f32_gemm_ukernel_1x4__wasm);
1538 }
1539#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1540
1541
1542TEST(F32_GEMM_1X4__SCALAR, k_eq_1) {
1543 GemmMicrokernelTester()
1544 .mr(1)
1545 .nr(4)
1546 .kr(1)
1547 .sr(1)
1548 .m(1)
1549 .n(4)
1550 .k(1)
1551 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1552}
1553
1554TEST(F32_GEMM_1X4__SCALAR, strided_cn) {
1555 GemmMicrokernelTester()
1556 .mr(1)
1557 .nr(4)
1558 .kr(1)
1559 .sr(1)
1560 .m(1)
1561 .n(4)
1562 .k(1)
1563 .cn_stride(7)
1564 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1565}
1566
1567TEST(F32_GEMM_1X4__SCALAR, k_eq_1_strided_a) {
1568 GemmMicrokernelTester()
1569 .mr(1)
1570 .nr(4)
1571 .kr(1)
1572 .sr(1)
1573 .m(1)
1574 .n(4)
1575 .k(1)
1576 .a_stride(3)
1577 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1578}
1579
1580TEST(F32_GEMM_1X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001581 for (uint32_t n = 1; n <= 4; n++) {
1582 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001583 GemmMicrokernelTester()
1584 .mr(1)
1585 .nr(4)
1586 .kr(1)
1587 .sr(1)
1588 .m(m)
1589 .n(n)
1590 .k(1)
1591 .iterations(1)
1592 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1593 }
1594 }
1595}
1596
1597TEST(F32_GEMM_1X4__SCALAR, k_eq_1_subtile_m) {
1598 for (uint32_t m = 1; m <= 1; m++) {
1599 GemmMicrokernelTester()
1600 .mr(1)
1601 .nr(4)
1602 .kr(1)
1603 .sr(1)
1604 .m(m)
1605 .n(4)
1606 .k(1)
1607 .iterations(1)
1608 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1609 }
1610}
1611
1612TEST(F32_GEMM_1X4__SCALAR, k_eq_1_subtile_n) {
1613 for (uint32_t n = 1; n <= 4; n++) {
1614 GemmMicrokernelTester()
1615 .mr(1)
1616 .nr(4)
1617 .kr(1)
1618 .sr(1)
1619 .m(1)
1620 .n(n)
1621 .k(1)
1622 .iterations(1)
1623 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1624 }
1625}
1626
1627TEST(F32_GEMM_1X4__SCALAR, k_gt_1) {
1628 for (size_t k = 2; k < 10; k++) {
1629 GemmMicrokernelTester()
1630 .mr(1)
1631 .nr(4)
1632 .kr(1)
1633 .sr(1)
1634 .m(1)
1635 .n(4)
1636 .k(k)
1637 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1638 }
1639}
1640
1641TEST(F32_GEMM_1X4__SCALAR, k_gt_1_strided_a) {
1642 for (size_t k = 2; k < 10; k++) {
1643 GemmMicrokernelTester()
1644 .mr(1)
1645 .nr(4)
1646 .kr(1)
1647 .sr(1)
1648 .m(1)
1649 .n(4)
1650 .k(k)
1651 .a_stride(11)
1652 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1653 }
1654}
1655
1656TEST(F32_GEMM_1X4__SCALAR, k_gt_1_subtile) {
1657 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001658 for (uint32_t n = 1; n <= 4; n++) {
1659 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001660 GemmMicrokernelTester()
1661 .mr(1)
1662 .nr(4)
1663 .kr(1)
1664 .sr(1)
1665 .m(m)
1666 .n(n)
1667 .k(k)
1668 .iterations(1)
1669 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1670 }
1671 }
1672 }
1673}
1674
1675TEST(F32_GEMM_1X4__SCALAR, n_gt_4) {
1676 for (uint32_t n = 5; n < 8; n++) {
1677 for (size_t k = 1; k <= 5; k += 2) {
1678 GemmMicrokernelTester()
1679 .mr(1)
1680 .nr(4)
1681 .kr(1)
1682 .sr(1)
1683 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001684 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001685 .k(k)
1686 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1687 }
1688 }
1689}
1690
1691TEST(F32_GEMM_1X4__SCALAR, n_gt_4_strided_cn) {
1692 for (uint32_t n = 5; n < 8; n++) {
1693 for (size_t k = 1; k <= 5; k += 2) {
1694 GemmMicrokernelTester()
1695 .mr(1)
1696 .nr(4)
1697 .kr(1)
1698 .sr(1)
1699 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001700 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001701 .k(k)
1702 .cn_stride(7)
1703 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1704 }
1705 }
1706}
1707
1708TEST(F32_GEMM_1X4__SCALAR, n_gt_4_strided_a) {
1709 for (uint32_t n = 5; n < 8; n++) {
1710 for (size_t k = 1; k <= 5; k += 2) {
1711 GemmMicrokernelTester()
1712 .mr(1)
1713 .nr(4)
1714 .kr(1)
1715 .sr(1)
1716 .m(1)
1717 .n(n)
1718 .k(k)
1719 .a_stride(7)
1720 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1721 }
1722 }
1723}
1724
1725TEST(F32_GEMM_1X4__SCALAR, n_gt_4_subtile) {
1726 for (uint32_t n = 5; n < 8; n++) {
1727 for (size_t k = 1; k <= 5; k += 2) {
1728 for (uint32_t m = 1; m <= 1; m++) {
1729 GemmMicrokernelTester()
1730 .mr(1)
1731 .nr(4)
1732 .kr(1)
1733 .sr(1)
1734 .m(m)
1735 .n(n)
1736 .k(k)
1737 .iterations(1)
1738 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1739 }
1740 }
1741 }
1742}
1743
1744TEST(F32_GEMM_1X4__SCALAR, n_div_4) {
1745 for (uint32_t n = 8; n <= 12; n += 4) {
1746 for (size_t k = 1; k <= 5; k += 2) {
1747 GemmMicrokernelTester()
1748 .mr(1)
1749 .nr(4)
1750 .kr(1)
1751 .sr(1)
1752 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001753 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001754 .k(k)
1755 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1756 }
1757 }
1758}
1759
1760TEST(F32_GEMM_1X4__SCALAR, n_div_4_strided_cn) {
1761 for (uint32_t n = 8; n <= 12; n += 4) {
1762 for (size_t k = 1; k <= 5; k += 2) {
1763 GemmMicrokernelTester()
1764 .mr(1)
1765 .nr(4)
1766 .kr(1)
1767 .sr(1)
1768 .m(1)
1769 .n(n)
1770 .k(k)
1771 .cn_stride(7)
1772 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1773 }
1774 }
1775}
1776
1777TEST(F32_GEMM_1X4__SCALAR, n_div_4_strided_a) {
1778 for (uint32_t n = 8; n <= 12; n += 4) {
1779 for (size_t k = 1; k <= 5; k += 2) {
1780 GemmMicrokernelTester()
1781 .mr(1)
1782 .nr(4)
1783 .kr(1)
1784 .sr(1)
1785 .m(1)
1786 .n(n)
1787 .k(k)
1788 .a_stride(7)
1789 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1790 }
1791 }
1792}
1793
1794TEST(F32_GEMM_1X4__SCALAR, n_div_4_subtile) {
1795 for (uint32_t n = 8; n <= 12; n += 4) {
1796 for (size_t k = 1; k <= 5; k += 2) {
1797 for (uint32_t m = 1; m <= 1; m++) {
1798 GemmMicrokernelTester()
1799 .mr(1)
1800 .nr(4)
1801 .kr(1)
1802 .sr(1)
1803 .m(m)
1804 .n(n)
1805 .k(k)
1806 .iterations(1)
1807 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1808 }
1809 }
1810 }
1811}
1812
1813TEST(F32_GEMM_1X4__SCALAR, strided_cm_subtile) {
1814 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001815 for (uint32_t n = 1; n <= 4; n++) {
1816 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001817 GemmMicrokernelTester()
1818 .mr(1)
1819 .nr(4)
1820 .kr(1)
1821 .sr(1)
1822 .m(m)
1823 .n(n)
1824 .k(k)
1825 .cm_stride(7)
1826 .iterations(1)
1827 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1828 }
1829 }
1830 }
1831}
1832
1833TEST(F32_GEMM_1X4__SCALAR, strided_cm) {
1834 GemmMicrokernelTester()
1835 .mr(1)
1836 .nr(4)
1837 .kr(1)
1838 .sr(1)
1839 .m(1)
1840 .n(4)
1841 .k(1)
1842 .cm_stride(7)
1843 .Test(xnn_f32_gemm_ukernel_1x4__scalar);
1844}
1845
1846
1847TEST(F32_GEMM_4X4__SCALAR, k_eq_1) {
1848 GemmMicrokernelTester()
1849 .mr(4)
1850 .nr(4)
1851 .kr(1)
1852 .sr(1)
1853 .m(4)
1854 .n(4)
1855 .k(1)
1856 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
1857}
1858
1859TEST(F32_GEMM_4X4__SCALAR, strided_cn) {
1860 GemmMicrokernelTester()
1861 .mr(4)
1862 .nr(4)
1863 .kr(1)
1864 .sr(1)
1865 .m(4)
1866 .n(4)
1867 .k(1)
1868 .cn_stride(7)
1869 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
1870}
1871
1872TEST(F32_GEMM_4X4__SCALAR, k_eq_1_strided_a) {
1873 GemmMicrokernelTester()
1874 .mr(4)
1875 .nr(4)
1876 .kr(1)
1877 .sr(1)
1878 .m(4)
1879 .n(4)
1880 .k(1)
1881 .a_stride(3)
1882 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
1883}
1884
1885TEST(F32_GEMM_4X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001886 for (uint32_t n = 1; n <= 4; n++) {
1887 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001888 GemmMicrokernelTester()
1889 .mr(4)
1890 .nr(4)
1891 .kr(1)
1892 .sr(1)
1893 .m(m)
1894 .n(n)
1895 .k(1)
1896 .iterations(1)
1897 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
1898 }
1899 }
1900}
1901
1902TEST(F32_GEMM_4X4__SCALAR, k_eq_1_subtile_m) {
1903 for (uint32_t m = 1; m <= 4; m++) {
1904 GemmMicrokernelTester()
1905 .mr(4)
1906 .nr(4)
1907 .kr(1)
1908 .sr(1)
1909 .m(m)
1910 .n(4)
1911 .k(1)
1912 .iterations(1)
1913 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
1914 }
1915}
1916
1917TEST(F32_GEMM_4X4__SCALAR, k_eq_1_subtile_n) {
1918 for (uint32_t n = 1; n <= 4; n++) {
1919 GemmMicrokernelTester()
1920 .mr(4)
1921 .nr(4)
1922 .kr(1)
1923 .sr(1)
1924 .m(4)
1925 .n(n)
1926 .k(1)
1927 .iterations(1)
1928 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
1929 }
1930}
1931
1932TEST(F32_GEMM_4X4__SCALAR, k_gt_1) {
1933 for (size_t k = 2; k < 10; k++) {
1934 GemmMicrokernelTester()
1935 .mr(4)
1936 .nr(4)
1937 .kr(1)
1938 .sr(1)
1939 .m(4)
1940 .n(4)
1941 .k(k)
1942 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
1943 }
1944}
1945
1946TEST(F32_GEMM_4X4__SCALAR, k_gt_1_strided_a) {
1947 for (size_t k = 2; k < 10; k++) {
1948 GemmMicrokernelTester()
1949 .mr(4)
1950 .nr(4)
1951 .kr(1)
1952 .sr(1)
1953 .m(4)
1954 .n(4)
1955 .k(k)
1956 .a_stride(11)
1957 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
1958 }
1959}
1960
1961TEST(F32_GEMM_4X4__SCALAR, k_gt_1_subtile) {
1962 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001963 for (uint32_t n = 1; n <= 4; n++) {
1964 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001965 GemmMicrokernelTester()
1966 .mr(4)
1967 .nr(4)
1968 .kr(1)
1969 .sr(1)
1970 .m(m)
1971 .n(n)
1972 .k(k)
1973 .iterations(1)
1974 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
1975 }
1976 }
1977 }
1978}
1979
1980TEST(F32_GEMM_4X4__SCALAR, n_gt_4) {
1981 for (uint32_t n = 5; n < 8; n++) {
1982 for (size_t k = 1; k <= 5; k += 2) {
1983 GemmMicrokernelTester()
1984 .mr(4)
1985 .nr(4)
1986 .kr(1)
1987 .sr(1)
1988 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001989 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001990 .k(k)
1991 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
1992 }
1993 }
1994}
1995
1996TEST(F32_GEMM_4X4__SCALAR, n_gt_4_strided_cn) {
1997 for (uint32_t n = 5; n < 8; n++) {
1998 for (size_t k = 1; k <= 5; k += 2) {
1999 GemmMicrokernelTester()
2000 .mr(4)
2001 .nr(4)
2002 .kr(1)
2003 .sr(1)
2004 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002005 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002006 .k(k)
2007 .cn_stride(7)
2008 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
2009 }
2010 }
2011}
2012
2013TEST(F32_GEMM_4X4__SCALAR, n_gt_4_strided_a) {
2014 for (uint32_t n = 5; n < 8; n++) {
2015 for (size_t k = 1; k <= 5; k += 2) {
2016 GemmMicrokernelTester()
2017 .mr(4)
2018 .nr(4)
2019 .kr(1)
2020 .sr(1)
2021 .m(4)
2022 .n(n)
2023 .k(k)
2024 .a_stride(7)
2025 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
2026 }
2027 }
2028}
2029
2030TEST(F32_GEMM_4X4__SCALAR, n_gt_4_subtile) {
2031 for (uint32_t n = 5; n < 8; n++) {
2032 for (size_t k = 1; k <= 5; k += 2) {
2033 for (uint32_t m = 1; m <= 4; m++) {
2034 GemmMicrokernelTester()
2035 .mr(4)
2036 .nr(4)
2037 .kr(1)
2038 .sr(1)
2039 .m(m)
2040 .n(n)
2041 .k(k)
2042 .iterations(1)
2043 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
2044 }
2045 }
2046 }
2047}
2048
2049TEST(F32_GEMM_4X4__SCALAR, n_div_4) {
2050 for (uint32_t n = 8; n <= 12; n += 4) {
2051 for (size_t k = 1; k <= 5; k += 2) {
2052 GemmMicrokernelTester()
2053 .mr(4)
2054 .nr(4)
2055 .kr(1)
2056 .sr(1)
2057 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002058 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002059 .k(k)
2060 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
2061 }
2062 }
2063}
2064
2065TEST(F32_GEMM_4X4__SCALAR, n_div_4_strided_cn) {
2066 for (uint32_t n = 8; n <= 12; n += 4) {
2067 for (size_t k = 1; k <= 5; k += 2) {
2068 GemmMicrokernelTester()
2069 .mr(4)
2070 .nr(4)
2071 .kr(1)
2072 .sr(1)
2073 .m(4)
2074 .n(n)
2075 .k(k)
2076 .cn_stride(7)
2077 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
2078 }
2079 }
2080}
2081
2082TEST(F32_GEMM_4X4__SCALAR, n_div_4_strided_a) {
2083 for (uint32_t n = 8; n <= 12; n += 4) {
2084 for (size_t k = 1; k <= 5; k += 2) {
2085 GemmMicrokernelTester()
2086 .mr(4)
2087 .nr(4)
2088 .kr(1)
2089 .sr(1)
2090 .m(4)
2091 .n(n)
2092 .k(k)
2093 .a_stride(7)
2094 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
2095 }
2096 }
2097}
2098
2099TEST(F32_GEMM_4X4__SCALAR, n_div_4_subtile) {
2100 for (uint32_t n = 8; n <= 12; n += 4) {
2101 for (size_t k = 1; k <= 5; k += 2) {
2102 for (uint32_t m = 1; m <= 4; m++) {
2103 GemmMicrokernelTester()
2104 .mr(4)
2105 .nr(4)
2106 .kr(1)
2107 .sr(1)
2108 .m(m)
2109 .n(n)
2110 .k(k)
2111 .iterations(1)
2112 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
2113 }
2114 }
2115 }
2116}
2117
2118TEST(F32_GEMM_4X4__SCALAR, strided_cm_subtile) {
2119 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002120 for (uint32_t n = 1; n <= 4; n++) {
2121 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002122 GemmMicrokernelTester()
2123 .mr(4)
2124 .nr(4)
2125 .kr(1)
2126 .sr(1)
2127 .m(m)
2128 .n(n)
2129 .k(k)
2130 .cm_stride(7)
2131 .iterations(1)
2132 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
2133 }
2134 }
2135 }
2136}
2137
2138TEST(F32_GEMM_4X4__SCALAR, strided_cm) {
2139 GemmMicrokernelTester()
2140 .mr(4)
2141 .nr(4)
2142 .kr(1)
2143 .sr(1)
2144 .m(4)
2145 .n(4)
2146 .k(1)
2147 .cm_stride(7)
2148 .Test(xnn_f32_gemm_ukernel_4x4__scalar);
2149}
2150
2151
2152TEST(F32_GEMM_4X2__SCALAR, k_eq_1) {
2153 GemmMicrokernelTester()
2154 .mr(4)
2155 .nr(2)
2156 .kr(1)
2157 .sr(1)
2158 .m(4)
2159 .n(2)
2160 .k(1)
2161 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2162}
2163
2164TEST(F32_GEMM_4X2__SCALAR, strided_cn) {
2165 GemmMicrokernelTester()
2166 .mr(4)
2167 .nr(2)
2168 .kr(1)
2169 .sr(1)
2170 .m(4)
2171 .n(2)
2172 .k(1)
2173 .cn_stride(5)
2174 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2175}
2176
2177TEST(F32_GEMM_4X2__SCALAR, k_eq_1_strided_a) {
2178 GemmMicrokernelTester()
2179 .mr(4)
2180 .nr(2)
2181 .kr(1)
2182 .sr(1)
2183 .m(4)
2184 .n(2)
2185 .k(1)
2186 .a_stride(3)
2187 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2188}
2189
2190TEST(F32_GEMM_4X2__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002191 for (uint32_t n = 1; n <= 2; n++) {
2192 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002193 GemmMicrokernelTester()
2194 .mr(4)
2195 .nr(2)
2196 .kr(1)
2197 .sr(1)
2198 .m(m)
2199 .n(n)
2200 .k(1)
2201 .iterations(1)
2202 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2203 }
2204 }
2205}
2206
2207TEST(F32_GEMM_4X2__SCALAR, k_eq_1_subtile_m) {
2208 for (uint32_t m = 1; m <= 4; m++) {
2209 GemmMicrokernelTester()
2210 .mr(4)
2211 .nr(2)
2212 .kr(1)
2213 .sr(1)
2214 .m(m)
2215 .n(2)
2216 .k(1)
2217 .iterations(1)
2218 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2219 }
2220}
2221
2222TEST(F32_GEMM_4X2__SCALAR, k_eq_1_subtile_n) {
2223 for (uint32_t n = 1; n <= 2; n++) {
2224 GemmMicrokernelTester()
2225 .mr(4)
2226 .nr(2)
2227 .kr(1)
2228 .sr(1)
2229 .m(4)
2230 .n(n)
2231 .k(1)
2232 .iterations(1)
2233 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2234 }
2235}
2236
2237TEST(F32_GEMM_4X2__SCALAR, k_gt_1) {
2238 for (size_t k = 2; k < 10; k++) {
2239 GemmMicrokernelTester()
2240 .mr(4)
2241 .nr(2)
2242 .kr(1)
2243 .sr(1)
2244 .m(4)
2245 .n(2)
2246 .k(k)
2247 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2248 }
2249}
2250
2251TEST(F32_GEMM_4X2__SCALAR, k_gt_1_strided_a) {
2252 for (size_t k = 2; k < 10; k++) {
2253 GemmMicrokernelTester()
2254 .mr(4)
2255 .nr(2)
2256 .kr(1)
2257 .sr(1)
2258 .m(4)
2259 .n(2)
2260 .k(k)
2261 .a_stride(11)
2262 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2263 }
2264}
2265
2266TEST(F32_GEMM_4X2__SCALAR, k_gt_1_subtile) {
2267 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002268 for (uint32_t n = 1; n <= 2; n++) {
2269 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002270 GemmMicrokernelTester()
2271 .mr(4)
2272 .nr(2)
2273 .kr(1)
2274 .sr(1)
2275 .m(m)
2276 .n(n)
2277 .k(k)
2278 .iterations(1)
2279 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2280 }
2281 }
2282 }
2283}
2284
2285TEST(F32_GEMM_4X2__SCALAR, n_gt_2) {
2286 for (uint32_t n = 3; n < 4; n++) {
2287 for (size_t k = 1; k <= 5; k += 2) {
2288 GemmMicrokernelTester()
2289 .mr(4)
2290 .nr(2)
2291 .kr(1)
2292 .sr(1)
2293 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002294 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002295 .k(k)
2296 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2297 }
2298 }
2299}
2300
2301TEST(F32_GEMM_4X2__SCALAR, n_gt_2_strided_cn) {
2302 for (uint32_t n = 3; n < 4; n++) {
2303 for (size_t k = 1; k <= 5; k += 2) {
2304 GemmMicrokernelTester()
2305 .mr(4)
2306 .nr(2)
2307 .kr(1)
2308 .sr(1)
2309 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002310 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002311 .k(k)
2312 .cn_stride(5)
2313 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2314 }
2315 }
2316}
2317
2318TEST(F32_GEMM_4X2__SCALAR, n_gt_2_strided_a) {
2319 for (uint32_t n = 3; n < 4; n++) {
2320 for (size_t k = 1; k <= 5; k += 2) {
2321 GemmMicrokernelTester()
2322 .mr(4)
2323 .nr(2)
2324 .kr(1)
2325 .sr(1)
2326 .m(4)
2327 .n(n)
2328 .k(k)
2329 .a_stride(7)
2330 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2331 }
2332 }
2333}
2334
2335TEST(F32_GEMM_4X2__SCALAR, n_gt_2_subtile) {
2336 for (uint32_t n = 3; n < 4; n++) {
2337 for (size_t k = 1; k <= 5; k += 2) {
2338 for (uint32_t m = 1; m <= 4; m++) {
2339 GemmMicrokernelTester()
2340 .mr(4)
2341 .nr(2)
2342 .kr(1)
2343 .sr(1)
2344 .m(m)
2345 .n(n)
2346 .k(k)
2347 .iterations(1)
2348 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2349 }
2350 }
2351 }
2352}
2353
2354TEST(F32_GEMM_4X2__SCALAR, n_div_2) {
2355 for (uint32_t n = 4; n <= 6; n += 2) {
2356 for (size_t k = 1; k <= 5; k += 2) {
2357 GemmMicrokernelTester()
2358 .mr(4)
2359 .nr(2)
2360 .kr(1)
2361 .sr(1)
2362 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002363 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002364 .k(k)
2365 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2366 }
2367 }
2368}
2369
2370TEST(F32_GEMM_4X2__SCALAR, n_div_2_strided_cn) {
2371 for (uint32_t n = 4; n <= 6; n += 2) {
2372 for (size_t k = 1; k <= 5; k += 2) {
2373 GemmMicrokernelTester()
2374 .mr(4)
2375 .nr(2)
2376 .kr(1)
2377 .sr(1)
2378 .m(4)
2379 .n(n)
2380 .k(k)
2381 .cn_stride(5)
2382 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2383 }
2384 }
2385}
2386
2387TEST(F32_GEMM_4X2__SCALAR, n_div_2_strided_a) {
2388 for (uint32_t n = 4; n <= 6; n += 2) {
2389 for (size_t k = 1; k <= 5; k += 2) {
2390 GemmMicrokernelTester()
2391 .mr(4)
2392 .nr(2)
2393 .kr(1)
2394 .sr(1)
2395 .m(4)
2396 .n(n)
2397 .k(k)
2398 .a_stride(7)
2399 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2400 }
2401 }
2402}
2403
2404TEST(F32_GEMM_4X2__SCALAR, n_div_2_subtile) {
2405 for (uint32_t n = 4; n <= 6; n += 2) {
2406 for (size_t k = 1; k <= 5; k += 2) {
2407 for (uint32_t m = 1; m <= 4; m++) {
2408 GemmMicrokernelTester()
2409 .mr(4)
2410 .nr(2)
2411 .kr(1)
2412 .sr(1)
2413 .m(m)
2414 .n(n)
2415 .k(k)
2416 .iterations(1)
2417 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2418 }
2419 }
2420 }
2421}
2422
2423TEST(F32_GEMM_4X2__SCALAR, strided_cm_subtile) {
2424 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002425 for (uint32_t n = 1; n <= 2; n++) {
2426 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002427 GemmMicrokernelTester()
2428 .mr(4)
2429 .nr(2)
2430 .kr(1)
2431 .sr(1)
2432 .m(m)
2433 .n(n)
2434 .k(k)
2435 .cm_stride(5)
2436 .iterations(1)
2437 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2438 }
2439 }
2440 }
2441}
2442
2443TEST(F32_GEMM_4X2__SCALAR, strided_cm) {
2444 GemmMicrokernelTester()
2445 .mr(4)
2446 .nr(2)
2447 .kr(1)
2448 .sr(1)
2449 .m(4)
2450 .n(2)
2451 .k(1)
2452 .cm_stride(5)
2453 .Test(xnn_f32_gemm_ukernel_4x2__scalar);
2454}