// blob: 53fe350a58acb7723a89802185c9c803aea96117
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-gemm-relu.yaml
// Generator: tools/generate-gemm-test.py


#include <cstddef>
#include <cstdint>

#include <gtest/gtest.h>

#include <xnnpack/allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>

#include <xnnpack/gemm.h>
#include <xnnpack/igemm.h>
#include <xnnpack/ppmm.h>
#include "gemm-microkernel-tester.h"


#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Test suite for xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd.
  // Every case configures the tester with mr=4, nr=2, kr=4, sr=1 and varies
  // m, n, k and the A/C strides.

  // Full 4x2 tile with k == 4.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(4)
      .n(2)
      .k(4)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }

  // Full tile, k == 4, with cn_stride set to 5.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, strided_cn) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(4)
      .n(2)
      .k(4)
      .cn_stride(5)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }

  // Full tile, k == 4, with a_stride set to 7.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4_strided_a) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(4)
      .n(2)
      .k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }

  // k == 4 over every (m, n) with 1 <= m <= 4 and 1 <= n <= 2.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }

  // k == 4, n == 2, m swept over 1..4.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile_m) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(m)
        .n(2)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }

  // k == 4, m == 4, n swept over 1..2.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile_n) {
    for (uint32_t n = 1; n <= 2; n++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }

  // Full tile with k in 1..3 (k < 4).
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_lt_4) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(4)
        .n(2)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }

  // Full tile with k in 1..3 and a_stride set to 7.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_lt_4_strided_a) {
    for (size_t k = 1; k < 4; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(4)
        .n(2)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }

  // k in 1..3 over every (m, n) with 1 <= m <= 4 and 1 <= n <= 2.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_lt_4_subtile) {
    for (size_t k = 1; k < 4; k++) {
      for (uint32_t n = 1; n <= 2; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(2)
            .kr(4)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
        }
      }
    }
  }

  // Full tile with k in 5..7 (k > 4).
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_gt_4) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(4)
        .n(2)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }

  // Full tile with k in 5..7 and a_stride set to 11.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_gt_4_strided_a) {
    for (size_t k = 5; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(4)
        .n(2)
        .k(k)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }

  // k in 5..7 over every (m, n) with 1 <= m <= 4 and 1 <= n <= 2.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_gt_4_subtile) {
    for (size_t k = 5; k < 8; k++) {
      for (uint32_t n = 1; n <= 2; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(2)
            .kr(4)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
        }
      }
    }
  }

  // Full tile with k a multiple of 4, from 8 through 40.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_div_4) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(4)
        .n(2)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }

  // Full tile with k a multiple of 4 (8..40) and a_stride set to 43.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_div_4_strided_a) {
    for (size_t k = 8; k <= 40; k += 4) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(4)
        .n(2)
        .k(k)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }

  // k a multiple of 4 (8..40) over every (m, n) with 1 <= m <= 4, 1 <= n <= 2.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_div_4_subtile) {
    for (size_t k = 8; k <= 40; k += 4) {
      for (uint32_t n = 1; n <= 2; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(2)
            .kr(4)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
        }
      }
    }
  }

  // n == 3 (the only value in [3, 4)), m == 4, k in {1, 6, 11, 16}.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_gt_2) {
    for (uint32_t n = 3; n < 4; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }

  // n == 3, m == 4, k in {1, 6, 11, 16}, with cn_stride set to 5.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_gt_2_strided_cn) {
    for (uint32_t n = 3; n < 4; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .cn_stride(5)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }

  // n == 3, m == 4, k in {1, 6, 11, 16}, with a_stride set to 23.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_gt_2_strided_a) {
    for (uint32_t n = 3; n < 4; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }

  // n == 3, k in {1, 6, 11, 16}, m swept over 1..4.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_gt_2_subtile) {
    for (uint32_t n = 3; n < 4; n++) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(2)
            .kr(4)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
        }
      }
    }
  }

  // n in {4, 6} (multiples of 2), m == 4, k in {1, 6, 11, 16}.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_div_2) {
    for (uint32_t n = 4; n <= 6; n += 2) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }

  // n in {4, 6}, m == 4, k in {1, 6, 11, 16}, with cn_stride set to 5.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_div_2_strided_cn) {
    for (uint32_t n = 4; n <= 6; n += 2) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .cn_stride(5)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }

  // n in {4, 6}, m == 4, k in {1, 6, 11, 16}, with a_stride set to 23.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_div_2_strided_a) {
    for (uint32_t n = 4; n <= 6; n += 2) {
      for (size_t k = 1; k <= 20; k += 5) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }

  // n in {4, 6}, k in {1, 6, 11, 16}, m swept over 1..4.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_div_2_subtile) {
    for (uint32_t n = 4; n <= 6; n += 2) {
      for (size_t k = 1; k <= 20; k += 5) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(2)
            .kr(4)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
        }
      }
    }
  }

  // cm_stride set to 5 over k in {1, 6, 11, 16} and every (m, n) subtile.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, strided_cm_subtile) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t n = 1; n <= 2; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(2)
            .kr(4)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
        }
      }
    }
  }

  // Full tile, k == 4, with cm_stride set to 5.
  TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, strided_cm) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(4)
      .n(2)
      .k(4)
      .cm_stride(5)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Test suite for xnn_f32_gemm_relu_ukernel_2x4__wasm.
  // Every case configures the tester with mr=2, nr=4, kr=1, sr=1 and varies
  // m, n, k and the A/C strides.

  // Full 2x4 tile with k == 1.
  TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(1)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
  }

  // Full tile, k == 1, with cn_stride set to 7.
  TEST(F32_GEMM_RELU_2X4__WASM, strided_cn) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(1)
      .cn_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
  }

  // Full tile, k == 1, with a_stride set to 3.
  TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1_strided_a) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(1)
      .a_stride(3)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
  }

  // k == 1 over every (m, n) with 1 <= m <= 2 and 1 <= n <= 4.
  TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1_subtile) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(1)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }

  // k == 1, n == 4, m swept over 1..2.
  TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1_subtile_m) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(4)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }

  // k == 1, m == 2, n swept over 1..4.
  TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1_subtile_n) {
    for (uint32_t n = 1; n <= 4; n++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }

  // Full tile with k in 2..9 (k > 1).
  TEST(F32_GEMM_RELU_2X4__WASM, k_gt_1) {
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }

  // Full tile with k in 2..9 and a_stride set to 11.
  TEST(F32_GEMM_RELU_2X4__WASM, k_gt_1_strided_a) {
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }

  // k in 2..9 over every (m, n) with 1 <= m <= 2 and 1 <= n <= 4.
  TEST(F32_GEMM_RELU_2X4__WASM, k_gt_1_subtile) {
    for (size_t k = 2; k < 10; k++) {
      for (uint32_t n = 1; n <= 4; n++) {
        for (uint32_t m = 1; m <= 2; m++) {
          GemmMicrokernelTester()
            .mr(2)
            .nr(4)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
        }
      }
    }
  }

  // n in 5..7 (n > 4), m == 2, k in {1, 3, 5}.
  TEST(F32_GEMM_RELU_2X4__WASM, n_gt_4) {
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(2)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }

  // n in 5..7, m == 2, k in {1, 3, 5}, with cn_stride set to 7.
  TEST(F32_GEMM_RELU_2X4__WASM, n_gt_4_strided_cn) {
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(2)
          .n(n)
          .k(k)
          .cn_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }

  // n in 5..7, m == 2, k in {1, 3, 5}, with a_stride set to 7.
  TEST(F32_GEMM_RELU_2X4__WASM, n_gt_4_strided_a) {
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(2)
          .n(n)
          .k(k)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }

  // n in 5..7, k in {1, 3, 5}, m swept over 1..2.
  TEST(F32_GEMM_RELU_2X4__WASM, n_gt_4_subtile) {
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 2; m++) {
          GemmMicrokernelTester()
            .mr(2)
            .nr(4)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
        }
      }
    }
  }

  // n in {8, 12} (multiples of 4), m == 2, k in {1, 3, 5}.
  TEST(F32_GEMM_RELU_2X4__WASM, n_div_4) {
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(2)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }

  // n in {8, 12}, m == 2, k in {1, 3, 5}, with cn_stride set to 7.
  TEST(F32_GEMM_RELU_2X4__WASM, n_div_4_strided_cn) {
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(2)
          .n(n)
          .k(k)
          .cn_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }

  // n in {8, 12}, m == 2, k in {1, 3, 5}, with a_stride set to 7.
  TEST(F32_GEMM_RELU_2X4__WASM, n_div_4_strided_a) {
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(2)
          .n(n)
          .k(k)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }

  // n in {8, 12}, k in {1, 3, 5}, m swept over 1..2.
  TEST(F32_GEMM_RELU_2X4__WASM, n_div_4_subtile) {
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 2; m++) {
          GemmMicrokernelTester()
            .mr(2)
            .nr(4)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
        }
      }
    }
  }

  // cm_stride set to 7 over k in {1, 3, 5} and every (m, n) subtile.
  TEST(F32_GEMM_RELU_2X4__WASM, strided_cm_subtile) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t n = 1; n <= 4; n++) {
        for (uint32_t m = 1; m <= 2; m++) {
          GemmMicrokernelTester()
            .mr(2)
            .nr(4)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
        }
      }
    }
  }

  // Full tile, k == 1, with cm_stride set to 7.
  TEST(F32_GEMM_RELU_2X4__WASM, strided_cm) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(2)
      .n(4)
      .k(1)
      .cm_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
  }
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Test suite for xnn_f32_gemm_relu_ukernel_4x4__wasm.
  // Every case configures the tester with mr=4, nr=4, kr=1, sr=1 and varies
  // m, n, k and the A/C strides.

  // Full 4x4 tile with k == 1.
  TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
  }

  // Full tile, k == 1, with cn_stride set to 7.
  TEST(F32_GEMM_RELU_4X4__WASM, strided_cn) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(1)
      .cn_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
  }

  // Full tile, k == 1, with a_stride set to 3.
  TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1_strided_a) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(1)
      .a_stride(3)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
  }

  // k == 1 over every (m, n) with 1 <= m <= 4 and 1 <= n <= 4.
  TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1_subtile) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(1)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }

  // k == 1, n == 4, m swept over 1..4.
  TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1_subtile_m) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(4)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }

  // k == 1, m == 4, n swept over 1..4.
  TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1_subtile_n) {
    for (uint32_t n = 1; n <= 4; n++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }

  // Full tile with k in 2..9 (k > 1).
  TEST(F32_GEMM_RELU_4X4__WASM, k_gt_1) {
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }

  // Full tile with k in 2..9 and a_stride set to 11.
  TEST(F32_GEMM_RELU_4X4__WASM, k_gt_1_strided_a) {
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }

  // k in 2..9 over every (m, n) with 1 <= m <= 4 and 1 <= n <= 4.
  TEST(F32_GEMM_RELU_4X4__WASM, k_gt_1_subtile) {
    for (size_t k = 2; k < 10; k++) {
      for (uint32_t n = 1; n <= 4; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(4)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
        }
      }
    }
  }

  // n in 5..7 (n > 4), m == 4, k in {1, 3, 5}.
  TEST(F32_GEMM_RELU_4X4__WASM, n_gt_4) {
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }

  // n in 5..7, m == 4, k in {1, 3, 5}, with cn_stride set to 7.
  TEST(F32_GEMM_RELU_4X4__WASM, n_gt_4_strided_cn) {
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .cn_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }

  // n in 5..7, m == 4, k in {1, 3, 5}, with a_stride set to 7.
  TEST(F32_GEMM_RELU_4X4__WASM, n_gt_4_strided_a) {
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }

  // n in 5..7, k in {1, 3, 5}, m swept over 1..4.
  TEST(F32_GEMM_RELU_4X4__WASM, n_gt_4_subtile) {
    for (uint32_t n = 5; n < 8; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(4)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
        }
      }
    }
  }

  // n in {8, 12} (multiples of 4), m == 4, k in {1, 3, 5}.
  TEST(F32_GEMM_RELU_4X4__WASM, n_div_4) {
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }

  // n in {8, 12}, m == 4, k in {1, 3, 5}, with cn_stride set to 7.
  TEST(F32_GEMM_RELU_4X4__WASM, n_div_4_strided_cn) {
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .cn_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }

  // n in {8, 12}, m == 4, k in {1, 3, 5}, with a_stride set to 7.
  TEST(F32_GEMM_RELU_4X4__WASM, n_div_4_strided_a) {
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }

  // n in {8, 12}, k in {1, 3, 5}, m swept over 1..4.
  TEST(F32_GEMM_RELU_4X4__WASM, n_div_4_subtile) {
    for (uint32_t n = 8; n <= 12; n += 4) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(4)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
        }
      }
    }
  }

  // cm_stride set to 7 over k in {1, 3, 5} and every (m, n) subtile.
  TEST(F32_GEMM_RELU_4X4__WASM, strided_cm_subtile) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t n = 1; n <= 4; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(4)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
        }
      }
    }
  }

  // Full tile, k == 1, with cm_stride set to 7.
  TEST(F32_GEMM_RELU_4X4__WASM, strided_cm) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(1)
      .cm_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
  }
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Test suite for xnn_f32_gemm_relu_ukernel_4x2__wasm.
  // Every case configures the tester with mr=4, nr=2, kr=1, sr=1 and varies
  // m, n, k and the A/C strides.

  // Full 4x2 tile with k == 1.
  TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
  }

  // Full tile, k == 1, with cn_stride set to 5.
  TEST(F32_GEMM_RELU_4X2__WASM, strided_cn) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(1)
      .cn_stride(5)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
  }

  // Full tile, k == 1, with a_stride set to 3.
  TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1_strided_a) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(1)
      .a_stride(3)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
  }

  // k == 1 over every (m, n) with 1 <= m <= 4 and 1 <= n <= 2.
  TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1_subtile) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(1)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }

  // k == 1, n == 2, m swept over 1..4.
  TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1_subtile_m) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(2)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }

  // k == 1, m == 4, n swept over 1..2.
  TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1_subtile_n) {
    for (uint32_t n = 1; n <= 2; n++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }

  // Full tile with k in 2..9 (k > 1).
  TEST(F32_GEMM_RELU_4X2__WASM, k_gt_1) {
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(2)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }

  // Full tile with k in 2..9 and a_stride set to 11.
  TEST(F32_GEMM_RELU_4X2__WASM, k_gt_1_strided_a) {
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(2)
        .k(k)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }

  // k in 2..9 over every (m, n) with 1 <= m <= 4 and 1 <= n <= 2.
  TEST(F32_GEMM_RELU_4X2__WASM, k_gt_1_subtile) {
    for (size_t k = 2; k < 10; k++) {
      for (uint32_t n = 1; n <= 2; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(2)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
        }
      }
    }
  }

  // n == 3 (the only value in [3, 4)), m == 4, k in {1, 3, 5}.
  TEST(F32_GEMM_RELU_4X2__WASM, n_gt_2) {
    for (uint32_t n = 3; n < 4; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }

  // n == 3, m == 4, k in {1, 3, 5}, with cn_stride set to 5.
  TEST(F32_GEMM_RELU_4X2__WASM, n_gt_2_strided_cn) {
    for (uint32_t n = 3; n < 4; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .cn_stride(5)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }

  // n == 3, m == 4, k in {1, 3, 5}, with a_stride set to 7.
  TEST(F32_GEMM_RELU_4X2__WASM, n_gt_2_strided_a) {
    for (uint32_t n = 3; n < 4; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }

  // n == 3, k in {1, 3, 5}, m swept over 1..4.
  TEST(F32_GEMM_RELU_4X2__WASM, n_gt_2_subtile) {
    for (uint32_t n = 3; n < 4; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(2)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
        }
      }
    }
  }

  // n in {4, 6} (multiples of 2), m == 4, k in {1, 3, 5}.
  TEST(F32_GEMM_RELU_4X2__WASM, n_div_2) {
    for (uint32_t n = 4; n <= 6; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }

  // n in {4, 6}, m == 4, k in {1, 3, 5}, with cn_stride set to 5.
  TEST(F32_GEMM_RELU_4X2__WASM, n_div_2_strided_cn) {
    for (uint32_t n = 4; n <= 6; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .cn_stride(5)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }

  // n in {4, 6}, m == 4, k in {1, 3, 5}, with a_stride set to 7.
  TEST(F32_GEMM_RELU_4X2__WASM, n_div_2_strided_a) {
    for (uint32_t n = 4; n <= 6; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(7)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }

  // n in {4, 6}, k in {1, 3, 5}, m swept over 1..4.
  TEST(F32_GEMM_RELU_4X2__WASM, n_div_2_subtile) {
    for (uint32_t n = 4; n <= 6; n += 2) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(2)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
        }
      }
    }
  }

  // cm_stride set to 5 over k in {1, 3, 5} and every (m, n) subtile.
  TEST(F32_GEMM_RELU_4X2__WASM, strided_cm_subtile) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t n = 1; n <= 2; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(2)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
        }
      }
    }
  }

  // Full tile, k == 1, with cm_stride set to 5.
  TEST(F32_GEMM_RELU_4X2__WASM, strided_cm) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(1)
      .cm_stride(5)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
  }
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


1350TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1) {
1351 GemmMicrokernelTester()
1352 .mr(2)
1353 .nr(4)
1354 .kr(1)
1355 .sr(1)
1356 .m(2)
1357 .n(4)
1358 .k(1)
1359 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1360}
1361
1362TEST(F32_GEMM_RELU_2X4__SCALAR, strided_cn) {
1363 GemmMicrokernelTester()
1364 .mr(2)
1365 .nr(4)
1366 .kr(1)
1367 .sr(1)
1368 .m(2)
1369 .n(4)
1370 .k(1)
1371 .cn_stride(7)
1372 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1373}
1374
1375TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1_strided_a) {
1376 GemmMicrokernelTester()
1377 .mr(2)
1378 .nr(4)
1379 .kr(1)
1380 .sr(1)
1381 .m(2)
1382 .n(4)
1383 .k(1)
1384 .a_stride(3)
1385 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1386}
1387
1388TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001389 for (uint32_t n = 1; n <= 4; n++) {
1390 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001391 GemmMicrokernelTester()
1392 .mr(2)
1393 .nr(4)
1394 .kr(1)
1395 .sr(1)
1396 .m(m)
1397 .n(n)
1398 .k(1)
1399 .iterations(1)
1400 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1401 }
1402 }
1403}
1404
1405TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1_subtile_m) {
1406 for (uint32_t m = 1; m <= 2; m++) {
1407 GemmMicrokernelTester()
1408 .mr(2)
1409 .nr(4)
1410 .kr(1)
1411 .sr(1)
1412 .m(m)
1413 .n(4)
1414 .k(1)
1415 .iterations(1)
1416 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1417 }
1418}
1419
1420TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1_subtile_n) {
1421 for (uint32_t n = 1; n <= 4; n++) {
1422 GemmMicrokernelTester()
1423 .mr(2)
1424 .nr(4)
1425 .kr(1)
1426 .sr(1)
1427 .m(2)
1428 .n(n)
1429 .k(1)
1430 .iterations(1)
1431 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1432 }
1433}
1434
1435TEST(F32_GEMM_RELU_2X4__SCALAR, k_gt_1) {
1436 for (size_t k = 2; k < 10; k++) {
1437 GemmMicrokernelTester()
1438 .mr(2)
1439 .nr(4)
1440 .kr(1)
1441 .sr(1)
1442 .m(2)
1443 .n(4)
1444 .k(k)
1445 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1446 }
1447}
1448
1449TEST(F32_GEMM_RELU_2X4__SCALAR, k_gt_1_strided_a) {
1450 for (size_t k = 2; k < 10; k++) {
1451 GemmMicrokernelTester()
1452 .mr(2)
1453 .nr(4)
1454 .kr(1)
1455 .sr(1)
1456 .m(2)
1457 .n(4)
1458 .k(k)
1459 .a_stride(11)
1460 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1461 }
1462}
1463
1464TEST(F32_GEMM_RELU_2X4__SCALAR, k_gt_1_subtile) {
1465 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001466 for (uint32_t n = 1; n <= 4; n++) {
1467 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001468 GemmMicrokernelTester()
1469 .mr(2)
1470 .nr(4)
1471 .kr(1)
1472 .sr(1)
1473 .m(m)
1474 .n(n)
1475 .k(k)
1476 .iterations(1)
1477 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1478 }
1479 }
1480 }
1481}
1482
1483TEST(F32_GEMM_RELU_2X4__SCALAR, n_gt_4) {
1484 for (uint32_t n = 5; n < 8; n++) {
1485 for (size_t k = 1; k <= 5; k += 2) {
1486 GemmMicrokernelTester()
1487 .mr(2)
1488 .nr(4)
1489 .kr(1)
1490 .sr(1)
1491 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001492 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001493 .k(k)
1494 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1495 }
1496 }
1497}
1498
1499TEST(F32_GEMM_RELU_2X4__SCALAR, n_gt_4_strided_cn) {
1500 for (uint32_t n = 5; n < 8; n++) {
1501 for (size_t k = 1; k <= 5; k += 2) {
1502 GemmMicrokernelTester()
1503 .mr(2)
1504 .nr(4)
1505 .kr(1)
1506 .sr(1)
1507 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001508 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001509 .k(k)
1510 .cn_stride(7)
1511 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1512 }
1513 }
1514}
1515
1516TEST(F32_GEMM_RELU_2X4__SCALAR, n_gt_4_strided_a) {
1517 for (uint32_t n = 5; n < 8; n++) {
1518 for (size_t k = 1; k <= 5; k += 2) {
1519 GemmMicrokernelTester()
1520 .mr(2)
1521 .nr(4)
1522 .kr(1)
1523 .sr(1)
1524 .m(2)
1525 .n(n)
1526 .k(k)
1527 .a_stride(7)
1528 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1529 }
1530 }
1531}
1532
1533TEST(F32_GEMM_RELU_2X4__SCALAR, n_gt_4_subtile) {
1534 for (uint32_t n = 5; n < 8; n++) {
1535 for (size_t k = 1; k <= 5; k += 2) {
1536 for (uint32_t m = 1; m <= 2; m++) {
1537 GemmMicrokernelTester()
1538 .mr(2)
1539 .nr(4)
1540 .kr(1)
1541 .sr(1)
1542 .m(m)
1543 .n(n)
1544 .k(k)
1545 .iterations(1)
1546 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1547 }
1548 }
1549 }
1550}
1551
1552TEST(F32_GEMM_RELU_2X4__SCALAR, n_div_4) {
1553 for (uint32_t n = 8; n <= 12; n += 4) {
1554 for (size_t k = 1; k <= 5; k += 2) {
1555 GemmMicrokernelTester()
1556 .mr(2)
1557 .nr(4)
1558 .kr(1)
1559 .sr(1)
1560 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001561 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001562 .k(k)
1563 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1564 }
1565 }
1566}
1567
1568TEST(F32_GEMM_RELU_2X4__SCALAR, n_div_4_strided_cn) {
1569 for (uint32_t n = 8; n <= 12; n += 4) {
1570 for (size_t k = 1; k <= 5; k += 2) {
1571 GemmMicrokernelTester()
1572 .mr(2)
1573 .nr(4)
1574 .kr(1)
1575 .sr(1)
1576 .m(2)
1577 .n(n)
1578 .k(k)
1579 .cn_stride(7)
1580 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1581 }
1582 }
1583}
1584
1585TEST(F32_GEMM_RELU_2X4__SCALAR, n_div_4_strided_a) {
1586 for (uint32_t n = 8; n <= 12; n += 4) {
1587 for (size_t k = 1; k <= 5; k += 2) {
1588 GemmMicrokernelTester()
1589 .mr(2)
1590 .nr(4)
1591 .kr(1)
1592 .sr(1)
1593 .m(2)
1594 .n(n)
1595 .k(k)
1596 .a_stride(7)
1597 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1598 }
1599 }
1600}
1601
1602TEST(F32_GEMM_RELU_2X4__SCALAR, n_div_4_subtile) {
1603 for (uint32_t n = 8; n <= 12; n += 4) {
1604 for (size_t k = 1; k <= 5; k += 2) {
1605 for (uint32_t m = 1; m <= 2; m++) {
1606 GemmMicrokernelTester()
1607 .mr(2)
1608 .nr(4)
1609 .kr(1)
1610 .sr(1)
1611 .m(m)
1612 .n(n)
1613 .k(k)
1614 .iterations(1)
1615 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1616 }
1617 }
1618 }
1619}
1620
1621TEST(F32_GEMM_RELU_2X4__SCALAR, strided_cm_subtile) {
1622 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001623 for (uint32_t n = 1; n <= 4; n++) {
1624 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001625 GemmMicrokernelTester()
1626 .mr(2)
1627 .nr(4)
1628 .kr(1)
1629 .sr(1)
1630 .m(m)
1631 .n(n)
1632 .k(k)
1633 .cm_stride(7)
1634 .iterations(1)
1635 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1636 }
1637 }
1638 }
1639}
1640
1641TEST(F32_GEMM_RELU_2X4__SCALAR, strided_cm) {
1642 GemmMicrokernelTester()
1643 .mr(2)
1644 .nr(4)
1645 .kr(1)
1646 .sr(1)
1647 .m(2)
1648 .n(4)
1649 .k(1)
1650 .cm_stride(7)
1651 .Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
1652}