blob: 2d3a29909d01c56178b39ac69ad7279198078fb0 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-igemm-relu.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/allocator.h>
17#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
26#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_eq_4) {
28 GemmMicrokernelTester()
29 .mr(4)
30 .nr(2)
31 .kr(4)
32 .sr(1)
33 .m(4)
34 .n(2)
35 .k(4)
36 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
37 }
38
39 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, strided_cn) {
40 GemmMicrokernelTester()
41 .mr(4)
42 .nr(2)
43 .kr(4)
44 .sr(1)
45 .m(4)
46 .n(2)
47 .k(4)
48 .cn_stride(5)
49 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
50 }
51
52 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080053 for (uint32_t n = 1; n <= 2; n++) {
54 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080055 GemmMicrokernelTester()
56 .mr(4)
57 .nr(2)
58 .kr(4)
59 .sr(1)
60 .m(m)
61 .n(n)
62 .k(4)
63 .iterations(1)
64 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
65 }
66 }
67 }
68
69 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile_m) {
70 for (uint32_t m = 1; m <= 4; m++) {
71 GemmMicrokernelTester()
72 .mr(4)
73 .nr(2)
74 .kr(4)
75 .sr(1)
76 .m(m)
77 .n(2)
78 .k(4)
79 .iterations(1)
80 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
81 }
82 }
83
84 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile_n) {
85 for (uint32_t n = 1; n <= 2; n++) {
86 GemmMicrokernelTester()
87 .mr(4)
88 .nr(2)
89 .kr(4)
90 .sr(1)
91 .m(4)
92 .n(n)
93 .k(4)
94 .iterations(1)
95 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
96 }
97 }
98
99 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_lt_4) {
100 for (size_t k = 1; k < 4; k++) {
101 GemmMicrokernelTester()
102 .mr(4)
103 .nr(2)
104 .kr(4)
105 .sr(1)
106 .m(4)
107 .n(2)
108 .k(k)
109 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
110 }
111 }
112
113 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_lt_4_subtile) {
114 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800115 for (uint32_t n = 1; n <= 2; n++) {
116 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800117 GemmMicrokernelTester()
118 .mr(4)
119 .nr(2)
120 .kr(4)
121 .sr(1)
122 .m(m)
123 .n(n)
124 .k(k)
125 .iterations(1)
126 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
127 }
128 }
129 }
130 }
131
132 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_gt_4) {
133 for (size_t k = 5; k < 8; k++) {
134 GemmMicrokernelTester()
135 .mr(4)
136 .nr(2)
137 .kr(4)
138 .sr(1)
139 .m(4)
140 .n(2)
141 .k(k)
142 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
143 }
144 }
145
146 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_gt_4_subtile) {
147 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800148 for (uint32_t n = 1; n <= 2; n++) {
149 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800150 GemmMicrokernelTester()
151 .mr(4)
152 .nr(2)
153 .kr(4)
154 .sr(1)
155 .m(m)
156 .n(n)
157 .k(k)
158 .iterations(1)
159 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
160 }
161 }
162 }
163 }
164
165 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_div_4) {
166 for (size_t k = 8; k <= 40; k += 4) {
167 GemmMicrokernelTester()
168 .mr(4)
169 .nr(2)
170 .kr(4)
171 .sr(1)
172 .m(4)
173 .n(2)
174 .k(k)
175 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
176 }
177 }
178
179 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_div_4_subtile) {
180 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800181 for (uint32_t n = 1; n <= 2; n++) {
182 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800183 GemmMicrokernelTester()
184 .mr(4)
185 .nr(2)
186 .kr(4)
187 .sr(1)
188 .m(m)
189 .n(n)
190 .k(k)
191 .iterations(1)
192 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
193 }
194 }
195 }
196 }
197
198 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_gt_2) {
199 for (uint32_t n = 3; n < 4; n++) {
200 for (size_t k = 1; k <= 20; k += 5) {
201 GemmMicrokernelTester()
202 .mr(4)
203 .nr(2)
204 .kr(4)
205 .sr(1)
206 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800207 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800208 .k(k)
209 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
210 }
211 }
212 }
213
214 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_gt_2_strided_cn) {
215 for (uint32_t n = 3; n < 4; n++) {
216 for (size_t k = 1; k <= 20; k += 5) {
217 GemmMicrokernelTester()
218 .mr(4)
219 .nr(2)
220 .kr(4)
221 .sr(1)
222 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800223 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800224 .k(k)
225 .cn_stride(5)
226 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
227 }
228 }
229 }
230
231 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_gt_2_subtile) {
232 for (uint32_t n = 3; n < 4; n++) {
233 for (size_t k = 1; k <= 20; k += 5) {
234 for (uint32_t m = 1; m <= 4; m++) {
235 GemmMicrokernelTester()
236 .mr(4)
237 .nr(2)
238 .kr(4)
239 .sr(1)
240 .m(m)
241 .n(n)
242 .k(k)
243 .iterations(1)
244 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
245 }
246 }
247 }
248 }
249
250 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_div_2) {
251 for (uint32_t n = 4; n <= 6; n += 2) {
252 for (size_t k = 1; k <= 20; k += 5) {
253 GemmMicrokernelTester()
254 .mr(4)
255 .nr(2)
256 .kr(4)
257 .sr(1)
258 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800259 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800260 .k(k)
261 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
262 }
263 }
264 }
265
266 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_div_2_strided_cn) {
267 for (uint32_t n = 4; n <= 6; n += 2) {
268 for (size_t k = 1; k <= 20; k += 5) {
269 GemmMicrokernelTester()
270 .mr(4)
271 .nr(2)
272 .kr(4)
273 .sr(1)
274 .m(4)
275 .n(n)
276 .k(k)
277 .cn_stride(5)
278 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
279 }
280 }
281 }
282
283 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_div_2_subtile) {
284 for (uint32_t n = 4; n <= 6; n += 2) {
285 for (size_t k = 1; k <= 20; k += 5) {
286 for (uint32_t m = 1; m <= 4; m++) {
287 GemmMicrokernelTester()
288 .mr(4)
289 .nr(2)
290 .kr(4)
291 .sr(1)
292 .m(m)
293 .n(n)
294 .k(k)
295 .iterations(1)
296 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
297 }
298 }
299 }
300 }
301
302 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, small_kernel) {
303 for (size_t k = 1; k <= 20; k += 5) {
304 GemmMicrokernelTester()
305 .mr(4)
306 .nr(2)
307 .kr(4)
308 .sr(1)
309 .m(4)
310 .n(2)
311 .k(k)
312 .ks(3)
313 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
314 }
315 }
316
317 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, small_kernel_subtile) {
318 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800319 for (uint32_t n = 1; n <= 2; n++) {
320 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800321 GemmMicrokernelTester()
322 .mr(4)
323 .nr(2)
324 .kr(4)
325 .sr(1)
326 .m(m)
327 .n(n)
328 .k(k)
329 .ks(3)
330 .iterations(1)
331 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
332 }
333 }
334 }
335 }
336
337 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_gt_2_small_kernel) {
338 for (uint32_t n = 3; n < 4; n++) {
339 for (size_t k = 1; k <= 20; k += 5) {
340 GemmMicrokernelTester()
341 .mr(4)
342 .nr(2)
343 .kr(4)
344 .sr(1)
345 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800346 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800347 .k(k)
348 .ks(3)
349 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
350 }
351 }
352 }
353
354 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_div_2_small_kernel) {
355 for (uint32_t n = 4; n <= 6; n += 2) {
356 for (size_t k = 1; k <= 20; k += 5) {
357 GemmMicrokernelTester()
358 .mr(4)
359 .nr(2)
360 .kr(4)
361 .sr(1)
362 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800363 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800364 .k(k)
365 .ks(3)
366 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
367 }
368 }
369 }
370
371 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, strided_cm_subtile) {
372 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800373 for (uint32_t n = 1; n <= 2; n++) {
374 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800375 GemmMicrokernelTester()
376 .mr(4)
377 .nr(2)
378 .kr(4)
379 .sr(1)
380 .m(m)
381 .n(n)
382 .k(k)
383 .cm_stride(5)
384 .iterations(1)
385 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
386 }
387 }
388 }
389 }
390
391 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, a_offset) {
392 for (size_t k = 1; k <= 20; k += 5) {
393 GemmMicrokernelTester()
394 .mr(4)
395 .nr(2)
396 .kr(4)
397 .sr(1)
398 .m(4)
399 .n(2)
400 .k(k)
401 .ks(3)
402 .a_offset(83)
403 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
404 }
405 }
406
407 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800408 for (size_t k = 1; k <= 20; k += 5) {
409 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800410 GemmMicrokernelTester()
411 .mr(4)
412 .nr(2)
413 .kr(4)
414 .sr(1)
415 .m(4)
416 .n(2)
417 .k(k)
418 .ks(3)
419 .a_offset(83)
420 .zero_index(mz)
421 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
422 }
423 }
424 }
425
426 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, strided_cm) {
427 GemmMicrokernelTester()
428 .mr(4)
429 .nr(2)
430 .kr(4)
431 .sr(1)
432 .m(4)
433 .n(2)
434 .k(4)
435 .cm_stride(5)
436 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
437 }
438#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
439
440
441#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
442 TEST(F32_IGEMM_RELU_2X4__WASM, k_eq_1) {
443 GemmMicrokernelTester()
444 .mr(2)
445 .nr(4)
446 .kr(1)
447 .sr(1)
448 .m(2)
449 .n(4)
450 .k(1)
451 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
452 }
453
454 TEST(F32_IGEMM_RELU_2X4__WASM, strided_cn) {
455 GemmMicrokernelTester()
456 .mr(2)
457 .nr(4)
458 .kr(1)
459 .sr(1)
460 .m(2)
461 .n(4)
462 .k(1)
463 .cn_stride(7)
464 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
465 }
466
467 TEST(F32_IGEMM_RELU_2X4__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800468 for (uint32_t n = 1; n <= 4; n++) {
469 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800470 GemmMicrokernelTester()
471 .mr(2)
472 .nr(4)
473 .kr(1)
474 .sr(1)
475 .m(m)
476 .n(n)
477 .k(1)
478 .iterations(1)
479 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
480 }
481 }
482 }
483
484 TEST(F32_IGEMM_RELU_2X4__WASM, k_eq_1_subtile_m) {
485 for (uint32_t m = 1; m <= 2; m++) {
486 GemmMicrokernelTester()
487 .mr(2)
488 .nr(4)
489 .kr(1)
490 .sr(1)
491 .m(m)
492 .n(4)
493 .k(1)
494 .iterations(1)
495 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
496 }
497 }
498
499 TEST(F32_IGEMM_RELU_2X4__WASM, k_eq_1_subtile_n) {
500 for (uint32_t n = 1; n <= 4; n++) {
501 GemmMicrokernelTester()
502 .mr(2)
503 .nr(4)
504 .kr(1)
505 .sr(1)
506 .m(2)
507 .n(n)
508 .k(1)
509 .iterations(1)
510 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
511 }
512 }
513
514 TEST(F32_IGEMM_RELU_2X4__WASM, k_gt_1) {
515 for (size_t k = 2; k < 10; k++) {
516 GemmMicrokernelTester()
517 .mr(2)
518 .nr(4)
519 .kr(1)
520 .sr(1)
521 .m(2)
522 .n(4)
523 .k(k)
524 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
525 }
526 }
527
528 TEST(F32_IGEMM_RELU_2X4__WASM, k_gt_1_subtile) {
529 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800530 for (uint32_t n = 1; n <= 4; n++) {
531 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800532 GemmMicrokernelTester()
533 .mr(2)
534 .nr(4)
535 .kr(1)
536 .sr(1)
537 .m(m)
538 .n(n)
539 .k(k)
540 .iterations(1)
541 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
542 }
543 }
544 }
545 }
546
547 TEST(F32_IGEMM_RELU_2X4__WASM, n_gt_4) {
548 for (uint32_t n = 5; n < 8; n++) {
549 for (size_t k = 1; k <= 5; k += 2) {
550 GemmMicrokernelTester()
551 .mr(2)
552 .nr(4)
553 .kr(1)
554 .sr(1)
555 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800556 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800557 .k(k)
558 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
559 }
560 }
561 }
562
563 TEST(F32_IGEMM_RELU_2X4__WASM, n_gt_4_strided_cn) {
564 for (uint32_t n = 5; n < 8; n++) {
565 for (size_t k = 1; k <= 5; k += 2) {
566 GemmMicrokernelTester()
567 .mr(2)
568 .nr(4)
569 .kr(1)
570 .sr(1)
571 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800572 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800573 .k(k)
574 .cn_stride(7)
575 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
576 }
577 }
578 }
579
580 TEST(F32_IGEMM_RELU_2X4__WASM, n_gt_4_subtile) {
581 for (uint32_t n = 5; n < 8; n++) {
582 for (size_t k = 1; k <= 5; k += 2) {
583 for (uint32_t m = 1; m <= 2; m++) {
584 GemmMicrokernelTester()
585 .mr(2)
586 .nr(4)
587 .kr(1)
588 .sr(1)
589 .m(m)
590 .n(n)
591 .k(k)
592 .iterations(1)
593 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
594 }
595 }
596 }
597 }
598
599 TEST(F32_IGEMM_RELU_2X4__WASM, n_div_4) {
600 for (uint32_t n = 8; n <= 12; n += 4) {
601 for (size_t k = 1; k <= 5; k += 2) {
602 GemmMicrokernelTester()
603 .mr(2)
604 .nr(4)
605 .kr(1)
606 .sr(1)
607 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800608 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800609 .k(k)
610 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
611 }
612 }
613 }
614
615 TEST(F32_IGEMM_RELU_2X4__WASM, n_div_4_strided_cn) {
616 for (uint32_t n = 8; n <= 12; n += 4) {
617 for (size_t k = 1; k <= 5; k += 2) {
618 GemmMicrokernelTester()
619 .mr(2)
620 .nr(4)
621 .kr(1)
622 .sr(1)
623 .m(2)
624 .n(n)
625 .k(k)
626 .cn_stride(7)
627 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
628 }
629 }
630 }
631
632 TEST(F32_IGEMM_RELU_2X4__WASM, n_div_4_subtile) {
633 for (uint32_t n = 8; n <= 12; n += 4) {
634 for (size_t k = 1; k <= 5; k += 2) {
635 for (uint32_t m = 1; m <= 2; m++) {
636 GemmMicrokernelTester()
637 .mr(2)
638 .nr(4)
639 .kr(1)
640 .sr(1)
641 .m(m)
642 .n(n)
643 .k(k)
644 .iterations(1)
645 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
646 }
647 }
648 }
649 }
650
651 TEST(F32_IGEMM_RELU_2X4__WASM, small_kernel) {
652 for (size_t k = 1; k <= 5; k += 2) {
653 GemmMicrokernelTester()
654 .mr(2)
655 .nr(4)
656 .kr(1)
657 .sr(1)
658 .m(2)
659 .n(4)
660 .k(k)
661 .ks(3)
662 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
663 }
664 }
665
666 TEST(F32_IGEMM_RELU_2X4__WASM, small_kernel_subtile) {
667 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800668 for (uint32_t n = 1; n <= 4; n++) {
669 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800670 GemmMicrokernelTester()
671 .mr(2)
672 .nr(4)
673 .kr(1)
674 .sr(1)
675 .m(m)
676 .n(n)
677 .k(k)
678 .ks(3)
679 .iterations(1)
680 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
681 }
682 }
683 }
684 }
685
686 TEST(F32_IGEMM_RELU_2X4__WASM, n_gt_4_small_kernel) {
687 for (uint32_t n = 5; n < 8; n++) {
688 for (size_t k = 1; k <= 5; k += 2) {
689 GemmMicrokernelTester()
690 .mr(2)
691 .nr(4)
692 .kr(1)
693 .sr(1)
694 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800695 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800696 .k(k)
697 .ks(3)
698 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
699 }
700 }
701 }
702
703 TEST(F32_IGEMM_RELU_2X4__WASM, n_div_4_small_kernel) {
704 for (uint32_t n = 8; n <= 12; n += 4) {
705 for (size_t k = 1; k <= 5; k += 2) {
706 GemmMicrokernelTester()
707 .mr(2)
708 .nr(4)
709 .kr(1)
710 .sr(1)
711 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800712 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800713 .k(k)
714 .ks(3)
715 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
716 }
717 }
718 }
719
720 TEST(F32_IGEMM_RELU_2X4__WASM, strided_cm_subtile) {
721 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800722 for (uint32_t n = 1; n <= 4; n++) {
723 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800724 GemmMicrokernelTester()
725 .mr(2)
726 .nr(4)
727 .kr(1)
728 .sr(1)
729 .m(m)
730 .n(n)
731 .k(k)
732 .cm_stride(7)
733 .iterations(1)
734 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
735 }
736 }
737 }
738 }
739
740 TEST(F32_IGEMM_RELU_2X4__WASM, a_offset) {
741 for (size_t k = 1; k <= 5; k += 2) {
742 GemmMicrokernelTester()
743 .mr(2)
744 .nr(4)
745 .kr(1)
746 .sr(1)
747 .m(2)
748 .n(4)
749 .k(k)
750 .ks(3)
751 .a_offset(13)
752 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
753 }
754 }
755
756 TEST(F32_IGEMM_RELU_2X4__WASM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800757 for (size_t k = 1; k <= 5; k += 2) {
758 for (uint32_t mz = 0; mz < 2; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800759 GemmMicrokernelTester()
760 .mr(2)
761 .nr(4)
762 .kr(1)
763 .sr(1)
764 .m(2)
765 .n(4)
766 .k(k)
767 .ks(3)
768 .a_offset(13)
769 .zero_index(mz)
770 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
771 }
772 }
773 }
774
775 TEST(F32_IGEMM_RELU_2X4__WASM, strided_cm) {
776 GemmMicrokernelTester()
777 .mr(2)
778 .nr(4)
779 .kr(1)
780 .sr(1)
781 .m(2)
782 .n(4)
783 .k(1)
784 .cm_stride(7)
785 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
786 }
787#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
788
789
790#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
791 TEST(F32_IGEMM_RELU_4X4__WASM, k_eq_1) {
792 GemmMicrokernelTester()
793 .mr(4)
794 .nr(4)
795 .kr(1)
796 .sr(1)
797 .m(4)
798 .n(4)
799 .k(1)
800 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
801 }
802
803 TEST(F32_IGEMM_RELU_4X4__WASM, strided_cn) {
804 GemmMicrokernelTester()
805 .mr(4)
806 .nr(4)
807 .kr(1)
808 .sr(1)
809 .m(4)
810 .n(4)
811 .k(1)
812 .cn_stride(7)
813 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
814 }
815
816 TEST(F32_IGEMM_RELU_4X4__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800817 for (uint32_t n = 1; n <= 4; n++) {
818 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800819 GemmMicrokernelTester()
820 .mr(4)
821 .nr(4)
822 .kr(1)
823 .sr(1)
824 .m(m)
825 .n(n)
826 .k(1)
827 .iterations(1)
828 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
829 }
830 }
831 }
832
833 TEST(F32_IGEMM_RELU_4X4__WASM, k_eq_1_subtile_m) {
834 for (uint32_t m = 1; m <= 4; m++) {
835 GemmMicrokernelTester()
836 .mr(4)
837 .nr(4)
838 .kr(1)
839 .sr(1)
840 .m(m)
841 .n(4)
842 .k(1)
843 .iterations(1)
844 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
845 }
846 }
847
848 TEST(F32_IGEMM_RELU_4X4__WASM, k_eq_1_subtile_n) {
849 for (uint32_t n = 1; n <= 4; n++) {
850 GemmMicrokernelTester()
851 .mr(4)
852 .nr(4)
853 .kr(1)
854 .sr(1)
855 .m(4)
856 .n(n)
857 .k(1)
858 .iterations(1)
859 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
860 }
861 }
862
863 TEST(F32_IGEMM_RELU_4X4__WASM, k_gt_1) {
864 for (size_t k = 2; k < 10; k++) {
865 GemmMicrokernelTester()
866 .mr(4)
867 .nr(4)
868 .kr(1)
869 .sr(1)
870 .m(4)
871 .n(4)
872 .k(k)
873 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
874 }
875 }
876
877 TEST(F32_IGEMM_RELU_4X4__WASM, k_gt_1_subtile) {
878 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800879 for (uint32_t n = 1; n <= 4; n++) {
880 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800881 GemmMicrokernelTester()
882 .mr(4)
883 .nr(4)
884 .kr(1)
885 .sr(1)
886 .m(m)
887 .n(n)
888 .k(k)
889 .iterations(1)
890 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
891 }
892 }
893 }
894 }
895
896 TEST(F32_IGEMM_RELU_4X4__WASM, n_gt_4) {
897 for (uint32_t n = 5; n < 8; n++) {
898 for (size_t k = 1; k <= 5; k += 2) {
899 GemmMicrokernelTester()
900 .mr(4)
901 .nr(4)
902 .kr(1)
903 .sr(1)
904 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800905 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800906 .k(k)
907 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
908 }
909 }
910 }
911
912 TEST(F32_IGEMM_RELU_4X4__WASM, n_gt_4_strided_cn) {
913 for (uint32_t n = 5; n < 8; n++) {
914 for (size_t k = 1; k <= 5; k += 2) {
915 GemmMicrokernelTester()
916 .mr(4)
917 .nr(4)
918 .kr(1)
919 .sr(1)
920 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800921 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800922 .k(k)
923 .cn_stride(7)
924 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
925 }
926 }
927 }
928
929 TEST(F32_IGEMM_RELU_4X4__WASM, n_gt_4_subtile) {
930 for (uint32_t n = 5; n < 8; n++) {
931 for (size_t k = 1; k <= 5; k += 2) {
932 for (uint32_t m = 1; m <= 4; m++) {
933 GemmMicrokernelTester()
934 .mr(4)
935 .nr(4)
936 .kr(1)
937 .sr(1)
938 .m(m)
939 .n(n)
940 .k(k)
941 .iterations(1)
942 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
943 }
944 }
945 }
946 }
947
948 TEST(F32_IGEMM_RELU_4X4__WASM, n_div_4) {
949 for (uint32_t n = 8; n <= 12; n += 4) {
950 for (size_t k = 1; k <= 5; k += 2) {
951 GemmMicrokernelTester()
952 .mr(4)
953 .nr(4)
954 .kr(1)
955 .sr(1)
956 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800957 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800958 .k(k)
959 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
960 }
961 }
962 }
963
964 TEST(F32_IGEMM_RELU_4X4__WASM, n_div_4_strided_cn) {
965 for (uint32_t n = 8; n <= 12; n += 4) {
966 for (size_t k = 1; k <= 5; k += 2) {
967 GemmMicrokernelTester()
968 .mr(4)
969 .nr(4)
970 .kr(1)
971 .sr(1)
972 .m(4)
973 .n(n)
974 .k(k)
975 .cn_stride(7)
976 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
977 }
978 }
979 }
980
981 TEST(F32_IGEMM_RELU_4X4__WASM, n_div_4_subtile) {
982 for (uint32_t n = 8; n <= 12; n += 4) {
983 for (size_t k = 1; k <= 5; k += 2) {
984 for (uint32_t m = 1; m <= 4; m++) {
985 GemmMicrokernelTester()
986 .mr(4)
987 .nr(4)
988 .kr(1)
989 .sr(1)
990 .m(m)
991 .n(n)
992 .k(k)
993 .iterations(1)
994 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
995 }
996 }
997 }
998 }
999
1000 TEST(F32_IGEMM_RELU_4X4__WASM, small_kernel) {
1001 for (size_t k = 1; k <= 5; k += 2) {
1002 GemmMicrokernelTester()
1003 .mr(4)
1004 .nr(4)
1005 .kr(1)
1006 .sr(1)
1007 .m(4)
1008 .n(4)
1009 .k(k)
1010 .ks(3)
1011 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
1012 }
1013 }
1014
1015 TEST(F32_IGEMM_RELU_4X4__WASM, small_kernel_subtile) {
1016 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001017 for (uint32_t n = 1; n <= 4; n++) {
1018 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001019 GemmMicrokernelTester()
1020 .mr(4)
1021 .nr(4)
1022 .kr(1)
1023 .sr(1)
1024 .m(m)
1025 .n(n)
1026 .k(k)
1027 .ks(3)
1028 .iterations(1)
1029 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
1030 }
1031 }
1032 }
1033 }
1034
1035 TEST(F32_IGEMM_RELU_4X4__WASM, n_gt_4_small_kernel) {
1036 for (uint32_t n = 5; n < 8; n++) {
1037 for (size_t k = 1; k <= 5; k += 2) {
1038 GemmMicrokernelTester()
1039 .mr(4)
1040 .nr(4)
1041 .kr(1)
1042 .sr(1)
1043 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001044 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001045 .k(k)
1046 .ks(3)
1047 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
1048 }
1049 }
1050 }
1051
1052 TEST(F32_IGEMM_RELU_4X4__WASM, n_div_4_small_kernel) {
1053 for (uint32_t n = 8; n <= 12; n += 4) {
1054 for (size_t k = 1; k <= 5; k += 2) {
1055 GemmMicrokernelTester()
1056 .mr(4)
1057 .nr(4)
1058 .kr(1)
1059 .sr(1)
1060 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001061 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001062 .k(k)
1063 .ks(3)
1064 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
1065 }
1066 }
1067 }
1068
1069 TEST(F32_IGEMM_RELU_4X4__WASM, strided_cm_subtile) {
1070 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001071 for (uint32_t n = 1; n <= 4; n++) {
1072 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001073 GemmMicrokernelTester()
1074 .mr(4)
1075 .nr(4)
1076 .kr(1)
1077 .sr(1)
1078 .m(m)
1079 .n(n)
1080 .k(k)
1081 .cm_stride(7)
1082 .iterations(1)
1083 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
1084 }
1085 }
1086 }
1087 }
1088
1089 TEST(F32_IGEMM_RELU_4X4__WASM, a_offset) {
1090 for (size_t k = 1; k <= 5; k += 2) {
1091 GemmMicrokernelTester()
1092 .mr(4)
1093 .nr(4)
1094 .kr(1)
1095 .sr(1)
1096 .m(4)
1097 .n(4)
1098 .k(k)
1099 .ks(3)
1100 .a_offset(23)
1101 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
1102 }
1103 }
1104
1105 TEST(F32_IGEMM_RELU_4X4__WASM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001106 for (size_t k = 1; k <= 5; k += 2) {
1107 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001108 GemmMicrokernelTester()
1109 .mr(4)
1110 .nr(4)
1111 .kr(1)
1112 .sr(1)
1113 .m(4)
1114 .n(4)
1115 .k(k)
1116 .ks(3)
1117 .a_offset(23)
1118 .zero_index(mz)
1119 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
1120 }
1121 }
1122 }
1123
1124 TEST(F32_IGEMM_RELU_4X4__WASM, strided_cm) {
1125 GemmMicrokernelTester()
1126 .mr(4)
1127 .nr(4)
1128 .kr(1)
1129 .sr(1)
1130 .m(4)
1131 .n(4)
1132 .k(1)
1133 .cm_stride(7)
1134 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
1135 }
1136#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1137
1138
1139#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1140 TEST(F32_IGEMM_RELU_4X2__WASM, k_eq_1) {
1141 GemmMicrokernelTester()
1142 .mr(4)
1143 .nr(2)
1144 .kr(1)
1145 .sr(1)
1146 .m(4)
1147 .n(2)
1148 .k(1)
1149 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1150 }
1151
1152 TEST(F32_IGEMM_RELU_4X2__WASM, strided_cn) {
1153 GemmMicrokernelTester()
1154 .mr(4)
1155 .nr(2)
1156 .kr(1)
1157 .sr(1)
1158 .m(4)
1159 .n(2)
1160 .k(1)
1161 .cn_stride(5)
1162 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1163 }
1164
1165 TEST(F32_IGEMM_RELU_4X2__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001166 for (uint32_t n = 1; n <= 2; n++) {
1167 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001168 GemmMicrokernelTester()
1169 .mr(4)
1170 .nr(2)
1171 .kr(1)
1172 .sr(1)
1173 .m(m)
1174 .n(n)
1175 .k(1)
1176 .iterations(1)
1177 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1178 }
1179 }
1180 }
1181
1182 TEST(F32_IGEMM_RELU_4X2__WASM, k_eq_1_subtile_m) {
1183 for (uint32_t m = 1; m <= 4; m++) {
1184 GemmMicrokernelTester()
1185 .mr(4)
1186 .nr(2)
1187 .kr(1)
1188 .sr(1)
1189 .m(m)
1190 .n(2)
1191 .k(1)
1192 .iterations(1)
1193 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1194 }
1195 }
1196
1197 TEST(F32_IGEMM_RELU_4X2__WASM, k_eq_1_subtile_n) {
1198 for (uint32_t n = 1; n <= 2; n++) {
1199 GemmMicrokernelTester()
1200 .mr(4)
1201 .nr(2)
1202 .kr(1)
1203 .sr(1)
1204 .m(4)
1205 .n(n)
1206 .k(1)
1207 .iterations(1)
1208 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1209 }
1210 }
1211
1212 TEST(F32_IGEMM_RELU_4X2__WASM, k_gt_1) {
1213 for (size_t k = 2; k < 10; k++) {
1214 GemmMicrokernelTester()
1215 .mr(4)
1216 .nr(2)
1217 .kr(1)
1218 .sr(1)
1219 .m(4)
1220 .n(2)
1221 .k(k)
1222 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1223 }
1224 }
1225
1226 TEST(F32_IGEMM_RELU_4X2__WASM, k_gt_1_subtile) {
1227 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001228 for (uint32_t n = 1; n <= 2; n++) {
1229 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001230 GemmMicrokernelTester()
1231 .mr(4)
1232 .nr(2)
1233 .kr(1)
1234 .sr(1)
1235 .m(m)
1236 .n(n)
1237 .k(k)
1238 .iterations(1)
1239 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1240 }
1241 }
1242 }
1243 }
1244
1245 TEST(F32_IGEMM_RELU_4X2__WASM, n_gt_2) {
1246 for (uint32_t n = 3; n < 4; n++) {
1247 for (size_t k = 1; k <= 5; k += 2) {
1248 GemmMicrokernelTester()
1249 .mr(4)
1250 .nr(2)
1251 .kr(1)
1252 .sr(1)
1253 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001254 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001255 .k(k)
1256 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1257 }
1258 }
1259 }
1260
1261 TEST(F32_IGEMM_RELU_4X2__WASM, n_gt_2_strided_cn) {
1262 for (uint32_t n = 3; n < 4; n++) {
1263 for (size_t k = 1; k <= 5; k += 2) {
1264 GemmMicrokernelTester()
1265 .mr(4)
1266 .nr(2)
1267 .kr(1)
1268 .sr(1)
1269 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001270 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001271 .k(k)
1272 .cn_stride(5)
1273 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1274 }
1275 }
1276 }
1277
1278 TEST(F32_IGEMM_RELU_4X2__WASM, n_gt_2_subtile) {
1279 for (uint32_t n = 3; n < 4; n++) {
1280 for (size_t k = 1; k <= 5; k += 2) {
1281 for (uint32_t m = 1; m <= 4; m++) {
1282 GemmMicrokernelTester()
1283 .mr(4)
1284 .nr(2)
1285 .kr(1)
1286 .sr(1)
1287 .m(m)
1288 .n(n)
1289 .k(k)
1290 .iterations(1)
1291 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1292 }
1293 }
1294 }
1295 }
1296
1297 TEST(F32_IGEMM_RELU_4X2__WASM, n_div_2) {
1298 for (uint32_t n = 4; n <= 6; n += 2) {
1299 for (size_t k = 1; k <= 5; k += 2) {
1300 GemmMicrokernelTester()
1301 .mr(4)
1302 .nr(2)
1303 .kr(1)
1304 .sr(1)
1305 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001306 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001307 .k(k)
1308 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1309 }
1310 }
1311 }
1312
1313 TEST(F32_IGEMM_RELU_4X2__WASM, n_div_2_strided_cn) {
1314 for (uint32_t n = 4; n <= 6; n += 2) {
1315 for (size_t k = 1; k <= 5; k += 2) {
1316 GemmMicrokernelTester()
1317 .mr(4)
1318 .nr(2)
1319 .kr(1)
1320 .sr(1)
1321 .m(4)
1322 .n(n)
1323 .k(k)
1324 .cn_stride(5)
1325 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1326 }
1327 }
1328 }
1329
1330 TEST(F32_IGEMM_RELU_4X2__WASM, n_div_2_subtile) {
1331 for (uint32_t n = 4; n <= 6; n += 2) {
1332 for (size_t k = 1; k <= 5; k += 2) {
1333 for (uint32_t m = 1; m <= 4; m++) {
1334 GemmMicrokernelTester()
1335 .mr(4)
1336 .nr(2)
1337 .kr(1)
1338 .sr(1)
1339 .m(m)
1340 .n(n)
1341 .k(k)
1342 .iterations(1)
1343 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1344 }
1345 }
1346 }
1347 }
1348
1349 TEST(F32_IGEMM_RELU_4X2__WASM, small_kernel) {
1350 for (size_t k = 1; k <= 5; k += 2) {
1351 GemmMicrokernelTester()
1352 .mr(4)
1353 .nr(2)
1354 .kr(1)
1355 .sr(1)
1356 .m(4)
1357 .n(2)
1358 .k(k)
1359 .ks(3)
1360 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1361 }
1362 }
1363
1364 TEST(F32_IGEMM_RELU_4X2__WASM, small_kernel_subtile) {
1365 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001366 for (uint32_t n = 1; n <= 2; n++) {
1367 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001368 GemmMicrokernelTester()
1369 .mr(4)
1370 .nr(2)
1371 .kr(1)
1372 .sr(1)
1373 .m(m)
1374 .n(n)
1375 .k(k)
1376 .ks(3)
1377 .iterations(1)
1378 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1379 }
1380 }
1381 }
1382 }
1383
1384 TEST(F32_IGEMM_RELU_4X2__WASM, n_gt_2_small_kernel) {
1385 for (uint32_t n = 3; n < 4; n++) {
1386 for (size_t k = 1; k <= 5; k += 2) {
1387 GemmMicrokernelTester()
1388 .mr(4)
1389 .nr(2)
1390 .kr(1)
1391 .sr(1)
1392 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001393 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001394 .k(k)
1395 .ks(3)
1396 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1397 }
1398 }
1399 }
1400
1401 TEST(F32_IGEMM_RELU_4X2__WASM, n_div_2_small_kernel) {
1402 for (uint32_t n = 4; n <= 6; n += 2) {
1403 for (size_t k = 1; k <= 5; k += 2) {
1404 GemmMicrokernelTester()
1405 .mr(4)
1406 .nr(2)
1407 .kr(1)
1408 .sr(1)
1409 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001410 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001411 .k(k)
1412 .ks(3)
1413 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1414 }
1415 }
1416 }
1417
1418 TEST(F32_IGEMM_RELU_4X2__WASM, strided_cm_subtile) {
1419 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001420 for (uint32_t n = 1; n <= 2; n++) {
1421 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001422 GemmMicrokernelTester()
1423 .mr(4)
1424 .nr(2)
1425 .kr(1)
1426 .sr(1)
1427 .m(m)
1428 .n(n)
1429 .k(k)
1430 .cm_stride(5)
1431 .iterations(1)
1432 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1433 }
1434 }
1435 }
1436 }
1437
1438 TEST(F32_IGEMM_RELU_4X2__WASM, a_offset) {
1439 for (size_t k = 1; k <= 5; k += 2) {
1440 GemmMicrokernelTester()
1441 .mr(4)
1442 .nr(2)
1443 .kr(1)
1444 .sr(1)
1445 .m(4)
1446 .n(2)
1447 .k(k)
1448 .ks(3)
1449 .a_offset(23)
1450 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1451 }
1452 }
1453
1454 TEST(F32_IGEMM_RELU_4X2__WASM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001455 for (size_t k = 1; k <= 5; k += 2) {
1456 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001457 GemmMicrokernelTester()
1458 .mr(4)
1459 .nr(2)
1460 .kr(1)
1461 .sr(1)
1462 .m(4)
1463 .n(2)
1464 .k(k)
1465 .ks(3)
1466 .a_offset(23)
1467 .zero_index(mz)
1468 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1469 }
1470 }
1471 }
1472
1473 TEST(F32_IGEMM_RELU_4X2__WASM, strided_cm) {
1474 GemmMicrokernelTester()
1475 .mr(4)
1476 .nr(2)
1477 .kr(1)
1478 .sr(1)
1479 .m(4)
1480 .n(2)
1481 .k(1)
1482 .cm_stride(5)
1483 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
1484 }
1485#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1486
1487
1488TEST(F32_IGEMM_RELU_1X4__SCALAR, k_eq_1) {
1489 GemmMicrokernelTester()
1490 .mr(1)
1491 .nr(4)
1492 .kr(1)
1493 .sr(1)
1494 .m(1)
1495 .n(4)
1496 .k(1)
1497 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1498}
1499
1500TEST(F32_IGEMM_RELU_1X4__SCALAR, strided_cn) {
1501 GemmMicrokernelTester()
1502 .mr(1)
1503 .nr(4)
1504 .kr(1)
1505 .sr(1)
1506 .m(1)
1507 .n(4)
1508 .k(1)
1509 .cn_stride(7)
1510 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1511}
1512
1513TEST(F32_IGEMM_RELU_1X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001514 for (uint32_t n = 1; n <= 4; n++) {
1515 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001516 GemmMicrokernelTester()
1517 .mr(1)
1518 .nr(4)
1519 .kr(1)
1520 .sr(1)
1521 .m(m)
1522 .n(n)
1523 .k(1)
1524 .iterations(1)
1525 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1526 }
1527 }
1528}
1529
1530TEST(F32_IGEMM_RELU_1X4__SCALAR, k_eq_1_subtile_m) {
1531 for (uint32_t m = 1; m <= 1; m++) {
1532 GemmMicrokernelTester()
1533 .mr(1)
1534 .nr(4)
1535 .kr(1)
1536 .sr(1)
1537 .m(m)
1538 .n(4)
1539 .k(1)
1540 .iterations(1)
1541 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1542 }
1543}
1544
1545TEST(F32_IGEMM_RELU_1X4__SCALAR, k_eq_1_subtile_n) {
1546 for (uint32_t n = 1; n <= 4; n++) {
1547 GemmMicrokernelTester()
1548 .mr(1)
1549 .nr(4)
1550 .kr(1)
1551 .sr(1)
1552 .m(1)
1553 .n(n)
1554 .k(1)
1555 .iterations(1)
1556 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1557 }
1558}
1559
1560TEST(F32_IGEMM_RELU_1X4__SCALAR, k_gt_1) {
1561 for (size_t k = 2; k < 10; k++) {
1562 GemmMicrokernelTester()
1563 .mr(1)
1564 .nr(4)
1565 .kr(1)
1566 .sr(1)
1567 .m(1)
1568 .n(4)
1569 .k(k)
1570 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1571 }
1572}
1573
1574TEST(F32_IGEMM_RELU_1X4__SCALAR, k_gt_1_subtile) {
1575 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001576 for (uint32_t n = 1; n <= 4; n++) {
1577 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001578 GemmMicrokernelTester()
1579 .mr(1)
1580 .nr(4)
1581 .kr(1)
1582 .sr(1)
1583 .m(m)
1584 .n(n)
1585 .k(k)
1586 .iterations(1)
1587 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1588 }
1589 }
1590 }
1591}
1592
1593TEST(F32_IGEMM_RELU_1X4__SCALAR, n_gt_4) {
1594 for (uint32_t n = 5; n < 8; n++) {
1595 for (size_t k = 1; k <= 5; k += 2) {
1596 GemmMicrokernelTester()
1597 .mr(1)
1598 .nr(4)
1599 .kr(1)
1600 .sr(1)
1601 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001602 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001603 .k(k)
1604 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1605 }
1606 }
1607}
1608
1609TEST(F32_IGEMM_RELU_1X4__SCALAR, n_gt_4_strided_cn) {
1610 for (uint32_t n = 5; n < 8; n++) {
1611 for (size_t k = 1; k <= 5; k += 2) {
1612 GemmMicrokernelTester()
1613 .mr(1)
1614 .nr(4)
1615 .kr(1)
1616 .sr(1)
1617 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001618 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001619 .k(k)
1620 .cn_stride(7)
1621 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1622 }
1623 }
1624}
1625
1626TEST(F32_IGEMM_RELU_1X4__SCALAR, n_gt_4_subtile) {
1627 for (uint32_t n = 5; n < 8; n++) {
1628 for (size_t k = 1; k <= 5; k += 2) {
1629 for (uint32_t m = 1; m <= 1; m++) {
1630 GemmMicrokernelTester()
1631 .mr(1)
1632 .nr(4)
1633 .kr(1)
1634 .sr(1)
1635 .m(m)
1636 .n(n)
1637 .k(k)
1638 .iterations(1)
1639 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1640 }
1641 }
1642 }
1643}
1644
1645TEST(F32_IGEMM_RELU_1X4__SCALAR, n_div_4) {
1646 for (uint32_t n = 8; n <= 12; n += 4) {
1647 for (size_t k = 1; k <= 5; k += 2) {
1648 GemmMicrokernelTester()
1649 .mr(1)
1650 .nr(4)
1651 .kr(1)
1652 .sr(1)
1653 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001654 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001655 .k(k)
1656 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1657 }
1658 }
1659}
1660
1661TEST(F32_IGEMM_RELU_1X4__SCALAR, n_div_4_strided_cn) {
1662 for (uint32_t n = 8; n <= 12; n += 4) {
1663 for (size_t k = 1; k <= 5; k += 2) {
1664 GemmMicrokernelTester()
1665 .mr(1)
1666 .nr(4)
1667 .kr(1)
1668 .sr(1)
1669 .m(1)
1670 .n(n)
1671 .k(k)
1672 .cn_stride(7)
1673 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1674 }
1675 }
1676}
1677
1678TEST(F32_IGEMM_RELU_1X4__SCALAR, n_div_4_subtile) {
1679 for (uint32_t n = 8; n <= 12; n += 4) {
1680 for (size_t k = 1; k <= 5; k += 2) {
1681 for (uint32_t m = 1; m <= 1; m++) {
1682 GemmMicrokernelTester()
1683 .mr(1)
1684 .nr(4)
1685 .kr(1)
1686 .sr(1)
1687 .m(m)
1688 .n(n)
1689 .k(k)
1690 .iterations(1)
1691 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1692 }
1693 }
1694 }
1695}
1696
1697TEST(F32_IGEMM_RELU_1X4__SCALAR, small_kernel) {
1698 for (size_t k = 1; k <= 5; k += 2) {
1699 GemmMicrokernelTester()
1700 .mr(1)
1701 .nr(4)
1702 .kr(1)
1703 .sr(1)
1704 .m(1)
1705 .n(4)
1706 .k(k)
1707 .ks(3)
1708 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1709 }
1710}
1711
1712TEST(F32_IGEMM_RELU_1X4__SCALAR, small_kernel_subtile) {
1713 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001714 for (uint32_t n = 1; n <= 4; n++) {
1715 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001716 GemmMicrokernelTester()
1717 .mr(1)
1718 .nr(4)
1719 .kr(1)
1720 .sr(1)
1721 .m(m)
1722 .n(n)
1723 .k(k)
1724 .ks(3)
1725 .iterations(1)
1726 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1727 }
1728 }
1729 }
1730}
1731
1732TEST(F32_IGEMM_RELU_1X4__SCALAR, n_gt_4_small_kernel) {
1733 for (uint32_t n = 5; n < 8; n++) {
1734 for (size_t k = 1; k <= 5; k += 2) {
1735 GemmMicrokernelTester()
1736 .mr(1)
1737 .nr(4)
1738 .kr(1)
1739 .sr(1)
1740 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001741 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001742 .k(k)
1743 .ks(3)
1744 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1745 }
1746 }
1747}
1748
1749TEST(F32_IGEMM_RELU_1X4__SCALAR, n_div_4_small_kernel) {
1750 for (uint32_t n = 8; n <= 12; n += 4) {
1751 for (size_t k = 1; k <= 5; k += 2) {
1752 GemmMicrokernelTester()
1753 .mr(1)
1754 .nr(4)
1755 .kr(1)
1756 .sr(1)
1757 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001758 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001759 .k(k)
1760 .ks(3)
1761 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1762 }
1763 }
1764}
1765
1766TEST(F32_IGEMM_RELU_1X4__SCALAR, strided_cm_subtile) {
1767 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001768 for (uint32_t n = 1; n <= 4; n++) {
1769 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001770 GemmMicrokernelTester()
1771 .mr(1)
1772 .nr(4)
1773 .kr(1)
1774 .sr(1)
1775 .m(m)
1776 .n(n)
1777 .k(k)
1778 .cm_stride(7)
1779 .iterations(1)
1780 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1781 }
1782 }
1783 }
1784}
1785
1786TEST(F32_IGEMM_RELU_1X4__SCALAR, a_offset) {
1787 for (size_t k = 1; k <= 5; k += 2) {
1788 GemmMicrokernelTester()
1789 .mr(1)
1790 .nr(4)
1791 .kr(1)
1792 .sr(1)
1793 .m(1)
1794 .n(4)
1795 .k(k)
1796 .ks(3)
1797 .a_offset(7)
1798 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1799 }
1800}
1801
1802TEST(F32_IGEMM_RELU_1X4__SCALAR, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001803 for (size_t k = 1; k <= 5; k += 2) {
1804 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001805 GemmMicrokernelTester()
1806 .mr(1)
1807 .nr(4)
1808 .kr(1)
1809 .sr(1)
1810 .m(1)
1811 .n(4)
1812 .k(k)
1813 .ks(3)
1814 .a_offset(7)
1815 .zero_index(mz)
1816 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1817 }
1818 }
1819}
1820
1821TEST(F32_IGEMM_RELU_1X4__SCALAR, strided_cm) {
1822 GemmMicrokernelTester()
1823 .mr(1)
1824 .nr(4)
1825 .kr(1)
1826 .sr(1)
1827 .m(1)
1828 .n(4)
1829 .k(1)
1830 .cm_stride(7)
1831 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
1832}
1833
1834
1835TEST(F32_IGEMM_RELU_4X4__SCALAR, k_eq_1) {
1836 GemmMicrokernelTester()
1837 .mr(4)
1838 .nr(4)
1839 .kr(1)
1840 .sr(1)
1841 .m(4)
1842 .n(4)
1843 .k(1)
1844 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
1845}
1846
1847TEST(F32_IGEMM_RELU_4X4__SCALAR, strided_cn) {
1848 GemmMicrokernelTester()
1849 .mr(4)
1850 .nr(4)
1851 .kr(1)
1852 .sr(1)
1853 .m(4)
1854 .n(4)
1855 .k(1)
1856 .cn_stride(7)
1857 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
1858}
1859
1860TEST(F32_IGEMM_RELU_4X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001861 for (uint32_t n = 1; n <= 4; n++) {
1862 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001863 GemmMicrokernelTester()
1864 .mr(4)
1865 .nr(4)
1866 .kr(1)
1867 .sr(1)
1868 .m(m)
1869 .n(n)
1870 .k(1)
1871 .iterations(1)
1872 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
1873 }
1874 }
1875}
1876
1877TEST(F32_IGEMM_RELU_4X4__SCALAR, k_eq_1_subtile_m) {
1878 for (uint32_t m = 1; m <= 4; m++) {
1879 GemmMicrokernelTester()
1880 .mr(4)
1881 .nr(4)
1882 .kr(1)
1883 .sr(1)
1884 .m(m)
1885 .n(4)
1886 .k(1)
1887 .iterations(1)
1888 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
1889 }
1890}
1891
1892TEST(F32_IGEMM_RELU_4X4__SCALAR, k_eq_1_subtile_n) {
1893 for (uint32_t n = 1; n <= 4; n++) {
1894 GemmMicrokernelTester()
1895 .mr(4)
1896 .nr(4)
1897 .kr(1)
1898 .sr(1)
1899 .m(4)
1900 .n(n)
1901 .k(1)
1902 .iterations(1)
1903 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
1904 }
1905}
1906
1907TEST(F32_IGEMM_RELU_4X4__SCALAR, k_gt_1) {
1908 for (size_t k = 2; k < 10; k++) {
1909 GemmMicrokernelTester()
1910 .mr(4)
1911 .nr(4)
1912 .kr(1)
1913 .sr(1)
1914 .m(4)
1915 .n(4)
1916 .k(k)
1917 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
1918 }
1919}
1920
1921TEST(F32_IGEMM_RELU_4X4__SCALAR, k_gt_1_subtile) {
1922 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001923 for (uint32_t n = 1; n <= 4; n++) {
1924 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001925 GemmMicrokernelTester()
1926 .mr(4)
1927 .nr(4)
1928 .kr(1)
1929 .sr(1)
1930 .m(m)
1931 .n(n)
1932 .k(k)
1933 .iterations(1)
1934 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
1935 }
1936 }
1937 }
1938}
1939
1940TEST(F32_IGEMM_RELU_4X4__SCALAR, n_gt_4) {
1941 for (uint32_t n = 5; n < 8; n++) {
1942 for (size_t k = 1; k <= 5; k += 2) {
1943 GemmMicrokernelTester()
1944 .mr(4)
1945 .nr(4)
1946 .kr(1)
1947 .sr(1)
1948 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001949 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001950 .k(k)
1951 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
1952 }
1953 }
1954}
1955
1956TEST(F32_IGEMM_RELU_4X4__SCALAR, n_gt_4_strided_cn) {
1957 for (uint32_t n = 5; n < 8; n++) {
1958 for (size_t k = 1; k <= 5; k += 2) {
1959 GemmMicrokernelTester()
1960 .mr(4)
1961 .nr(4)
1962 .kr(1)
1963 .sr(1)
1964 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001965 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001966 .k(k)
1967 .cn_stride(7)
1968 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
1969 }
1970 }
1971}
1972
1973TEST(F32_IGEMM_RELU_4X4__SCALAR, n_gt_4_subtile) {
1974 for (uint32_t n = 5; n < 8; n++) {
1975 for (size_t k = 1; k <= 5; k += 2) {
1976 for (uint32_t m = 1; m <= 4; m++) {
1977 GemmMicrokernelTester()
1978 .mr(4)
1979 .nr(4)
1980 .kr(1)
1981 .sr(1)
1982 .m(m)
1983 .n(n)
1984 .k(k)
1985 .iterations(1)
1986 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
1987 }
1988 }
1989 }
1990}
1991
1992TEST(F32_IGEMM_RELU_4X4__SCALAR, n_div_4) {
1993 for (uint32_t n = 8; n <= 12; n += 4) {
1994 for (size_t k = 1; k <= 5; k += 2) {
1995 GemmMicrokernelTester()
1996 .mr(4)
1997 .nr(4)
1998 .kr(1)
1999 .sr(1)
2000 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002001 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002002 .k(k)
2003 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
2004 }
2005 }
2006}
2007
2008TEST(F32_IGEMM_RELU_4X4__SCALAR, n_div_4_strided_cn) {
2009 for (uint32_t n = 8; n <= 12; n += 4) {
2010 for (size_t k = 1; k <= 5; k += 2) {
2011 GemmMicrokernelTester()
2012 .mr(4)
2013 .nr(4)
2014 .kr(1)
2015 .sr(1)
2016 .m(4)
2017 .n(n)
2018 .k(k)
2019 .cn_stride(7)
2020 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
2021 }
2022 }
2023}
2024
2025TEST(F32_IGEMM_RELU_4X4__SCALAR, n_div_4_subtile) {
2026 for (uint32_t n = 8; n <= 12; n += 4) {
2027 for (size_t k = 1; k <= 5; k += 2) {
2028 for (uint32_t m = 1; m <= 4; m++) {
2029 GemmMicrokernelTester()
2030 .mr(4)
2031 .nr(4)
2032 .kr(1)
2033 .sr(1)
2034 .m(m)
2035 .n(n)
2036 .k(k)
2037 .iterations(1)
2038 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
2039 }
2040 }
2041 }
2042}
2043
2044TEST(F32_IGEMM_RELU_4X4__SCALAR, small_kernel) {
2045 for (size_t k = 1; k <= 5; k += 2) {
2046 GemmMicrokernelTester()
2047 .mr(4)
2048 .nr(4)
2049 .kr(1)
2050 .sr(1)
2051 .m(4)
2052 .n(4)
2053 .k(k)
2054 .ks(3)
2055 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
2056 }
2057}
2058
2059TEST(F32_IGEMM_RELU_4X4__SCALAR, small_kernel_subtile) {
2060 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002061 for (uint32_t n = 1; n <= 4; n++) {
2062 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002063 GemmMicrokernelTester()
2064 .mr(4)
2065 .nr(4)
2066 .kr(1)
2067 .sr(1)
2068 .m(m)
2069 .n(n)
2070 .k(k)
2071 .ks(3)
2072 .iterations(1)
2073 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
2074 }
2075 }
2076 }
2077}
2078
2079TEST(F32_IGEMM_RELU_4X4__SCALAR, n_gt_4_small_kernel) {
2080 for (uint32_t n = 5; n < 8; n++) {
2081 for (size_t k = 1; k <= 5; k += 2) {
2082 GemmMicrokernelTester()
2083 .mr(4)
2084 .nr(4)
2085 .kr(1)
2086 .sr(1)
2087 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002088 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002089 .k(k)
2090 .ks(3)
2091 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
2092 }
2093 }
2094}
2095
2096TEST(F32_IGEMM_RELU_4X4__SCALAR, n_div_4_small_kernel) {
2097 for (uint32_t n = 8; n <= 12; n += 4) {
2098 for (size_t k = 1; k <= 5; k += 2) {
2099 GemmMicrokernelTester()
2100 .mr(4)
2101 .nr(4)
2102 .kr(1)
2103 .sr(1)
2104 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002105 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002106 .k(k)
2107 .ks(3)
2108 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
2109 }
2110 }
2111}
2112
2113TEST(F32_IGEMM_RELU_4X4__SCALAR, strided_cm_subtile) {
2114 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002115 for (uint32_t n = 1; n <= 4; n++) {
2116 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002117 GemmMicrokernelTester()
2118 .mr(4)
2119 .nr(4)
2120 .kr(1)
2121 .sr(1)
2122 .m(m)
2123 .n(n)
2124 .k(k)
2125 .cm_stride(7)
2126 .iterations(1)
2127 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
2128 }
2129 }
2130 }
2131}
2132
2133TEST(F32_IGEMM_RELU_4X4__SCALAR, a_offset) {
2134 for (size_t k = 1; k <= 5; k += 2) {
2135 GemmMicrokernelTester()
2136 .mr(4)
2137 .nr(4)
2138 .kr(1)
2139 .sr(1)
2140 .m(4)
2141 .n(4)
2142 .k(k)
2143 .ks(3)
2144 .a_offset(23)
2145 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
2146 }
2147}
2148
2149TEST(F32_IGEMM_RELU_4X4__SCALAR, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002150 for (size_t k = 1; k <= 5; k += 2) {
2151 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002152 GemmMicrokernelTester()
2153 .mr(4)
2154 .nr(4)
2155 .kr(1)
2156 .sr(1)
2157 .m(4)
2158 .n(4)
2159 .k(k)
2160 .ks(3)
2161 .a_offset(23)
2162 .zero_index(mz)
2163 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
2164 }
2165 }
2166}
2167
2168TEST(F32_IGEMM_RELU_4X4__SCALAR, strided_cm) {
2169 GemmMicrokernelTester()
2170 .mr(4)
2171 .nr(4)
2172 .kr(1)
2173 .sr(1)
2174 .m(4)
2175 .n(4)
2176 .k(1)
2177 .cm_stride(7)
2178 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
2179}
2180
2181
2182TEST(F32_IGEMM_RELU_4X2__SCALAR, k_eq_1) {
2183 GemmMicrokernelTester()
2184 .mr(4)
2185 .nr(2)
2186 .kr(1)
2187 .sr(1)
2188 .m(4)
2189 .n(2)
2190 .k(1)
2191 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2192}
2193
2194TEST(F32_IGEMM_RELU_4X2__SCALAR, strided_cn) {
2195 GemmMicrokernelTester()
2196 .mr(4)
2197 .nr(2)
2198 .kr(1)
2199 .sr(1)
2200 .m(4)
2201 .n(2)
2202 .k(1)
2203 .cn_stride(5)
2204 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2205}
2206
2207TEST(F32_IGEMM_RELU_4X2__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002208 for (uint32_t n = 1; n <= 2; n++) {
2209 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002210 GemmMicrokernelTester()
2211 .mr(4)
2212 .nr(2)
2213 .kr(1)
2214 .sr(1)
2215 .m(m)
2216 .n(n)
2217 .k(1)
2218 .iterations(1)
2219 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2220 }
2221 }
2222}
2223
2224TEST(F32_IGEMM_RELU_4X2__SCALAR, k_eq_1_subtile_m) {
2225 for (uint32_t m = 1; m <= 4; m++) {
2226 GemmMicrokernelTester()
2227 .mr(4)
2228 .nr(2)
2229 .kr(1)
2230 .sr(1)
2231 .m(m)
2232 .n(2)
2233 .k(1)
2234 .iterations(1)
2235 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2236 }
2237}
2238
2239TEST(F32_IGEMM_RELU_4X2__SCALAR, k_eq_1_subtile_n) {
2240 for (uint32_t n = 1; n <= 2; n++) {
2241 GemmMicrokernelTester()
2242 .mr(4)
2243 .nr(2)
2244 .kr(1)
2245 .sr(1)
2246 .m(4)
2247 .n(n)
2248 .k(1)
2249 .iterations(1)
2250 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2251 }
2252}
2253
2254TEST(F32_IGEMM_RELU_4X2__SCALAR, k_gt_1) {
2255 for (size_t k = 2; k < 10; k++) {
2256 GemmMicrokernelTester()
2257 .mr(4)
2258 .nr(2)
2259 .kr(1)
2260 .sr(1)
2261 .m(4)
2262 .n(2)
2263 .k(k)
2264 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2265 }
2266}
2267
2268TEST(F32_IGEMM_RELU_4X2__SCALAR, k_gt_1_subtile) {
2269 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002270 for (uint32_t n = 1; n <= 2; n++) {
2271 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002272 GemmMicrokernelTester()
2273 .mr(4)
2274 .nr(2)
2275 .kr(1)
2276 .sr(1)
2277 .m(m)
2278 .n(n)
2279 .k(k)
2280 .iterations(1)
2281 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2282 }
2283 }
2284 }
2285}
2286
2287TEST(F32_IGEMM_RELU_4X2__SCALAR, n_gt_2) {
2288 for (uint32_t n = 3; n < 4; n++) {
2289 for (size_t k = 1; k <= 5; k += 2) {
2290 GemmMicrokernelTester()
2291 .mr(4)
2292 .nr(2)
2293 .kr(1)
2294 .sr(1)
2295 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002296 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002297 .k(k)
2298 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2299 }
2300 }
2301}
2302
2303TEST(F32_IGEMM_RELU_4X2__SCALAR, n_gt_2_strided_cn) {
2304 for (uint32_t n = 3; n < 4; n++) {
2305 for (size_t k = 1; k <= 5; k += 2) {
2306 GemmMicrokernelTester()
2307 .mr(4)
2308 .nr(2)
2309 .kr(1)
2310 .sr(1)
2311 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002312 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002313 .k(k)
2314 .cn_stride(5)
2315 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2316 }
2317 }
2318}
2319
2320TEST(F32_IGEMM_RELU_4X2__SCALAR, n_gt_2_subtile) {
2321 for (uint32_t n = 3; n < 4; n++) {
2322 for (size_t k = 1; k <= 5; k += 2) {
2323 for (uint32_t m = 1; m <= 4; m++) {
2324 GemmMicrokernelTester()
2325 .mr(4)
2326 .nr(2)
2327 .kr(1)
2328 .sr(1)
2329 .m(m)
2330 .n(n)
2331 .k(k)
2332 .iterations(1)
2333 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2334 }
2335 }
2336 }
2337}
2338
2339TEST(F32_IGEMM_RELU_4X2__SCALAR, n_div_2) {
2340 for (uint32_t n = 4; n <= 6; n += 2) {
2341 for (size_t k = 1; k <= 5; k += 2) {
2342 GemmMicrokernelTester()
2343 .mr(4)
2344 .nr(2)
2345 .kr(1)
2346 .sr(1)
2347 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002348 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002349 .k(k)
2350 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2351 }
2352 }
2353}
2354
2355TEST(F32_IGEMM_RELU_4X2__SCALAR, n_div_2_strided_cn) {
2356 for (uint32_t n = 4; n <= 6; n += 2) {
2357 for (size_t k = 1; k <= 5; k += 2) {
2358 GemmMicrokernelTester()
2359 .mr(4)
2360 .nr(2)
2361 .kr(1)
2362 .sr(1)
2363 .m(4)
2364 .n(n)
2365 .k(k)
2366 .cn_stride(5)
2367 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2368 }
2369 }
2370}
2371
2372TEST(F32_IGEMM_RELU_4X2__SCALAR, n_div_2_subtile) {
2373 for (uint32_t n = 4; n <= 6; n += 2) {
2374 for (size_t k = 1; k <= 5; k += 2) {
2375 for (uint32_t m = 1; m <= 4; m++) {
2376 GemmMicrokernelTester()
2377 .mr(4)
2378 .nr(2)
2379 .kr(1)
2380 .sr(1)
2381 .m(m)
2382 .n(n)
2383 .k(k)
2384 .iterations(1)
2385 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2386 }
2387 }
2388 }
2389}
2390
2391TEST(F32_IGEMM_RELU_4X2__SCALAR, small_kernel) {
2392 for (size_t k = 1; k <= 5; k += 2) {
2393 GemmMicrokernelTester()
2394 .mr(4)
2395 .nr(2)
2396 .kr(1)
2397 .sr(1)
2398 .m(4)
2399 .n(2)
2400 .k(k)
2401 .ks(3)
2402 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2403 }
2404}
2405
2406TEST(F32_IGEMM_RELU_4X2__SCALAR, small_kernel_subtile) {
2407 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002408 for (uint32_t n = 1; n <= 2; n++) {
2409 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002410 GemmMicrokernelTester()
2411 .mr(4)
2412 .nr(2)
2413 .kr(1)
2414 .sr(1)
2415 .m(m)
2416 .n(n)
2417 .k(k)
2418 .ks(3)
2419 .iterations(1)
2420 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2421 }
2422 }
2423 }
2424}
2425
2426TEST(F32_IGEMM_RELU_4X2__SCALAR, n_gt_2_small_kernel) {
2427 for (uint32_t n = 3; n < 4; n++) {
2428 for (size_t k = 1; k <= 5; k += 2) {
2429 GemmMicrokernelTester()
2430 .mr(4)
2431 .nr(2)
2432 .kr(1)
2433 .sr(1)
2434 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002435 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002436 .k(k)
2437 .ks(3)
2438 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2439 }
2440 }
2441}
2442
2443TEST(F32_IGEMM_RELU_4X2__SCALAR, n_div_2_small_kernel) {
2444 for (uint32_t n = 4; n <= 6; n += 2) {
2445 for (size_t k = 1; k <= 5; k += 2) {
2446 GemmMicrokernelTester()
2447 .mr(4)
2448 .nr(2)
2449 .kr(1)
2450 .sr(1)
2451 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002452 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002453 .k(k)
2454 .ks(3)
2455 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2456 }
2457 }
2458}
2459
2460TEST(F32_IGEMM_RELU_4X2__SCALAR, strided_cm_subtile) {
2461 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002462 for (uint32_t n = 1; n <= 2; n++) {
2463 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002464 GemmMicrokernelTester()
2465 .mr(4)
2466 .nr(2)
2467 .kr(1)
2468 .sr(1)
2469 .m(m)
2470 .n(n)
2471 .k(k)
2472 .cm_stride(5)
2473 .iterations(1)
2474 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2475 }
2476 }
2477 }
2478}
2479
2480TEST(F32_IGEMM_RELU_4X2__SCALAR, a_offset) {
2481 for (size_t k = 1; k <= 5; k += 2) {
2482 GemmMicrokernelTester()
2483 .mr(4)
2484 .nr(2)
2485 .kr(1)
2486 .sr(1)
2487 .m(4)
2488 .n(2)
2489 .k(k)
2490 .ks(3)
2491 .a_offset(23)
2492 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2493 }
2494}
2495
2496TEST(F32_IGEMM_RELU_4X2__SCALAR, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002497 for (size_t k = 1; k <= 5; k += 2) {
2498 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002499 GemmMicrokernelTester()
2500 .mr(4)
2501 .nr(2)
2502 .kr(1)
2503 .sr(1)
2504 .m(4)
2505 .n(2)
2506 .k(k)
2507 .ks(3)
2508 .a_offset(23)
2509 .zero_index(mz)
2510 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2511 }
2512 }
2513}
2514
2515TEST(F32_IGEMM_RELU_4X2__SCALAR, strided_cm) {
2516 GemmMicrokernelTester()
2517 .mr(4)
2518 .nr(2)
2519 .kr(1)
2520 .sr(1)
2521 .m(4)
2522 .n(2)
2523 .k(1)
2524 .cm_stride(5)
2525 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
2526}