blob: 600bcf2e8c7d04c6518af1d25a677890e4587646 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-igemm-relu.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
Frank Barchard447aa7b2021-12-28 14:11:40 -080016#include <xnnpack/allocator.h>
Marat Dukhan467f6362020-05-22 23:21:55 -070017#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
Marat Dukhan4c617792021-12-21 15:47:58 -080026#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan688f6d82020-07-14 17:02:11 -070027 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -070028 GemmMicrokernelTester()
29 .mr(1)
30 .nr(8)
31 .kr(1)
32 .sr(1)
33 .m(1)
34 .n(8)
35 .k(4)
Marat Dukhan688f6d82020-07-14 17:02:11 -070036 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -070037 }
38
Marat Dukhan688f6d82020-07-14 17:02:11 -070039 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cn) {
Marat Dukhan802808c2020-06-16 11:01:17 -070040 GemmMicrokernelTester()
41 .mr(1)
42 .nr(8)
43 .kr(1)
44 .sr(1)
45 .m(1)
46 .n(8)
47 .k(4)
48 .cn_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -070049 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -070050 }
51
Marat Dukhan688f6d82020-07-14 17:02:11 -070052 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080053 for (uint32_t n = 1; n <= 8; n++) {
54 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802808c2020-06-16 11:01:17 -070055 GemmMicrokernelTester()
56 .mr(1)
57 .nr(8)
58 .kr(1)
59 .sr(1)
60 .m(m)
61 .n(n)
62 .k(4)
63 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -070064 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -070065 }
66 }
67 }
68
Marat Dukhan688f6d82020-07-14 17:02:11 -070069 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan802808c2020-06-16 11:01:17 -070070 for (uint32_t m = 1; m <= 1; m++) {
71 GemmMicrokernelTester()
72 .mr(1)
73 .nr(8)
74 .kr(1)
75 .sr(1)
76 .m(m)
77 .n(8)
78 .k(4)
79 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -070080 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -070081 }
82 }
83
Marat Dukhan688f6d82020-07-14 17:02:11 -070084 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan802808c2020-06-16 11:01:17 -070085 for (uint32_t n = 1; n <= 8; n++) {
86 GemmMicrokernelTester()
87 .mr(1)
88 .nr(8)
89 .kr(1)
90 .sr(1)
91 .m(1)
92 .n(n)
93 .k(4)
94 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -070095 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -070096 }
97 }
98
Marat Dukhan688f6d82020-07-14 17:02:11 -070099 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_lt_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700100 for (size_t k = 1; k < 4; k++) {
101 GemmMicrokernelTester()
102 .mr(1)
103 .nr(8)
104 .kr(1)
105 .sr(1)
106 .m(1)
107 .n(8)
108 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700109 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700110 }
111 }
112
Marat Dukhan688f6d82020-07-14 17:02:11 -0700113 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700114 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800115 for (uint32_t n = 1; n <= 8; n++) {
116 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700117 GemmMicrokernelTester()
118 .mr(1)
119 .nr(8)
120 .kr(1)
121 .sr(1)
122 .m(m)
123 .n(n)
124 .k(k)
125 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700126 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700127 }
128 }
129 }
130 }
131
Marat Dukhan688f6d82020-07-14 17:02:11 -0700132 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_gt_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700133 for (size_t k = 5; k < 8; k++) {
134 GemmMicrokernelTester()
135 .mr(1)
136 .nr(8)
137 .kr(1)
138 .sr(1)
139 .m(1)
140 .n(8)
141 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700142 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700143 }
144 }
145
Marat Dukhan688f6d82020-07-14 17:02:11 -0700146 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700147 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800148 for (uint32_t n = 1; n <= 8; n++) {
149 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700150 GemmMicrokernelTester()
151 .mr(1)
152 .nr(8)
153 .kr(1)
154 .sr(1)
155 .m(m)
156 .n(n)
157 .k(k)
158 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700159 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700160 }
161 }
162 }
163 }
164
Marat Dukhan688f6d82020-07-14 17:02:11 -0700165 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_div_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700166 for (size_t k = 8; k <= 40; k += 4) {
167 GemmMicrokernelTester()
168 .mr(1)
169 .nr(8)
170 .kr(1)
171 .sr(1)
172 .m(1)
173 .n(8)
174 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700175 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700176 }
177 }
178
Marat Dukhan688f6d82020-07-14 17:02:11 -0700179 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_div_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700180 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800181 for (uint32_t n = 1; n <= 8; n++) {
182 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700183 GemmMicrokernelTester()
184 .mr(1)
185 .nr(8)
186 .kr(1)
187 .sr(1)
188 .m(m)
189 .n(n)
190 .k(k)
191 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700192 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700193 }
194 }
195 }
196 }
197
Marat Dukhan688f6d82020-07-14 17:02:11 -0700198 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700199 for (uint32_t n = 9; n < 16; n++) {
200 for (size_t k = 1; k <= 20; k += 5) {
201 GemmMicrokernelTester()
202 .mr(1)
203 .nr(8)
204 .kr(1)
205 .sr(1)
206 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800207 .n(n)
Marat Dukhan802808c2020-06-16 11:01:17 -0700208 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700209 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700210 }
211 }
212 }
213
Marat Dukhan688f6d82020-07-14 17:02:11 -0700214 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700215 for (uint32_t n = 9; n < 16; n++) {
216 for (size_t k = 1; k <= 20; k += 5) {
217 GemmMicrokernelTester()
218 .mr(1)
219 .nr(8)
220 .kr(1)
221 .sr(1)
222 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800223 .n(n)
Marat Dukhan802808c2020-06-16 11:01:17 -0700224 .k(k)
225 .cn_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700226 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700227 }
228 }
229 }
230
Marat Dukhan688f6d82020-07-14 17:02:11 -0700231 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700232 for (uint32_t n = 9; n < 16; n++) {
233 for (size_t k = 1; k <= 20; k += 5) {
234 for (uint32_t m = 1; m <= 1; m++) {
235 GemmMicrokernelTester()
236 .mr(1)
237 .nr(8)
238 .kr(1)
239 .sr(1)
240 .m(m)
241 .n(n)
242 .k(k)
243 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700244 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700245 }
246 }
247 }
248 }
249
Marat Dukhan688f6d82020-07-14 17:02:11 -0700250 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700251 for (uint32_t n = 16; n <= 24; n += 8) {
252 for (size_t k = 1; k <= 20; k += 5) {
253 GemmMicrokernelTester()
254 .mr(1)
255 .nr(8)
256 .kr(1)
257 .sr(1)
258 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800259 .n(n)
Marat Dukhan802808c2020-06-16 11:01:17 -0700260 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700261 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700262 }
263 }
264 }
265
Marat Dukhan688f6d82020-07-14 17:02:11 -0700266 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700267 for (uint32_t n = 16; n <= 24; n += 8) {
268 for (size_t k = 1; k <= 20; k += 5) {
269 GemmMicrokernelTester()
270 .mr(1)
271 .nr(8)
272 .kr(1)
273 .sr(1)
274 .m(1)
275 .n(n)
276 .k(k)
277 .cn_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700278 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700279 }
280 }
281 }
282
Marat Dukhan688f6d82020-07-14 17:02:11 -0700283 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700284 for (uint32_t n = 16; n <= 24; n += 8) {
285 for (size_t k = 1; k <= 20; k += 5) {
286 for (uint32_t m = 1; m <= 1; m++) {
287 GemmMicrokernelTester()
288 .mr(1)
289 .nr(8)
290 .kr(1)
291 .sr(1)
292 .m(m)
293 .n(n)
294 .k(k)
295 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700296 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700297 }
298 }
299 }
300 }
301
Marat Dukhan688f6d82020-07-14 17:02:11 -0700302 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, small_kernel) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700303 for (size_t k = 1; k <= 20; k += 5) {
304 GemmMicrokernelTester()
305 .mr(1)
306 .nr(8)
307 .kr(1)
308 .sr(1)
309 .m(1)
310 .n(8)
311 .k(k)
312 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700313 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700314 }
315 }
316
Marat Dukhan688f6d82020-07-14 17:02:11 -0700317 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, small_kernel_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700318 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800319 for (uint32_t n = 1; n <= 8; n++) {
320 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700321 GemmMicrokernelTester()
322 .mr(1)
323 .nr(8)
324 .kr(1)
325 .sr(1)
326 .m(m)
327 .n(n)
328 .k(k)
329 .ks(3)
330 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700331 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700332 }
333 }
334 }
335 }
336
Marat Dukhan688f6d82020-07-14 17:02:11 -0700337 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700338 for (uint32_t n = 9; n < 16; n++) {
339 for (size_t k = 1; k <= 20; k += 5) {
340 GemmMicrokernelTester()
341 .mr(1)
342 .nr(8)
343 .kr(1)
344 .sr(1)
345 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800346 .n(n)
Marat Dukhan802808c2020-06-16 11:01:17 -0700347 .k(k)
348 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700349 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700350 }
351 }
352 }
353
Marat Dukhan688f6d82020-07-14 17:02:11 -0700354 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700355 for (uint32_t n = 16; n <= 24; n += 8) {
356 for (size_t k = 1; k <= 20; k += 5) {
357 GemmMicrokernelTester()
358 .mr(1)
359 .nr(8)
360 .kr(1)
361 .sr(1)
362 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800363 .n(n)
Marat Dukhan802808c2020-06-16 11:01:17 -0700364 .k(k)
365 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700366 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700367 }
368 }
369 }
370
Marat Dukhan688f6d82020-07-14 17:02:11 -0700371 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cm_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700372 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800373 for (uint32_t n = 1; n <= 8; n++) {
374 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700375 GemmMicrokernelTester()
376 .mr(1)
377 .nr(8)
378 .kr(1)
379 .sr(1)
380 .m(m)
381 .n(n)
382 .k(k)
383 .cm_stride(11)
384 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700385 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700386 }
387 }
388 }
389 }
390
Marat Dukhan688f6d82020-07-14 17:02:11 -0700391 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, a_offset) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700392 for (size_t k = 1; k <= 20; k += 5) {
393 GemmMicrokernelTester()
394 .mr(1)
395 .nr(8)
396 .kr(1)
397 .sr(1)
398 .m(1)
399 .n(8)
400 .k(k)
401 .ks(3)
402 .a_offset(23)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700403 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700404 }
405 }
406
Marat Dukhan688f6d82020-07-14 17:02:11 -0700407 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800408 for (size_t k = 1; k <= 20; k += 5) {
409 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700410 GemmMicrokernelTester()
411 .mr(1)
412 .nr(8)
413 .kr(1)
414 .sr(1)
415 .m(1)
416 .n(8)
417 .k(k)
418 .ks(3)
419 .a_offset(23)
420 .zero_index(mz)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700421 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700422 }
423 }
424 }
425
Marat Dukhan688f6d82020-07-14 17:02:11 -0700426 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cm) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700427 GemmMicrokernelTester()
428 .mr(1)
429 .nr(8)
430 .kr(1)
431 .sr(1)
432 .m(1)
433 .n(8)
434 .k(4)
435 .cm_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700436 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700437 }
Marat Dukhan4c617792021-12-21 15:47:58 -0800438#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan802808c2020-06-16 11:01:17 -0700439
440
Marat Dukhan4c617792021-12-21 15:47:58 -0800441#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan688f6d82020-07-14 17:02:11 -0700442 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4) {
443 GemmMicrokernelTester()
444 .mr(4)
445 .nr(8)
446 .kr(1)
447 .sr(1)
448 .m(4)
449 .n(8)
450 .k(4)
451 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
452 }
453
454 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cn) {
455 GemmMicrokernelTester()
456 .mr(4)
457 .nr(8)
458 .kr(1)
459 .sr(1)
460 .m(4)
461 .n(8)
462 .k(4)
463 .cn_stride(11)
464 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
465 }
466
467 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800468 for (uint32_t n = 1; n <= 8; n++) {
469 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan688f6d82020-07-14 17:02:11 -0700470 GemmMicrokernelTester()
471 .mr(4)
472 .nr(8)
473 .kr(1)
474 .sr(1)
475 .m(m)
476 .n(n)
477 .k(4)
478 .iterations(1)
479 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
480 }
481 }
482 }
483
484 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
485 for (uint32_t m = 1; m <= 4; m++) {
486 GemmMicrokernelTester()
487 .mr(4)
488 .nr(8)
489 .kr(1)
490 .sr(1)
491 .m(m)
492 .n(8)
493 .k(4)
494 .iterations(1)
495 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
496 }
497 }
498
499 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
500 for (uint32_t n = 1; n <= 8; n++) {
501 GemmMicrokernelTester()
502 .mr(4)
503 .nr(8)
504 .kr(1)
505 .sr(1)
506 .m(4)
507 .n(n)
508 .k(4)
509 .iterations(1)
510 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
511 }
512 }
513
514 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_lt_4) {
515 for (size_t k = 1; k < 4; k++) {
516 GemmMicrokernelTester()
517 .mr(4)
518 .nr(8)
519 .kr(1)
520 .sr(1)
521 .m(4)
522 .n(8)
523 .k(k)
524 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
525 }
526 }
527
528 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
529 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800530 for (uint32_t n = 1; n <= 8; n++) {
531 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan688f6d82020-07-14 17:02:11 -0700532 GemmMicrokernelTester()
533 .mr(4)
534 .nr(8)
535 .kr(1)
536 .sr(1)
537 .m(m)
538 .n(n)
539 .k(k)
540 .iterations(1)
541 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
542 }
543 }
544 }
545 }
546
547 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_gt_4) {
548 for (size_t k = 5; k < 8; k++) {
549 GemmMicrokernelTester()
550 .mr(4)
551 .nr(8)
552 .kr(1)
553 .sr(1)
554 .m(4)
555 .n(8)
556 .k(k)
557 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
558 }
559 }
560
561 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
562 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800563 for (uint32_t n = 1; n <= 8; n++) {
564 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan688f6d82020-07-14 17:02:11 -0700565 GemmMicrokernelTester()
566 .mr(4)
567 .nr(8)
568 .kr(1)
569 .sr(1)
570 .m(m)
571 .n(n)
572 .k(k)
573 .iterations(1)
574 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
575 }
576 }
577 }
578 }
579
580 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_div_4) {
581 for (size_t k = 8; k <= 40; k += 4) {
582 GemmMicrokernelTester()
583 .mr(4)
584 .nr(8)
585 .kr(1)
586 .sr(1)
587 .m(4)
588 .n(8)
589 .k(k)
590 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
591 }
592 }
593
594 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_div_4_subtile) {
595 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800596 for (uint32_t n = 1; n <= 8; n++) {
597 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan688f6d82020-07-14 17:02:11 -0700598 GemmMicrokernelTester()
599 .mr(4)
600 .nr(8)
601 .kr(1)
602 .sr(1)
603 .m(m)
604 .n(n)
605 .k(k)
606 .iterations(1)
607 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
608 }
609 }
610 }
611 }
612
613 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8) {
614 for (uint32_t n = 9; n < 16; n++) {
615 for (size_t k = 1; k <= 20; k += 5) {
616 GemmMicrokernelTester()
617 .mr(4)
618 .nr(8)
619 .kr(1)
620 .sr(1)
621 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800622 .n(n)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700623 .k(k)
624 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
625 }
626 }
627 }
628
629 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
630 for (uint32_t n = 9; n < 16; n++) {
631 for (size_t k = 1; k <= 20; k += 5) {
632 GemmMicrokernelTester()
633 .mr(4)
634 .nr(8)
635 .kr(1)
636 .sr(1)
637 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800638 .n(n)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700639 .k(k)
640 .cn_stride(11)
641 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
642 }
643 }
644 }
645
646 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
647 for (uint32_t n = 9; n < 16; n++) {
648 for (size_t k = 1; k <= 20; k += 5) {
649 for (uint32_t m = 1; m <= 4; m++) {
650 GemmMicrokernelTester()
651 .mr(4)
652 .nr(8)
653 .kr(1)
654 .sr(1)
655 .m(m)
656 .n(n)
657 .k(k)
658 .iterations(1)
659 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
660 }
661 }
662 }
663 }
664
665 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8) {
666 for (uint32_t n = 16; n <= 24; n += 8) {
667 for (size_t k = 1; k <= 20; k += 5) {
668 GemmMicrokernelTester()
669 .mr(4)
670 .nr(8)
671 .kr(1)
672 .sr(1)
673 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800674 .n(n)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700675 .k(k)
676 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
677 }
678 }
679 }
680
681 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
682 for (uint32_t n = 16; n <= 24; n += 8) {
683 for (size_t k = 1; k <= 20; k += 5) {
684 GemmMicrokernelTester()
685 .mr(4)
686 .nr(8)
687 .kr(1)
688 .sr(1)
689 .m(4)
690 .n(n)
691 .k(k)
692 .cn_stride(11)
693 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
694 }
695 }
696 }
697
698 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_subtile) {
699 for (uint32_t n = 16; n <= 24; n += 8) {
700 for (size_t k = 1; k <= 20; k += 5) {
701 for (uint32_t m = 1; m <= 4; m++) {
702 GemmMicrokernelTester()
703 .mr(4)
704 .nr(8)
705 .kr(1)
706 .sr(1)
707 .m(m)
708 .n(n)
709 .k(k)
710 .iterations(1)
711 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
712 }
713 }
714 }
715 }
716
717 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, small_kernel) {
718 for (size_t k = 1; k <= 20; k += 5) {
719 GemmMicrokernelTester()
720 .mr(4)
721 .nr(8)
722 .kr(1)
723 .sr(1)
724 .m(4)
725 .n(8)
726 .k(k)
727 .ks(3)
728 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
729 }
730 }
731
732 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, small_kernel_subtile) {
733 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800734 for (uint32_t n = 1; n <= 8; n++) {
735 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan688f6d82020-07-14 17:02:11 -0700736 GemmMicrokernelTester()
737 .mr(4)
738 .nr(8)
739 .kr(1)
740 .sr(1)
741 .m(m)
742 .n(n)
743 .k(k)
744 .ks(3)
745 .iterations(1)
746 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
747 }
748 }
749 }
750 }
751
752 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
753 for (uint32_t n = 9; n < 16; n++) {
754 for (size_t k = 1; k <= 20; k += 5) {
755 GemmMicrokernelTester()
756 .mr(4)
757 .nr(8)
758 .kr(1)
759 .sr(1)
760 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800761 .n(n)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700762 .k(k)
763 .ks(3)
764 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
765 }
766 }
767 }
768
769 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
770 for (uint32_t n = 16; n <= 24; n += 8) {
771 for (size_t k = 1; k <= 20; k += 5) {
772 GemmMicrokernelTester()
773 .mr(4)
774 .nr(8)
775 .kr(1)
776 .sr(1)
777 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800778 .n(n)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700779 .k(k)
780 .ks(3)
781 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
782 }
783 }
784 }
785
786 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cm_subtile) {
787 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800788 for (uint32_t n = 1; n <= 8; n++) {
789 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan688f6d82020-07-14 17:02:11 -0700790 GemmMicrokernelTester()
791 .mr(4)
792 .nr(8)
793 .kr(1)
794 .sr(1)
795 .m(m)
796 .n(n)
797 .k(k)
798 .cm_stride(11)
799 .iterations(1)
800 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
801 }
802 }
803 }
804 }
805
806 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, a_offset) {
807 for (size_t k = 1; k <= 20; k += 5) {
808 GemmMicrokernelTester()
809 .mr(4)
810 .nr(8)
811 .kr(1)
812 .sr(1)
813 .m(4)
814 .n(8)
815 .k(k)
816 .ks(3)
817 .a_offset(83)
818 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
819 }
820 }
821
822 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800823 for (size_t k = 1; k <= 20; k += 5) {
824 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan688f6d82020-07-14 17:02:11 -0700825 GemmMicrokernelTester()
826 .mr(4)
827 .nr(8)
828 .kr(1)
829 .sr(1)
830 .m(4)
831 .n(8)
832 .k(k)
833 .ks(3)
834 .a_offset(83)
835 .zero_index(mz)
836 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
837 }
838 }
839 }
840
841 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cm) {
842 GemmMicrokernelTester()
843 .mr(4)
844 .nr(8)
845 .kr(1)
846 .sr(1)
847 .m(4)
848 .n(8)
849 .k(4)
850 .cm_stride(11)
851 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
852 }
Marat Dukhan4c617792021-12-21 15:47:58 -0800853#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan688f6d82020-07-14 17:02:11 -0700854
855
Marat Dukhan4c617792021-12-21 15:47:58 -0800856#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan688f6d82020-07-14 17:02:11 -0700857 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700858 GemmMicrokernelTester()
859 .mr(5)
860 .nr(8)
861 .kr(1)
862 .sr(1)
863 .m(5)
864 .n(8)
865 .k(4)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700866 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700867 }
868
Marat Dukhan688f6d82020-07-14 17:02:11 -0700869 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cn) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700870 GemmMicrokernelTester()
871 .mr(5)
872 .nr(8)
873 .kr(1)
874 .sr(1)
875 .m(5)
876 .n(8)
877 .k(4)
878 .cn_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700879 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700880 }
881
Marat Dukhan688f6d82020-07-14 17:02:11 -0700882 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800883 for (uint32_t n = 1; n <= 8; n++) {
884 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700885 GemmMicrokernelTester()
886 .mr(5)
887 .nr(8)
888 .kr(1)
889 .sr(1)
890 .m(m)
891 .n(n)
892 .k(4)
893 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700894 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700895 }
896 }
897 }
898
Marat Dukhan688f6d82020-07-14 17:02:11 -0700899 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700900 for (uint32_t m = 1; m <= 5; m++) {
901 GemmMicrokernelTester()
902 .mr(5)
903 .nr(8)
904 .kr(1)
905 .sr(1)
906 .m(m)
907 .n(8)
908 .k(4)
909 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700910 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700911 }
912 }
913
Marat Dukhan688f6d82020-07-14 17:02:11 -0700914 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700915 for (uint32_t n = 1; n <= 8; n++) {
916 GemmMicrokernelTester()
917 .mr(5)
918 .nr(8)
919 .kr(1)
920 .sr(1)
921 .m(5)
922 .n(n)
923 .k(4)
924 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700925 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700926 }
927 }
928
Marat Dukhan688f6d82020-07-14 17:02:11 -0700929 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_lt_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700930 for (size_t k = 1; k < 4; k++) {
931 GemmMicrokernelTester()
932 .mr(5)
933 .nr(8)
934 .kr(1)
935 .sr(1)
936 .m(5)
937 .n(8)
938 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700939 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700940 }
941 }
942
Marat Dukhan688f6d82020-07-14 17:02:11 -0700943 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700944 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800945 for (uint32_t n = 1; n <= 8; n++) {
946 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700947 GemmMicrokernelTester()
948 .mr(5)
949 .nr(8)
950 .kr(1)
951 .sr(1)
952 .m(m)
953 .n(n)
954 .k(k)
955 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700956 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700957 }
958 }
959 }
960 }
961
Marat Dukhan688f6d82020-07-14 17:02:11 -0700962 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_gt_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700963 for (size_t k = 5; k < 8; k++) {
964 GemmMicrokernelTester()
965 .mr(5)
966 .nr(8)
967 .kr(1)
968 .sr(1)
969 .m(5)
970 .n(8)
971 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700972 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700973 }
974 }
975
Marat Dukhan688f6d82020-07-14 17:02:11 -0700976 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700977 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800978 for (uint32_t n = 1; n <= 8; n++) {
979 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700980 GemmMicrokernelTester()
981 .mr(5)
982 .nr(8)
983 .kr(1)
984 .sr(1)
985 .m(m)
986 .n(n)
987 .k(k)
988 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700989 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700990 }
991 }
992 }
993 }
994
Marat Dukhan688f6d82020-07-14 17:02:11 -0700995 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_div_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700996 for (size_t k = 8; k <= 40; k += 4) {
997 GemmMicrokernelTester()
998 .mr(5)
999 .nr(8)
1000 .kr(1)
1001 .sr(1)
1002 .m(5)
1003 .n(8)
1004 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001005 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001006 }
1007 }
1008
Marat Dukhan688f6d82020-07-14 17:02:11 -07001009 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_div_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001010 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001011 for (uint32_t n = 1; n <= 8; n++) {
1012 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001013 GemmMicrokernelTester()
1014 .mr(5)
1015 .nr(8)
1016 .kr(1)
1017 .sr(1)
1018 .m(m)
1019 .n(n)
1020 .k(k)
1021 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001022 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001023 }
1024 }
1025 }
1026 }
1027
Marat Dukhan688f6d82020-07-14 17:02:11 -07001028 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001029 for (uint32_t n = 9; n < 16; n++) {
1030 for (size_t k = 1; k <= 20; k += 5) {
1031 GemmMicrokernelTester()
1032 .mr(5)
1033 .nr(8)
1034 .kr(1)
1035 .sr(1)
1036 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001037 .n(n)
Marat Dukhan802808c2020-06-16 11:01:17 -07001038 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001039 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001040 }
1041 }
1042 }
1043
Marat Dukhan688f6d82020-07-14 17:02:11 -07001044 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001045 for (uint32_t n = 9; n < 16; n++) {
1046 for (size_t k = 1; k <= 20; k += 5) {
1047 GemmMicrokernelTester()
1048 .mr(5)
1049 .nr(8)
1050 .kr(1)
1051 .sr(1)
1052 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001053 .n(n)
Marat Dukhan802808c2020-06-16 11:01:17 -07001054 .k(k)
1055 .cn_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001056 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001057 }
1058 }
1059 }
1060
Marat Dukhan688f6d82020-07-14 17:02:11 -07001061 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001062 for (uint32_t n = 9; n < 16; n++) {
1063 for (size_t k = 1; k <= 20; k += 5) {
1064 for (uint32_t m = 1; m <= 5; m++) {
1065 GemmMicrokernelTester()
1066 .mr(5)
1067 .nr(8)
1068 .kr(1)
1069 .sr(1)
1070 .m(m)
1071 .n(n)
1072 .k(k)
1073 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001074 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001075 }
1076 }
1077 }
1078 }
1079
Marat Dukhan688f6d82020-07-14 17:02:11 -07001080 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001081 for (uint32_t n = 16; n <= 24; n += 8) {
1082 for (size_t k = 1; k <= 20; k += 5) {
1083 GemmMicrokernelTester()
1084 .mr(5)
1085 .nr(8)
1086 .kr(1)
1087 .sr(1)
1088 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001089 .n(n)
Marat Dukhan802808c2020-06-16 11:01:17 -07001090 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001091 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001092 }
1093 }
1094 }
1095
Marat Dukhan688f6d82020-07-14 17:02:11 -07001096 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001097 for (uint32_t n = 16; n <= 24; n += 8) {
1098 for (size_t k = 1; k <= 20; k += 5) {
1099 GemmMicrokernelTester()
1100 .mr(5)
1101 .nr(8)
1102 .kr(1)
1103 .sr(1)
1104 .m(5)
1105 .n(n)
1106 .k(k)
1107 .cn_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001108 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001109 }
1110 }
1111 }
1112
Marat Dukhan688f6d82020-07-14 17:02:11 -07001113 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001114 for (uint32_t n = 16; n <= 24; n += 8) {
1115 for (size_t k = 1; k <= 20; k += 5) {
1116 for (uint32_t m = 1; m <= 5; m++) {
1117 GemmMicrokernelTester()
1118 .mr(5)
1119 .nr(8)
1120 .kr(1)
1121 .sr(1)
1122 .m(m)
1123 .n(n)
1124 .k(k)
1125 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001126 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001127 }
1128 }
1129 }
1130 }
1131
Marat Dukhan688f6d82020-07-14 17:02:11 -07001132 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, small_kernel) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001133 for (size_t k = 1; k <= 20; k += 5) {
1134 GemmMicrokernelTester()
1135 .mr(5)
1136 .nr(8)
1137 .kr(1)
1138 .sr(1)
1139 .m(5)
1140 .n(8)
1141 .k(k)
1142 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001143 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001144 }
1145 }
1146
Marat Dukhan688f6d82020-07-14 17:02:11 -07001147 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, small_kernel_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001148 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001149 for (uint32_t n = 1; n <= 8; n++) {
1150 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001151 GemmMicrokernelTester()
1152 .mr(5)
1153 .nr(8)
1154 .kr(1)
1155 .sr(1)
1156 .m(m)
1157 .n(n)
1158 .k(k)
1159 .ks(3)
1160 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001161 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001162 }
1163 }
1164 }
1165 }
1166
Marat Dukhan688f6d82020-07-14 17:02:11 -07001167 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001168 for (uint32_t n = 9; n < 16; n++) {
1169 for (size_t k = 1; k <= 20; k += 5) {
1170 GemmMicrokernelTester()
1171 .mr(5)
1172 .nr(8)
1173 .kr(1)
1174 .sr(1)
1175 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001176 .n(n)
Marat Dukhan802808c2020-06-16 11:01:17 -07001177 .k(k)
1178 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001179 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001180 }
1181 }
1182 }
1183
Marat Dukhan688f6d82020-07-14 17:02:11 -07001184 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001185 for (uint32_t n = 16; n <= 24; n += 8) {
1186 for (size_t k = 1; k <= 20; k += 5) {
1187 GemmMicrokernelTester()
1188 .mr(5)
1189 .nr(8)
1190 .kr(1)
1191 .sr(1)
1192 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001193 .n(n)
Marat Dukhan802808c2020-06-16 11:01:17 -07001194 .k(k)
1195 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001196 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001197 }
1198 }
1199 }
1200
Marat Dukhan688f6d82020-07-14 17:02:11 -07001201 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cm_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001202 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001203 for (uint32_t n = 1; n <= 8; n++) {
1204 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001205 GemmMicrokernelTester()
1206 .mr(5)
1207 .nr(8)
1208 .kr(1)
1209 .sr(1)
1210 .m(m)
1211 .n(n)
1212 .k(k)
1213 .cm_stride(11)
1214 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001215 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001216 }
1217 }
1218 }
1219 }
1220
Marat Dukhan688f6d82020-07-14 17:02:11 -07001221 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, a_offset) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001222 for (size_t k = 1; k <= 20; k += 5) {
1223 GemmMicrokernelTester()
1224 .mr(5)
1225 .nr(8)
1226 .kr(1)
1227 .sr(1)
1228 .m(5)
1229 .n(8)
1230 .k(k)
1231 .ks(3)
1232 .a_offset(103)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001233 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001234 }
1235 }
1236
Marat Dukhan688f6d82020-07-14 17:02:11 -07001237 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001238 for (size_t k = 1; k <= 20; k += 5) {
1239 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001240 GemmMicrokernelTester()
1241 .mr(5)
1242 .nr(8)
1243 .kr(1)
1244 .sr(1)
1245 .m(5)
1246 .n(8)
1247 .k(k)
1248 .ks(3)
1249 .a_offset(103)
1250 .zero_index(mz)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001251 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001252 }
1253 }
1254 }
1255
Marat Dukhan688f6d82020-07-14 17:02:11 -07001256 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cm) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001257 GemmMicrokernelTester()
1258 .mr(5)
1259 .nr(8)
1260 .kr(1)
1261 .sr(1)
1262 .m(5)
1263 .n(8)
1264 .k(4)
1265 .cm_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001266 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001267 }
Marat Dukhan4c617792021-12-21 15:47:58 -08001268#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan802808c2020-06-16 11:01:17 -07001269
1270
Marat Dukhan4c617792021-12-21 15:47:58 -08001271#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan467f6362020-05-22 23:21:55 -07001272 TEST(F32_IGEMM_RELU_1X4__WASM, k_eq_1) {
1273 GemmMicrokernelTester()
1274 .mr(1)
1275 .nr(4)
1276 .kr(1)
1277 .sr(1)
1278 .m(1)
1279 .n(4)
1280 .k(1)
1281 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1282 }
1283
1284 TEST(F32_IGEMM_RELU_1X4__WASM, strided_cn) {
1285 GemmMicrokernelTester()
1286 .mr(1)
1287 .nr(4)
1288 .kr(1)
1289 .sr(1)
1290 .m(1)
1291 .n(4)
1292 .k(1)
1293 .cn_stride(7)
1294 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1295 }
1296
1297 TEST(F32_IGEMM_RELU_1X4__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001298 for (uint32_t n = 1; n <= 4; n++) {
1299 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan467f6362020-05-22 23:21:55 -07001300 GemmMicrokernelTester()
1301 .mr(1)
1302 .nr(4)
1303 .kr(1)
1304 .sr(1)
1305 .m(m)
1306 .n(n)
1307 .k(1)
1308 .iterations(1)
1309 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1310 }
1311 }
1312 }
1313
1314 TEST(F32_IGEMM_RELU_1X4__WASM, k_eq_1_subtile_m) {
1315 for (uint32_t m = 1; m <= 1; m++) {
1316 GemmMicrokernelTester()
1317 .mr(1)
1318 .nr(4)
1319 .kr(1)
1320 .sr(1)
1321 .m(m)
1322 .n(4)
1323 .k(1)
1324 .iterations(1)
1325 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1326 }
1327 }
1328
1329 TEST(F32_IGEMM_RELU_1X4__WASM, k_eq_1_subtile_n) {
1330 for (uint32_t n = 1; n <= 4; n++) {
1331 GemmMicrokernelTester()
1332 .mr(1)
1333 .nr(4)
1334 .kr(1)
1335 .sr(1)
1336 .m(1)
1337 .n(n)
1338 .k(1)
1339 .iterations(1)
1340 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1341 }
1342 }
1343
1344 TEST(F32_IGEMM_RELU_1X4__WASM, k_gt_1) {
1345 for (size_t k = 2; k < 10; k++) {
1346 GemmMicrokernelTester()
1347 .mr(1)
1348 .nr(4)
1349 .kr(1)
1350 .sr(1)
1351 .m(1)
1352 .n(4)
1353 .k(k)
1354 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1355 }
1356 }
1357
1358 TEST(F32_IGEMM_RELU_1X4__WASM, k_gt_1_subtile) {
1359 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001360 for (uint32_t n = 1; n <= 4; n++) {
1361 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan467f6362020-05-22 23:21:55 -07001362 GemmMicrokernelTester()
1363 .mr(1)
1364 .nr(4)
1365 .kr(1)
1366 .sr(1)
1367 .m(m)
1368 .n(n)
1369 .k(k)
1370 .iterations(1)
1371 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1372 }
1373 }
1374 }
1375 }
1376
1377 TEST(F32_IGEMM_RELU_1X4__WASM, n_gt_4) {
1378 for (uint32_t n = 5; n < 8; n++) {
1379 for (size_t k = 1; k <= 5; k += 2) {
1380 GemmMicrokernelTester()
1381 .mr(1)
1382 .nr(4)
1383 .kr(1)
1384 .sr(1)
1385 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001386 .n(n)
Marat Dukhan467f6362020-05-22 23:21:55 -07001387 .k(k)
1388 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1389 }
1390 }
1391 }
1392
1393 TEST(F32_IGEMM_RELU_1X4__WASM, n_gt_4_strided_cn) {
1394 for (uint32_t n = 5; n < 8; n++) {
1395 for (size_t k = 1; k <= 5; k += 2) {
1396 GemmMicrokernelTester()
1397 .mr(1)
1398 .nr(4)
1399 .kr(1)
1400 .sr(1)
1401 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001402 .n(n)
Marat Dukhan467f6362020-05-22 23:21:55 -07001403 .k(k)
1404 .cn_stride(7)
1405 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1406 }
1407 }
1408 }
1409
1410 TEST(F32_IGEMM_RELU_1X4__WASM, n_gt_4_subtile) {
1411 for (uint32_t n = 5; n < 8; n++) {
1412 for (size_t k = 1; k <= 5; k += 2) {
1413 for (uint32_t m = 1; m <= 1; m++) {
1414 GemmMicrokernelTester()
1415 .mr(1)
1416 .nr(4)
1417 .kr(1)
1418 .sr(1)
1419 .m(m)
1420 .n(n)
1421 .k(k)
1422 .iterations(1)
1423 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1424 }
1425 }
1426 }
1427 }
1428
1429 TEST(F32_IGEMM_RELU_1X4__WASM, n_div_4) {
1430 for (uint32_t n = 8; n <= 12; n += 4) {
1431 for (size_t k = 1; k <= 5; k += 2) {
1432 GemmMicrokernelTester()
1433 .mr(1)
1434 .nr(4)
1435 .kr(1)
1436 .sr(1)
1437 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001438 .n(n)
Marat Dukhan467f6362020-05-22 23:21:55 -07001439 .k(k)
1440 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1441 }
1442 }
1443 }
1444
1445 TEST(F32_IGEMM_RELU_1X4__WASM, n_div_4_strided_cn) {
1446 for (uint32_t n = 8; n <= 12; n += 4) {
1447 for (size_t k = 1; k <= 5; k += 2) {
1448 GemmMicrokernelTester()
1449 .mr(1)
1450 .nr(4)
1451 .kr(1)
1452 .sr(1)
1453 .m(1)
1454 .n(n)
1455 .k(k)
1456 .cn_stride(7)
1457 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1458 }
1459 }
1460 }
1461
1462 TEST(F32_IGEMM_RELU_1X4__WASM, n_div_4_subtile) {
1463 for (uint32_t n = 8; n <= 12; n += 4) {
1464 for (size_t k = 1; k <= 5; k += 2) {
1465 for (uint32_t m = 1; m <= 1; m++) {
1466 GemmMicrokernelTester()
1467 .mr(1)
1468 .nr(4)
1469 .kr(1)
1470 .sr(1)
1471 .m(m)
1472 .n(n)
1473 .k(k)
1474 .iterations(1)
1475 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1476 }
1477 }
1478 }
1479 }
1480
1481 TEST(F32_IGEMM_RELU_1X4__WASM, small_kernel) {
1482 for (size_t k = 1; k <= 5; k += 2) {
1483 GemmMicrokernelTester()
1484 .mr(1)
1485 .nr(4)
1486 .kr(1)
1487 .sr(1)
1488 .m(1)
1489 .n(4)
1490 .k(k)
1491 .ks(3)
1492 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1493 }
1494 }
1495
1496 TEST(F32_IGEMM_RELU_1X4__WASM, small_kernel_subtile) {
1497 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001498 for (uint32_t n = 1; n <= 4; n++) {
1499 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan467f6362020-05-22 23:21:55 -07001500 GemmMicrokernelTester()
1501 .mr(1)
1502 .nr(4)
1503 .kr(1)
1504 .sr(1)
1505 .m(m)
1506 .n(n)
1507 .k(k)
1508 .ks(3)
1509 .iterations(1)
1510 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1511 }
1512 }
1513 }
1514 }
1515
1516 TEST(F32_IGEMM_RELU_1X4__WASM, n_gt_4_small_kernel) {
1517 for (uint32_t n = 5; n < 8; n++) {
1518 for (size_t k = 1; k <= 5; k += 2) {
1519 GemmMicrokernelTester()
1520 .mr(1)
1521 .nr(4)
1522 .kr(1)
1523 .sr(1)
1524 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001525 .n(n)
Marat Dukhan467f6362020-05-22 23:21:55 -07001526 .k(k)
1527 .ks(3)
1528 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1529 }
1530 }
1531 }
1532
1533 TEST(F32_IGEMM_RELU_1X4__WASM, n_div_4_small_kernel) {
1534 for (uint32_t n = 8; n <= 12; n += 4) {
1535 for (size_t k = 1; k <= 5; k += 2) {
1536 GemmMicrokernelTester()
1537 .mr(1)
1538 .nr(4)
1539 .kr(1)
1540 .sr(1)
1541 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001542 .n(n)
Marat Dukhan467f6362020-05-22 23:21:55 -07001543 .k(k)
1544 .ks(3)
1545 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1546 }
1547 }
1548 }
1549
1550 TEST(F32_IGEMM_RELU_1X4__WASM, strided_cm_subtile) {
1551 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001552 for (uint32_t n = 1; n <= 4; n++) {
1553 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan467f6362020-05-22 23:21:55 -07001554 GemmMicrokernelTester()
1555 .mr(1)
1556 .nr(4)
1557 .kr(1)
1558 .sr(1)
1559 .m(m)
1560 .n(n)
1561 .k(k)
1562 .cm_stride(7)
1563 .iterations(1)
1564 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1565 }
1566 }
1567 }
1568 }
1569
1570 TEST(F32_IGEMM_RELU_1X4__WASM, a_offset) {
1571 for (size_t k = 1; k <= 5; k += 2) {
1572 GemmMicrokernelTester()
1573 .mr(1)
1574 .nr(4)
1575 .kr(1)
1576 .sr(1)
1577 .m(1)
1578 .n(4)
1579 .k(k)
1580 .ks(3)
1581 .a_offset(7)
1582 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1583 }
1584 }
1585
1586 TEST(F32_IGEMM_RELU_1X4__WASM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001587 for (size_t k = 1; k <= 5; k += 2) {
1588 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan467f6362020-05-22 23:21:55 -07001589 GemmMicrokernelTester()
1590 .mr(1)
1591 .nr(4)
1592 .kr(1)
1593 .sr(1)
1594 .m(1)
1595 .n(4)
1596 .k(k)
1597 .ks(3)
1598 .a_offset(7)
1599 .zero_index(mz)
1600 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1601 }
1602 }
1603 }
1604
1605 TEST(F32_IGEMM_RELU_1X4__WASM, strided_cm) {
1606 GemmMicrokernelTester()
1607 .mr(1)
1608 .nr(4)
1609 .kr(1)
1610 .sr(1)
1611 .m(1)
1612 .n(4)
1613 .k(1)
1614 .cm_stride(7)
1615 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1616 }
Marat Dukhan4c617792021-12-21 15:47:58 -08001617#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan467f6362020-05-22 23:21:55 -07001618
1619
Marat Dukhan467f6362020-05-22 23:21:55 -07001620TEST(F32_IGEMM_RELU_2X4__SCALAR, k_eq_1) {
1621 GemmMicrokernelTester()
1622 .mr(2)
1623 .nr(4)
1624 .kr(1)
1625 .sr(1)
1626 .m(2)
1627 .n(4)
1628 .k(1)
1629 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1630}
1631
1632TEST(F32_IGEMM_RELU_2X4__SCALAR, strided_cn) {
1633 GemmMicrokernelTester()
1634 .mr(2)
1635 .nr(4)
1636 .kr(1)
1637 .sr(1)
1638 .m(2)
1639 .n(4)
1640 .k(1)
1641 .cn_stride(7)
1642 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1643}
1644
1645TEST(F32_IGEMM_RELU_2X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001646 for (uint32_t n = 1; n <= 4; n++) {
1647 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan467f6362020-05-22 23:21:55 -07001648 GemmMicrokernelTester()
1649 .mr(2)
1650 .nr(4)
1651 .kr(1)
1652 .sr(1)
1653 .m(m)
1654 .n(n)
1655 .k(1)
1656 .iterations(1)
1657 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1658 }
1659 }
1660}
1661
1662TEST(F32_IGEMM_RELU_2X4__SCALAR, k_eq_1_subtile_m) {
1663 for (uint32_t m = 1; m <= 2; m++) {
1664 GemmMicrokernelTester()
1665 .mr(2)
1666 .nr(4)
1667 .kr(1)
1668 .sr(1)
1669 .m(m)
1670 .n(4)
1671 .k(1)
1672 .iterations(1)
1673 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1674 }
1675}
1676
1677TEST(F32_IGEMM_RELU_2X4__SCALAR, k_eq_1_subtile_n) {
1678 for (uint32_t n = 1; n <= 4; n++) {
1679 GemmMicrokernelTester()
1680 .mr(2)
1681 .nr(4)
1682 .kr(1)
1683 .sr(1)
1684 .m(2)
1685 .n(n)
1686 .k(1)
1687 .iterations(1)
1688 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1689 }
1690}
1691
1692TEST(F32_IGEMM_RELU_2X4__SCALAR, k_gt_1) {
1693 for (size_t k = 2; k < 10; k++) {
1694 GemmMicrokernelTester()
1695 .mr(2)
1696 .nr(4)
1697 .kr(1)
1698 .sr(1)
1699 .m(2)
1700 .n(4)
1701 .k(k)
1702 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1703 }
1704}
1705
1706TEST(F32_IGEMM_RELU_2X4__SCALAR, k_gt_1_subtile) {
1707 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001708 for (uint32_t n = 1; n <= 4; n++) {
1709 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan467f6362020-05-22 23:21:55 -07001710 GemmMicrokernelTester()
1711 .mr(2)
1712 .nr(4)
1713 .kr(1)
1714 .sr(1)
1715 .m(m)
1716 .n(n)
1717 .k(k)
1718 .iterations(1)
1719 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1720 }
1721 }
1722 }
1723}
1724
1725TEST(F32_IGEMM_RELU_2X4__SCALAR, n_gt_4) {
1726 for (uint32_t n = 5; n < 8; n++) {
1727 for (size_t k = 1; k <= 5; k += 2) {
1728 GemmMicrokernelTester()
1729 .mr(2)
1730 .nr(4)
1731 .kr(1)
1732 .sr(1)
1733 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001734 .n(n)
Marat Dukhan467f6362020-05-22 23:21:55 -07001735 .k(k)
1736 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1737 }
1738 }
1739}
1740
1741TEST(F32_IGEMM_RELU_2X4__SCALAR, n_gt_4_strided_cn) {
1742 for (uint32_t n = 5; n < 8; n++) {
1743 for (size_t k = 1; k <= 5; k += 2) {
1744 GemmMicrokernelTester()
1745 .mr(2)
1746 .nr(4)
1747 .kr(1)
1748 .sr(1)
1749 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001750 .n(n)
Marat Dukhan467f6362020-05-22 23:21:55 -07001751 .k(k)
1752 .cn_stride(7)
1753 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1754 }
1755 }
1756}
1757
1758TEST(F32_IGEMM_RELU_2X4__SCALAR, n_gt_4_subtile) {
1759 for (uint32_t n = 5; n < 8; n++) {
1760 for (size_t k = 1; k <= 5; k += 2) {
1761 for (uint32_t m = 1; m <= 2; m++) {
1762 GemmMicrokernelTester()
1763 .mr(2)
1764 .nr(4)
1765 .kr(1)
1766 .sr(1)
1767 .m(m)
1768 .n(n)
1769 .k(k)
1770 .iterations(1)
1771 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1772 }
1773 }
1774 }
1775}
1776
1777TEST(F32_IGEMM_RELU_2X4__SCALAR, n_div_4) {
1778 for (uint32_t n = 8; n <= 12; n += 4) {
1779 for (size_t k = 1; k <= 5; k += 2) {
1780 GemmMicrokernelTester()
1781 .mr(2)
1782 .nr(4)
1783 .kr(1)
1784 .sr(1)
1785 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001786 .n(n)
Marat Dukhan467f6362020-05-22 23:21:55 -07001787 .k(k)
1788 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1789 }
1790 }
1791}
1792
1793TEST(F32_IGEMM_RELU_2X4__SCALAR, n_div_4_strided_cn) {
1794 for (uint32_t n = 8; n <= 12; n += 4) {
1795 for (size_t k = 1; k <= 5; k += 2) {
1796 GemmMicrokernelTester()
1797 .mr(2)
1798 .nr(4)
1799 .kr(1)
1800 .sr(1)
1801 .m(2)
1802 .n(n)
1803 .k(k)
1804 .cn_stride(7)
1805 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1806 }
1807 }
1808}
1809
1810TEST(F32_IGEMM_RELU_2X4__SCALAR, n_div_4_subtile) {
1811 for (uint32_t n = 8; n <= 12; n += 4) {
1812 for (size_t k = 1; k <= 5; k += 2) {
1813 for (uint32_t m = 1; m <= 2; m++) {
1814 GemmMicrokernelTester()
1815 .mr(2)
1816 .nr(4)
1817 .kr(1)
1818 .sr(1)
1819 .m(m)
1820 .n(n)
1821 .k(k)
1822 .iterations(1)
1823 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1824 }
1825 }
1826 }
1827}
1828
1829TEST(F32_IGEMM_RELU_2X4__SCALAR, small_kernel) {
1830 for (size_t k = 1; k <= 5; k += 2) {
1831 GemmMicrokernelTester()
1832 .mr(2)
1833 .nr(4)
1834 .kr(1)
1835 .sr(1)
1836 .m(2)
1837 .n(4)
1838 .k(k)
1839 .ks(3)
1840 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1841 }
1842}
1843
1844TEST(F32_IGEMM_RELU_2X4__SCALAR, small_kernel_subtile) {
1845 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001846 for (uint32_t n = 1; n <= 4; n++) {
1847 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan467f6362020-05-22 23:21:55 -07001848 GemmMicrokernelTester()
1849 .mr(2)
1850 .nr(4)
1851 .kr(1)
1852 .sr(1)
1853 .m(m)
1854 .n(n)
1855 .k(k)
1856 .ks(3)
1857 .iterations(1)
1858 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1859 }
1860 }
1861 }
1862}
1863
1864TEST(F32_IGEMM_RELU_2X4__SCALAR, n_gt_4_small_kernel) {
1865 for (uint32_t n = 5; n < 8; n++) {
1866 for (size_t k = 1; k <= 5; k += 2) {
1867 GemmMicrokernelTester()
1868 .mr(2)
1869 .nr(4)
1870 .kr(1)
1871 .sr(1)
1872 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001873 .n(n)
Marat Dukhan467f6362020-05-22 23:21:55 -07001874 .k(k)
1875 .ks(3)
1876 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1877 }
1878 }
1879}
1880
1881TEST(F32_IGEMM_RELU_2X4__SCALAR, n_div_4_small_kernel) {
1882 for (uint32_t n = 8; n <= 12; n += 4) {
1883 for (size_t k = 1; k <= 5; k += 2) {
1884 GemmMicrokernelTester()
1885 .mr(2)
1886 .nr(4)
1887 .kr(1)
1888 .sr(1)
1889 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001890 .n(n)
Marat Dukhan467f6362020-05-22 23:21:55 -07001891 .k(k)
1892 .ks(3)
1893 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1894 }
1895 }
1896}
1897
1898TEST(F32_IGEMM_RELU_2X4__SCALAR, strided_cm_subtile) {
1899 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001900 for (uint32_t n = 1; n <= 4; n++) {
1901 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan467f6362020-05-22 23:21:55 -07001902 GemmMicrokernelTester()
1903 .mr(2)
1904 .nr(4)
1905 .kr(1)
1906 .sr(1)
1907 .m(m)
1908 .n(n)
1909 .k(k)
1910 .cm_stride(7)
1911 .iterations(1)
1912 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1913 }
1914 }
1915 }
1916}
1917
1918TEST(F32_IGEMM_RELU_2X4__SCALAR, a_offset) {
1919 for (size_t k = 1; k <= 5; k += 2) {
1920 GemmMicrokernelTester()
1921 .mr(2)
1922 .nr(4)
1923 .kr(1)
1924 .sr(1)
1925 .m(2)
1926 .n(4)
1927 .k(k)
1928 .ks(3)
1929 .a_offset(13)
1930 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1931 }
1932}
1933
1934TEST(F32_IGEMM_RELU_2X4__SCALAR, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001935 for (size_t k = 1; k <= 5; k += 2) {
1936 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan467f6362020-05-22 23:21:55 -07001937 GemmMicrokernelTester()
1938 .mr(2)
1939 .nr(4)
1940 .kr(1)
1941 .sr(1)
1942 .m(2)
1943 .n(4)
1944 .k(k)
1945 .ks(3)
1946 .a_offset(13)
1947 .zero_index(mz)
1948 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1949 }
1950 }
1951}
1952
1953TEST(F32_IGEMM_RELU_2X4__SCALAR, strided_cm) {
1954 GemmMicrokernelTester()
1955 .mr(2)
1956 .nr(4)
1957 .kr(1)
1958 .sr(1)
1959 .m(2)
1960 .n(4)
1961 .k(1)
1962 .cm_stride(7)
1963 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
1964}