blob: 0eb9647fa7a46d3b1bb90f36f623e86cf9e3ac37 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-igemm-relu.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
19#include <xnnpack/gemm.h>
20#include <xnnpack/igemm.h>
21#include <xnnpack/ppmm.h>
22#include "gemm-microkernel-tester.h"
23
24
Marat Dukhan802808c2020-06-16 11:01:17 -070025#if XNN_ARCH_WASMSIMD
Marat Dukhan688f6d82020-07-14 17:02:11 -070026 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -070027 GemmMicrokernelTester()
28 .mr(1)
29 .nr(8)
30 .kr(1)
31 .sr(1)
32 .m(1)
33 .n(8)
34 .k(4)
Marat Dukhan688f6d82020-07-14 17:02:11 -070035 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -070036 }
37
Marat Dukhan688f6d82020-07-14 17:02:11 -070038 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cn) {
Marat Dukhan802808c2020-06-16 11:01:17 -070039 GemmMicrokernelTester()
40 .mr(1)
41 .nr(8)
42 .kr(1)
43 .sr(1)
44 .m(1)
45 .n(8)
46 .k(4)
47 .cn_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -070048 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -070049 }
50
Marat Dukhan688f6d82020-07-14 17:02:11 -070051 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -070052 for (uint32_t m = 1; m <= 1; m++) {
53 for (uint32_t n = 1; n <= 8; n++) {
54 GemmMicrokernelTester()
55 .mr(1)
56 .nr(8)
57 .kr(1)
58 .sr(1)
59 .m(m)
60 .n(n)
61 .k(4)
62 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -070063 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -070064 }
65 }
66 }
67
Marat Dukhan688f6d82020-07-14 17:02:11 -070068 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan802808c2020-06-16 11:01:17 -070069 for (uint32_t m = 1; m <= 1; m++) {
70 GemmMicrokernelTester()
71 .mr(1)
72 .nr(8)
73 .kr(1)
74 .sr(1)
75 .m(m)
76 .n(8)
77 .k(4)
78 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -070079 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -070080 }
81 }
82
Marat Dukhan688f6d82020-07-14 17:02:11 -070083 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan802808c2020-06-16 11:01:17 -070084 for (uint32_t n = 1; n <= 8; n++) {
85 GemmMicrokernelTester()
86 .mr(1)
87 .nr(8)
88 .kr(1)
89 .sr(1)
90 .m(1)
91 .n(n)
92 .k(4)
93 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -070094 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -070095 }
96 }
97
Marat Dukhan688f6d82020-07-14 17:02:11 -070098 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_lt_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -070099 for (size_t k = 1; k < 4; k++) {
100 GemmMicrokernelTester()
101 .mr(1)
102 .nr(8)
103 .kr(1)
104 .sr(1)
105 .m(1)
106 .n(8)
107 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700108 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700109 }
110 }
111
Marat Dukhan688f6d82020-07-14 17:02:11 -0700112 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700113 for (size_t k = 1; k < 4; k++) {
114 for (uint32_t m = 1; m <= 1; m++) {
115 for (uint32_t n = 1; n <= 8; n++) {
116 GemmMicrokernelTester()
117 .mr(1)
118 .nr(8)
119 .kr(1)
120 .sr(1)
121 .m(m)
122 .n(n)
123 .k(k)
124 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700125 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700126 }
127 }
128 }
129 }
130
Marat Dukhan688f6d82020-07-14 17:02:11 -0700131 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_gt_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700132 for (size_t k = 5; k < 8; k++) {
133 GemmMicrokernelTester()
134 .mr(1)
135 .nr(8)
136 .kr(1)
137 .sr(1)
138 .m(1)
139 .n(8)
140 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700141 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700142 }
143 }
144
Marat Dukhan688f6d82020-07-14 17:02:11 -0700145 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700146 for (size_t k = 5; k < 8; k++) {
147 for (uint32_t m = 1; m <= 1; m++) {
148 for (uint32_t n = 1; n <= 8; n++) {
149 GemmMicrokernelTester()
150 .mr(1)
151 .nr(8)
152 .kr(1)
153 .sr(1)
154 .m(m)
155 .n(n)
156 .k(k)
157 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700158 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700159 }
160 }
161 }
162 }
163
Marat Dukhan688f6d82020-07-14 17:02:11 -0700164 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_div_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700165 for (size_t k = 8; k <= 40; k += 4) {
166 GemmMicrokernelTester()
167 .mr(1)
168 .nr(8)
169 .kr(1)
170 .sr(1)
171 .m(1)
172 .n(8)
173 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700174 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700175 }
176 }
177
Marat Dukhan688f6d82020-07-14 17:02:11 -0700178 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, k_div_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700179 for (size_t k = 8; k <= 40; k += 4) {
180 for (uint32_t m = 1; m <= 1; m++) {
181 for (uint32_t n = 1; n <= 8; n++) {
182 GemmMicrokernelTester()
183 .mr(1)
184 .nr(8)
185 .kr(1)
186 .sr(1)
187 .m(m)
188 .n(n)
189 .k(k)
190 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700191 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700192 }
193 }
194 }
195 }
196
Marat Dukhan688f6d82020-07-14 17:02:11 -0700197 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700198 for (uint32_t n = 9; n < 16; n++) {
199 for (size_t k = 1; k <= 20; k += 5) {
200 GemmMicrokernelTester()
201 .mr(1)
202 .nr(8)
203 .kr(1)
204 .sr(1)
205 .m(1)
206 .n(8)
207 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700208 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700209 }
210 }
211 }
212
Marat Dukhan688f6d82020-07-14 17:02:11 -0700213 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700214 for (uint32_t n = 9; n < 16; n++) {
215 for (size_t k = 1; k <= 20; k += 5) {
216 GemmMicrokernelTester()
217 .mr(1)
218 .nr(8)
219 .kr(1)
220 .sr(1)
221 .m(1)
222 .n(8)
223 .k(k)
224 .cn_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700225 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700226 }
227 }
228 }
229
Marat Dukhan688f6d82020-07-14 17:02:11 -0700230 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700231 for (uint32_t n = 9; n < 16; n++) {
232 for (size_t k = 1; k <= 20; k += 5) {
233 for (uint32_t m = 1; m <= 1; m++) {
234 GemmMicrokernelTester()
235 .mr(1)
236 .nr(8)
237 .kr(1)
238 .sr(1)
239 .m(m)
240 .n(n)
241 .k(k)
242 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700243 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700244 }
245 }
246 }
247 }
248
Marat Dukhan688f6d82020-07-14 17:02:11 -0700249 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700250 for (uint32_t n = 16; n <= 24; n += 8) {
251 for (size_t k = 1; k <= 20; k += 5) {
252 GemmMicrokernelTester()
253 .mr(1)
254 .nr(8)
255 .kr(1)
256 .sr(1)
257 .m(1)
258 .n(8)
259 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700260 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700261 }
262 }
263 }
264
Marat Dukhan688f6d82020-07-14 17:02:11 -0700265 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700266 for (uint32_t n = 16; n <= 24; n += 8) {
267 for (size_t k = 1; k <= 20; k += 5) {
268 GemmMicrokernelTester()
269 .mr(1)
270 .nr(8)
271 .kr(1)
272 .sr(1)
273 .m(1)
274 .n(n)
275 .k(k)
276 .cn_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700277 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700278 }
279 }
280 }
281
Marat Dukhan688f6d82020-07-14 17:02:11 -0700282 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700283 for (uint32_t n = 16; n <= 24; n += 8) {
284 for (size_t k = 1; k <= 20; k += 5) {
285 for (uint32_t m = 1; m <= 1; m++) {
286 GemmMicrokernelTester()
287 .mr(1)
288 .nr(8)
289 .kr(1)
290 .sr(1)
291 .m(m)
292 .n(n)
293 .k(k)
294 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700295 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700296 }
297 }
298 }
299 }
300
Marat Dukhan688f6d82020-07-14 17:02:11 -0700301 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, small_kernel) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700302 for (size_t k = 1; k <= 20; k += 5) {
303 GemmMicrokernelTester()
304 .mr(1)
305 .nr(8)
306 .kr(1)
307 .sr(1)
308 .m(1)
309 .n(8)
310 .k(k)
311 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700312 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700313 }
314 }
315
Marat Dukhan688f6d82020-07-14 17:02:11 -0700316 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, small_kernel_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700317 for (size_t k = 1; k <= 20; k += 5) {
318 for (uint32_t m = 1; m <= 1; m++) {
319 for (uint32_t n = 1; n <= 8; n++) {
320 GemmMicrokernelTester()
321 .mr(1)
322 .nr(8)
323 .kr(1)
324 .sr(1)
325 .m(m)
326 .n(n)
327 .k(k)
328 .ks(3)
329 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700330 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700331 }
332 }
333 }
334 }
335
Marat Dukhan688f6d82020-07-14 17:02:11 -0700336 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700337 for (uint32_t n = 9; n < 16; n++) {
338 for (size_t k = 1; k <= 20; k += 5) {
339 GemmMicrokernelTester()
340 .mr(1)
341 .nr(8)
342 .kr(1)
343 .sr(1)
344 .m(1)
345 .n(8)
346 .k(k)
347 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700348 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700349 }
350 }
351 }
352
Marat Dukhan688f6d82020-07-14 17:02:11 -0700353 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700354 for (uint32_t n = 16; n <= 24; n += 8) {
355 for (size_t k = 1; k <= 20; k += 5) {
356 GemmMicrokernelTester()
357 .mr(1)
358 .nr(8)
359 .kr(1)
360 .sr(1)
361 .m(1)
362 .n(8)
363 .k(k)
364 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700365 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700366 }
367 }
368 }
369
Marat Dukhan688f6d82020-07-14 17:02:11 -0700370 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cm_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700371 for (size_t k = 1; k <= 20; k += 5) {
372 for (uint32_t m = 1; m <= 1; m++) {
373 for (uint32_t n = 1; n <= 8; n++) {
374 GemmMicrokernelTester()
375 .mr(1)
376 .nr(8)
377 .kr(1)
378 .sr(1)
379 .m(m)
380 .n(n)
381 .k(k)
382 .cm_stride(11)
383 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700384 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700385 }
386 }
387 }
388 }
389
Marat Dukhan688f6d82020-07-14 17:02:11 -0700390 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, a_offset) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700391 for (size_t k = 1; k <= 20; k += 5) {
392 GemmMicrokernelTester()
393 .mr(1)
394 .nr(8)
395 .kr(1)
396 .sr(1)
397 .m(1)
398 .n(8)
399 .k(k)
400 .ks(3)
401 .a_offset(23)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700402 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700403 }
404 }
405
Marat Dukhan688f6d82020-07-14 17:02:11 -0700406 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, zero) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700407 for (uint32_t mz = 0; mz < 1; mz++) {
408 for (size_t k = 1; k <= 20; k += 5) {
409 GemmMicrokernelTester()
410 .mr(1)
411 .nr(8)
412 .kr(1)
413 .sr(1)
414 .m(1)
415 .n(8)
416 .k(k)
417 .ks(3)
418 .a_offset(23)
419 .zero_index(mz)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700420 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700421 }
422 }
423 }
424
Marat Dukhan688f6d82020-07-14 17:02:11 -0700425 TEST(F32_IGEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cm) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700426 GemmMicrokernelTester()
427 .mr(1)
428 .nr(8)
429 .kr(1)
430 .sr(1)
431 .m(1)
432 .n(8)
433 .k(4)
434 .cm_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700435 .Test(xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700436 }
437#endif // XNN_ARCH_WASMSIMD
438
439
440#if XNN_ARCH_WASMSIMD
Marat Dukhan688f6d82020-07-14 17:02:11 -0700441 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4) {
442 GemmMicrokernelTester()
443 .mr(4)
444 .nr(8)
445 .kr(1)
446 .sr(1)
447 .m(4)
448 .n(8)
449 .k(4)
450 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
451 }
452
453 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cn) {
454 GemmMicrokernelTester()
455 .mr(4)
456 .nr(8)
457 .kr(1)
458 .sr(1)
459 .m(4)
460 .n(8)
461 .k(4)
462 .cn_stride(11)
463 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
464 }
465
466 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
467 for (uint32_t m = 1; m <= 4; m++) {
468 for (uint32_t n = 1; n <= 8; n++) {
469 GemmMicrokernelTester()
470 .mr(4)
471 .nr(8)
472 .kr(1)
473 .sr(1)
474 .m(m)
475 .n(n)
476 .k(4)
477 .iterations(1)
478 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
479 }
480 }
481 }
482
483 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
484 for (uint32_t m = 1; m <= 4; m++) {
485 GemmMicrokernelTester()
486 .mr(4)
487 .nr(8)
488 .kr(1)
489 .sr(1)
490 .m(m)
491 .n(8)
492 .k(4)
493 .iterations(1)
494 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
495 }
496 }
497
498 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
499 for (uint32_t n = 1; n <= 8; n++) {
500 GemmMicrokernelTester()
501 .mr(4)
502 .nr(8)
503 .kr(1)
504 .sr(1)
505 .m(4)
506 .n(n)
507 .k(4)
508 .iterations(1)
509 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
510 }
511 }
512
513 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_lt_4) {
514 for (size_t k = 1; k < 4; k++) {
515 GemmMicrokernelTester()
516 .mr(4)
517 .nr(8)
518 .kr(1)
519 .sr(1)
520 .m(4)
521 .n(8)
522 .k(k)
523 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
524 }
525 }
526
527 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
528 for (size_t k = 1; k < 4; k++) {
529 for (uint32_t m = 1; m <= 4; m++) {
530 for (uint32_t n = 1; n <= 8; n++) {
531 GemmMicrokernelTester()
532 .mr(4)
533 .nr(8)
534 .kr(1)
535 .sr(1)
536 .m(m)
537 .n(n)
538 .k(k)
539 .iterations(1)
540 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
541 }
542 }
543 }
544 }
545
546 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_gt_4) {
547 for (size_t k = 5; k < 8; k++) {
548 GemmMicrokernelTester()
549 .mr(4)
550 .nr(8)
551 .kr(1)
552 .sr(1)
553 .m(4)
554 .n(8)
555 .k(k)
556 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
557 }
558 }
559
560 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
561 for (size_t k = 5; k < 8; k++) {
562 for (uint32_t m = 1; m <= 4; m++) {
563 for (uint32_t n = 1; n <= 8; n++) {
564 GemmMicrokernelTester()
565 .mr(4)
566 .nr(8)
567 .kr(1)
568 .sr(1)
569 .m(m)
570 .n(n)
571 .k(k)
572 .iterations(1)
573 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
574 }
575 }
576 }
577 }
578
579 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_div_4) {
580 for (size_t k = 8; k <= 40; k += 4) {
581 GemmMicrokernelTester()
582 .mr(4)
583 .nr(8)
584 .kr(1)
585 .sr(1)
586 .m(4)
587 .n(8)
588 .k(k)
589 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
590 }
591 }
592
593 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, k_div_4_subtile) {
594 for (size_t k = 8; k <= 40; k += 4) {
595 for (uint32_t m = 1; m <= 4; m++) {
596 for (uint32_t n = 1; n <= 8; n++) {
597 GemmMicrokernelTester()
598 .mr(4)
599 .nr(8)
600 .kr(1)
601 .sr(1)
602 .m(m)
603 .n(n)
604 .k(k)
605 .iterations(1)
606 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
607 }
608 }
609 }
610 }
611
612 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8) {
613 for (uint32_t n = 9; n < 16; n++) {
614 for (size_t k = 1; k <= 20; k += 5) {
615 GemmMicrokernelTester()
616 .mr(4)
617 .nr(8)
618 .kr(1)
619 .sr(1)
620 .m(4)
621 .n(8)
622 .k(k)
623 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
624 }
625 }
626 }
627
628 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
629 for (uint32_t n = 9; n < 16; n++) {
630 for (size_t k = 1; k <= 20; k += 5) {
631 GemmMicrokernelTester()
632 .mr(4)
633 .nr(8)
634 .kr(1)
635 .sr(1)
636 .m(4)
637 .n(8)
638 .k(k)
639 .cn_stride(11)
640 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
641 }
642 }
643 }
644
645 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
646 for (uint32_t n = 9; n < 16; n++) {
647 for (size_t k = 1; k <= 20; k += 5) {
648 for (uint32_t m = 1; m <= 4; m++) {
649 GemmMicrokernelTester()
650 .mr(4)
651 .nr(8)
652 .kr(1)
653 .sr(1)
654 .m(m)
655 .n(n)
656 .k(k)
657 .iterations(1)
658 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
659 }
660 }
661 }
662 }
663
664 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8) {
665 for (uint32_t n = 16; n <= 24; n += 8) {
666 for (size_t k = 1; k <= 20; k += 5) {
667 GemmMicrokernelTester()
668 .mr(4)
669 .nr(8)
670 .kr(1)
671 .sr(1)
672 .m(4)
673 .n(8)
674 .k(k)
675 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
676 }
677 }
678 }
679
680 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
681 for (uint32_t n = 16; n <= 24; n += 8) {
682 for (size_t k = 1; k <= 20; k += 5) {
683 GemmMicrokernelTester()
684 .mr(4)
685 .nr(8)
686 .kr(1)
687 .sr(1)
688 .m(4)
689 .n(n)
690 .k(k)
691 .cn_stride(11)
692 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
693 }
694 }
695 }
696
697 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_subtile) {
698 for (uint32_t n = 16; n <= 24; n += 8) {
699 for (size_t k = 1; k <= 20; k += 5) {
700 for (uint32_t m = 1; m <= 4; m++) {
701 GemmMicrokernelTester()
702 .mr(4)
703 .nr(8)
704 .kr(1)
705 .sr(1)
706 .m(m)
707 .n(n)
708 .k(k)
709 .iterations(1)
710 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
711 }
712 }
713 }
714 }
715
716 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, small_kernel) {
717 for (size_t k = 1; k <= 20; k += 5) {
718 GemmMicrokernelTester()
719 .mr(4)
720 .nr(8)
721 .kr(1)
722 .sr(1)
723 .m(4)
724 .n(8)
725 .k(k)
726 .ks(3)
727 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
728 }
729 }
730
731 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, small_kernel_subtile) {
732 for (size_t k = 1; k <= 20; k += 5) {
733 for (uint32_t m = 1; m <= 4; m++) {
734 for (uint32_t n = 1; n <= 8; n++) {
735 GemmMicrokernelTester()
736 .mr(4)
737 .nr(8)
738 .kr(1)
739 .sr(1)
740 .m(m)
741 .n(n)
742 .k(k)
743 .ks(3)
744 .iterations(1)
745 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
746 }
747 }
748 }
749 }
750
751 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
752 for (uint32_t n = 9; n < 16; n++) {
753 for (size_t k = 1; k <= 20; k += 5) {
754 GemmMicrokernelTester()
755 .mr(4)
756 .nr(8)
757 .kr(1)
758 .sr(1)
759 .m(4)
760 .n(8)
761 .k(k)
762 .ks(3)
763 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
764 }
765 }
766 }
767
768 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
769 for (uint32_t n = 16; n <= 24; n += 8) {
770 for (size_t k = 1; k <= 20; k += 5) {
771 GemmMicrokernelTester()
772 .mr(4)
773 .nr(8)
774 .kr(1)
775 .sr(1)
776 .m(4)
777 .n(8)
778 .k(k)
779 .ks(3)
780 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
781 }
782 }
783 }
784
785 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cm_subtile) {
786 for (size_t k = 1; k <= 20; k += 5) {
787 for (uint32_t m = 1; m <= 4; m++) {
788 for (uint32_t n = 1; n <= 8; n++) {
789 GemmMicrokernelTester()
790 .mr(4)
791 .nr(8)
792 .kr(1)
793 .sr(1)
794 .m(m)
795 .n(n)
796 .k(k)
797 .cm_stride(11)
798 .iterations(1)
799 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
800 }
801 }
802 }
803 }
804
805 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, a_offset) {
806 for (size_t k = 1; k <= 20; k += 5) {
807 GemmMicrokernelTester()
808 .mr(4)
809 .nr(8)
810 .kr(1)
811 .sr(1)
812 .m(4)
813 .n(8)
814 .k(k)
815 .ks(3)
816 .a_offset(83)
817 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
818 }
819 }
820
821 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, zero) {
822 for (uint32_t mz = 0; mz < 4; mz++) {
823 for (size_t k = 1; k <= 20; k += 5) {
824 GemmMicrokernelTester()
825 .mr(4)
826 .nr(8)
827 .kr(1)
828 .sr(1)
829 .m(4)
830 .n(8)
831 .k(k)
832 .ks(3)
833 .a_offset(83)
834 .zero_index(mz)
835 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
836 }
837 }
838 }
839
840 TEST(F32_IGEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cm) {
841 GemmMicrokernelTester()
842 .mr(4)
843 .nr(8)
844 .kr(1)
845 .sr(1)
846 .m(4)
847 .n(8)
848 .k(4)
849 .cm_stride(11)
850 .Test(xnn_f32_igemm_relu_ukernel_4x8__wasmsimd_splat);
851 }
852#endif // XNN_ARCH_WASMSIMD
853
854
855#if XNN_ARCH_WASMSIMD
856 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700857 GemmMicrokernelTester()
858 .mr(5)
859 .nr(8)
860 .kr(1)
861 .sr(1)
862 .m(5)
863 .n(8)
864 .k(4)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700865 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700866 }
867
Marat Dukhan688f6d82020-07-14 17:02:11 -0700868 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cn) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700869 GemmMicrokernelTester()
870 .mr(5)
871 .nr(8)
872 .kr(1)
873 .sr(1)
874 .m(5)
875 .n(8)
876 .k(4)
877 .cn_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700878 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700879 }
880
Marat Dukhan688f6d82020-07-14 17:02:11 -0700881 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700882 for (uint32_t m = 1; m <= 5; m++) {
883 for (uint32_t n = 1; n <= 8; n++) {
884 GemmMicrokernelTester()
885 .mr(5)
886 .nr(8)
887 .kr(1)
888 .sr(1)
889 .m(m)
890 .n(n)
891 .k(4)
892 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700893 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700894 }
895 }
896 }
897
Marat Dukhan688f6d82020-07-14 17:02:11 -0700898 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700899 for (uint32_t m = 1; m <= 5; m++) {
900 GemmMicrokernelTester()
901 .mr(5)
902 .nr(8)
903 .kr(1)
904 .sr(1)
905 .m(m)
906 .n(8)
907 .k(4)
908 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700909 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700910 }
911 }
912
Marat Dukhan688f6d82020-07-14 17:02:11 -0700913 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700914 for (uint32_t n = 1; n <= 8; n++) {
915 GemmMicrokernelTester()
916 .mr(5)
917 .nr(8)
918 .kr(1)
919 .sr(1)
920 .m(5)
921 .n(n)
922 .k(4)
923 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700924 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700925 }
926 }
927
Marat Dukhan688f6d82020-07-14 17:02:11 -0700928 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_lt_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700929 for (size_t k = 1; k < 4; k++) {
930 GemmMicrokernelTester()
931 .mr(5)
932 .nr(8)
933 .kr(1)
934 .sr(1)
935 .m(5)
936 .n(8)
937 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700938 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700939 }
940 }
941
Marat Dukhan688f6d82020-07-14 17:02:11 -0700942 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700943 for (size_t k = 1; k < 4; k++) {
944 for (uint32_t m = 1; m <= 5; m++) {
945 for (uint32_t n = 1; n <= 8; n++) {
946 GemmMicrokernelTester()
947 .mr(5)
948 .nr(8)
949 .kr(1)
950 .sr(1)
951 .m(m)
952 .n(n)
953 .k(k)
954 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700955 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700956 }
957 }
958 }
959 }
960
Marat Dukhan688f6d82020-07-14 17:02:11 -0700961 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_gt_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700962 for (size_t k = 5; k < 8; k++) {
963 GemmMicrokernelTester()
964 .mr(5)
965 .nr(8)
966 .kr(1)
967 .sr(1)
968 .m(5)
969 .n(8)
970 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700971 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700972 }
973 }
974
Marat Dukhan688f6d82020-07-14 17:02:11 -0700975 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700976 for (size_t k = 5; k < 8; k++) {
977 for (uint32_t m = 1; m <= 5; m++) {
978 for (uint32_t n = 1; n <= 8; n++) {
979 GemmMicrokernelTester()
980 .mr(5)
981 .nr(8)
982 .kr(1)
983 .sr(1)
984 .m(m)
985 .n(n)
986 .k(k)
987 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -0700988 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -0700989 }
990 }
991 }
992 }
993
Marat Dukhan688f6d82020-07-14 17:02:11 -0700994 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_div_4) {
Marat Dukhan802808c2020-06-16 11:01:17 -0700995 for (size_t k = 8; k <= 40; k += 4) {
996 GemmMicrokernelTester()
997 .mr(5)
998 .nr(8)
999 .kr(1)
1000 .sr(1)
1001 .m(5)
1002 .n(8)
1003 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001004 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001005 }
1006 }
1007
Marat Dukhan688f6d82020-07-14 17:02:11 -07001008 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, k_div_4_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001009 for (size_t k = 8; k <= 40; k += 4) {
1010 for (uint32_t m = 1; m <= 5; m++) {
1011 for (uint32_t n = 1; n <= 8; n++) {
1012 GemmMicrokernelTester()
1013 .mr(5)
1014 .nr(8)
1015 .kr(1)
1016 .sr(1)
1017 .m(m)
1018 .n(n)
1019 .k(k)
1020 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001021 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001022 }
1023 }
1024 }
1025 }
1026
Marat Dukhan688f6d82020-07-14 17:02:11 -07001027 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001028 for (uint32_t n = 9; n < 16; n++) {
1029 for (size_t k = 1; k <= 20; k += 5) {
1030 GemmMicrokernelTester()
1031 .mr(5)
1032 .nr(8)
1033 .kr(1)
1034 .sr(1)
1035 .m(5)
1036 .n(8)
1037 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001038 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001039 }
1040 }
1041 }
1042
Marat Dukhan688f6d82020-07-14 17:02:11 -07001043 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001044 for (uint32_t n = 9; n < 16; n++) {
1045 for (size_t k = 1; k <= 20; k += 5) {
1046 GemmMicrokernelTester()
1047 .mr(5)
1048 .nr(8)
1049 .kr(1)
1050 .sr(1)
1051 .m(5)
1052 .n(8)
1053 .k(k)
1054 .cn_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001055 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001056 }
1057 }
1058 }
1059
Marat Dukhan688f6d82020-07-14 17:02:11 -07001060 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001061 for (uint32_t n = 9; n < 16; n++) {
1062 for (size_t k = 1; k <= 20; k += 5) {
1063 for (uint32_t m = 1; m <= 5; m++) {
1064 GemmMicrokernelTester()
1065 .mr(5)
1066 .nr(8)
1067 .kr(1)
1068 .sr(1)
1069 .m(m)
1070 .n(n)
1071 .k(k)
1072 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001073 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001074 }
1075 }
1076 }
1077 }
1078
Marat Dukhan688f6d82020-07-14 17:02:11 -07001079 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001080 for (uint32_t n = 16; n <= 24; n += 8) {
1081 for (size_t k = 1; k <= 20; k += 5) {
1082 GemmMicrokernelTester()
1083 .mr(5)
1084 .nr(8)
1085 .kr(1)
1086 .sr(1)
1087 .m(5)
1088 .n(8)
1089 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001090 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001091 }
1092 }
1093 }
1094
Marat Dukhan688f6d82020-07-14 17:02:11 -07001095 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001096 for (uint32_t n = 16; n <= 24; n += 8) {
1097 for (size_t k = 1; k <= 20; k += 5) {
1098 GemmMicrokernelTester()
1099 .mr(5)
1100 .nr(8)
1101 .kr(1)
1102 .sr(1)
1103 .m(5)
1104 .n(n)
1105 .k(k)
1106 .cn_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001107 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001108 }
1109 }
1110 }
1111
Marat Dukhan688f6d82020-07-14 17:02:11 -07001112 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001113 for (uint32_t n = 16; n <= 24; n += 8) {
1114 for (size_t k = 1; k <= 20; k += 5) {
1115 for (uint32_t m = 1; m <= 5; m++) {
1116 GemmMicrokernelTester()
1117 .mr(5)
1118 .nr(8)
1119 .kr(1)
1120 .sr(1)
1121 .m(m)
1122 .n(n)
1123 .k(k)
1124 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001125 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001126 }
1127 }
1128 }
1129 }
1130
Marat Dukhan688f6d82020-07-14 17:02:11 -07001131 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, small_kernel) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001132 for (size_t k = 1; k <= 20; k += 5) {
1133 GemmMicrokernelTester()
1134 .mr(5)
1135 .nr(8)
1136 .kr(1)
1137 .sr(1)
1138 .m(5)
1139 .n(8)
1140 .k(k)
1141 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001142 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001143 }
1144 }
1145
Marat Dukhan688f6d82020-07-14 17:02:11 -07001146 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, small_kernel_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001147 for (size_t k = 1; k <= 20; k += 5) {
1148 for (uint32_t m = 1; m <= 5; m++) {
1149 for (uint32_t n = 1; n <= 8; n++) {
1150 GemmMicrokernelTester()
1151 .mr(5)
1152 .nr(8)
1153 .kr(1)
1154 .sr(1)
1155 .m(m)
1156 .n(n)
1157 .k(k)
1158 .ks(3)
1159 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001160 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001161 }
1162 }
1163 }
1164 }
1165
Marat Dukhan688f6d82020-07-14 17:02:11 -07001166 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001167 for (uint32_t n = 9; n < 16; n++) {
1168 for (size_t k = 1; k <= 20; k += 5) {
1169 GemmMicrokernelTester()
1170 .mr(5)
1171 .nr(8)
1172 .kr(1)
1173 .sr(1)
1174 .m(5)
1175 .n(8)
1176 .k(k)
1177 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001178 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001179 }
1180 }
1181 }
1182
Marat Dukhan688f6d82020-07-14 17:02:11 -07001183 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001184 for (uint32_t n = 16; n <= 24; n += 8) {
1185 for (size_t k = 1; k <= 20; k += 5) {
1186 GemmMicrokernelTester()
1187 .mr(5)
1188 .nr(8)
1189 .kr(1)
1190 .sr(1)
1191 .m(5)
1192 .n(8)
1193 .k(k)
1194 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001195 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001196 }
1197 }
1198 }
1199
Marat Dukhan688f6d82020-07-14 17:02:11 -07001200 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cm_subtile) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001201 for (size_t k = 1; k <= 20; k += 5) {
1202 for (uint32_t m = 1; m <= 5; m++) {
1203 for (uint32_t n = 1; n <= 8; n++) {
1204 GemmMicrokernelTester()
1205 .mr(5)
1206 .nr(8)
1207 .kr(1)
1208 .sr(1)
1209 .m(m)
1210 .n(n)
1211 .k(k)
1212 .cm_stride(11)
1213 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001214 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001215 }
1216 }
1217 }
1218 }
1219
Marat Dukhan688f6d82020-07-14 17:02:11 -07001220 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, a_offset) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001221 for (size_t k = 1; k <= 20; k += 5) {
1222 GemmMicrokernelTester()
1223 .mr(5)
1224 .nr(8)
1225 .kr(1)
1226 .sr(1)
1227 .m(5)
1228 .n(8)
1229 .k(k)
1230 .ks(3)
1231 .a_offset(103)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001232 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001233 }
1234 }
1235
Marat Dukhan688f6d82020-07-14 17:02:11 -07001236 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, zero) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001237 for (uint32_t mz = 0; mz < 5; mz++) {
1238 for (size_t k = 1; k <= 20; k += 5) {
1239 GemmMicrokernelTester()
1240 .mr(5)
1241 .nr(8)
1242 .kr(1)
1243 .sr(1)
1244 .m(5)
1245 .n(8)
1246 .k(k)
1247 .ks(3)
1248 .a_offset(103)
1249 .zero_index(mz)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001250 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001251 }
1252 }
1253 }
1254
Marat Dukhan688f6d82020-07-14 17:02:11 -07001255 TEST(F32_IGEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cm) {
Marat Dukhan802808c2020-06-16 11:01:17 -07001256 GemmMicrokernelTester()
1257 .mr(5)
1258 .nr(8)
1259 .kr(1)
1260 .sr(1)
1261 .m(5)
1262 .n(8)
1263 .k(4)
1264 .cm_stride(11)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001265 .Test(xnn_f32_igemm_relu_ukernel_5x8__wasmsimd_splat);
Marat Dukhan802808c2020-06-16 11:01:17 -07001266 }
1267#endif // XNN_ARCH_WASMSIMD
1268
1269
1270#if XNN_ARCH_WASMSIMD
Marat Dukhan688f6d82020-07-14 17:02:11 -07001271 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_eq_4) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001272 GemmMicrokernelTester()
1273 .mr(4)
1274 .nr(2)
1275 .kr(4)
1276 .sr(1)
1277 .m(4)
1278 .n(2)
1279 .k(4)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001280 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001281 }
1282
Marat Dukhan688f6d82020-07-14 17:02:11 -07001283 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, strided_cn) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001284 GemmMicrokernelTester()
1285 .mr(4)
1286 .nr(2)
1287 .kr(4)
1288 .sr(1)
1289 .m(4)
1290 .n(2)
1291 .k(4)
1292 .cn_stride(5)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001293 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001294 }
1295
Marat Dukhan688f6d82020-07-14 17:02:11 -07001296 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001297 for (uint32_t m = 1; m <= 4; m++) {
1298 for (uint32_t n = 1; n <= 2; n++) {
1299 GemmMicrokernelTester()
1300 .mr(4)
1301 .nr(2)
1302 .kr(4)
1303 .sr(1)
1304 .m(m)
1305 .n(n)
1306 .k(4)
1307 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001308 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001309 }
1310 }
1311 }
1312
Marat Dukhan688f6d82020-07-14 17:02:11 -07001313 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile_m) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001314 for (uint32_t m = 1; m <= 4; m++) {
1315 GemmMicrokernelTester()
1316 .mr(4)
1317 .nr(2)
1318 .kr(4)
1319 .sr(1)
1320 .m(m)
1321 .n(2)
1322 .k(4)
1323 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001324 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001325 }
1326 }
1327
Marat Dukhan688f6d82020-07-14 17:02:11 -07001328 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile_n) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001329 for (uint32_t n = 1; n <= 2; n++) {
1330 GemmMicrokernelTester()
1331 .mr(4)
1332 .nr(2)
1333 .kr(4)
1334 .sr(1)
1335 .m(4)
1336 .n(n)
1337 .k(4)
1338 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001339 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001340 }
1341 }
1342
Marat Dukhan688f6d82020-07-14 17:02:11 -07001343 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_lt_4) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001344 for (size_t k = 1; k < 4; k++) {
1345 GemmMicrokernelTester()
1346 .mr(4)
1347 .nr(2)
1348 .kr(4)
1349 .sr(1)
1350 .m(4)
1351 .n(2)
1352 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001353 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001354 }
1355 }
1356
Marat Dukhan688f6d82020-07-14 17:02:11 -07001357 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_lt_4_subtile) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001358 for (size_t k = 1; k < 4; k++) {
1359 for (uint32_t m = 1; m <= 4; m++) {
1360 for (uint32_t n = 1; n <= 2; n++) {
1361 GemmMicrokernelTester()
1362 .mr(4)
1363 .nr(2)
1364 .kr(4)
1365 .sr(1)
1366 .m(m)
1367 .n(n)
1368 .k(k)
1369 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001370 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001371 }
1372 }
1373 }
1374 }
1375
Marat Dukhan688f6d82020-07-14 17:02:11 -07001376 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_gt_4) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001377 for (size_t k = 5; k < 8; k++) {
1378 GemmMicrokernelTester()
1379 .mr(4)
1380 .nr(2)
1381 .kr(4)
1382 .sr(1)
1383 .m(4)
1384 .n(2)
1385 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001386 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001387 }
1388 }
1389
Marat Dukhan688f6d82020-07-14 17:02:11 -07001390 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_gt_4_subtile) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001391 for (size_t k = 5; k < 8; k++) {
1392 for (uint32_t m = 1; m <= 4; m++) {
1393 for (uint32_t n = 1; n <= 2; n++) {
1394 GemmMicrokernelTester()
1395 .mr(4)
1396 .nr(2)
1397 .kr(4)
1398 .sr(1)
1399 .m(m)
1400 .n(n)
1401 .k(k)
1402 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001403 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001404 }
1405 }
1406 }
1407 }
1408
Marat Dukhan688f6d82020-07-14 17:02:11 -07001409 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_div_4) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001410 for (size_t k = 8; k <= 40; k += 4) {
1411 GemmMicrokernelTester()
1412 .mr(4)
1413 .nr(2)
1414 .kr(4)
1415 .sr(1)
1416 .m(4)
1417 .n(2)
1418 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001419 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001420 }
1421 }
1422
Marat Dukhan688f6d82020-07-14 17:02:11 -07001423 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, k_div_4_subtile) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001424 for (size_t k = 8; k <= 40; k += 4) {
1425 for (uint32_t m = 1; m <= 4; m++) {
1426 for (uint32_t n = 1; n <= 2; n++) {
1427 GemmMicrokernelTester()
1428 .mr(4)
1429 .nr(2)
1430 .kr(4)
1431 .sr(1)
1432 .m(m)
1433 .n(n)
1434 .k(k)
1435 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001436 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001437 }
1438 }
1439 }
1440 }
1441
Marat Dukhan688f6d82020-07-14 17:02:11 -07001442 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_gt_2) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001443 for (uint32_t n = 3; n < 4; n++) {
1444 for (size_t k = 1; k <= 20; k += 5) {
1445 GemmMicrokernelTester()
1446 .mr(4)
1447 .nr(2)
1448 .kr(4)
1449 .sr(1)
1450 .m(4)
1451 .n(2)
1452 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001453 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001454 }
1455 }
1456 }
1457
Marat Dukhan688f6d82020-07-14 17:02:11 -07001458 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_gt_2_strided_cn) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001459 for (uint32_t n = 3; n < 4; n++) {
1460 for (size_t k = 1; k <= 20; k += 5) {
1461 GemmMicrokernelTester()
1462 .mr(4)
1463 .nr(2)
1464 .kr(4)
1465 .sr(1)
1466 .m(4)
1467 .n(2)
1468 .k(k)
1469 .cn_stride(5)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001470 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001471 }
1472 }
1473 }
1474
Marat Dukhan688f6d82020-07-14 17:02:11 -07001475 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_gt_2_subtile) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001476 for (uint32_t n = 3; n < 4; n++) {
1477 for (size_t k = 1; k <= 20; k += 5) {
1478 for (uint32_t m = 1; m <= 4; m++) {
1479 GemmMicrokernelTester()
1480 .mr(4)
1481 .nr(2)
1482 .kr(4)
1483 .sr(1)
1484 .m(m)
1485 .n(n)
1486 .k(k)
1487 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001488 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001489 }
1490 }
1491 }
1492 }
1493
Marat Dukhan688f6d82020-07-14 17:02:11 -07001494 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_div_2) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001495 for (uint32_t n = 4; n <= 6; n += 2) {
1496 for (size_t k = 1; k <= 20; k += 5) {
1497 GemmMicrokernelTester()
1498 .mr(4)
1499 .nr(2)
1500 .kr(4)
1501 .sr(1)
1502 .m(4)
1503 .n(2)
1504 .k(k)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001505 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001506 }
1507 }
1508 }
1509
Marat Dukhan688f6d82020-07-14 17:02:11 -07001510 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_div_2_strided_cn) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001511 for (uint32_t n = 4; n <= 6; n += 2) {
1512 for (size_t k = 1; k <= 20; k += 5) {
1513 GemmMicrokernelTester()
1514 .mr(4)
1515 .nr(2)
1516 .kr(4)
1517 .sr(1)
1518 .m(4)
1519 .n(n)
1520 .k(k)
1521 .cn_stride(5)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001522 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001523 }
1524 }
1525 }
1526
Marat Dukhan688f6d82020-07-14 17:02:11 -07001527 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_div_2_subtile) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001528 for (uint32_t n = 4; n <= 6; n += 2) {
1529 for (size_t k = 1; k <= 20; k += 5) {
1530 for (uint32_t m = 1; m <= 4; m++) {
1531 GemmMicrokernelTester()
1532 .mr(4)
1533 .nr(2)
1534 .kr(4)
1535 .sr(1)
1536 .m(m)
1537 .n(n)
1538 .k(k)
1539 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001540 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001541 }
1542 }
1543 }
1544 }
1545
Marat Dukhan688f6d82020-07-14 17:02:11 -07001546 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, small_kernel) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001547 for (size_t k = 1; k <= 20; k += 5) {
1548 GemmMicrokernelTester()
1549 .mr(4)
1550 .nr(2)
1551 .kr(4)
1552 .sr(1)
1553 .m(4)
1554 .n(2)
1555 .k(k)
1556 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001557 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001558 }
1559 }
1560
Marat Dukhan688f6d82020-07-14 17:02:11 -07001561 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, small_kernel_subtile) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001562 for (size_t k = 1; k <= 20; k += 5) {
1563 for (uint32_t m = 1; m <= 4; m++) {
1564 for (uint32_t n = 1; n <= 2; n++) {
1565 GemmMicrokernelTester()
1566 .mr(4)
1567 .nr(2)
1568 .kr(4)
1569 .sr(1)
1570 .m(m)
1571 .n(n)
1572 .k(k)
1573 .ks(3)
1574 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001575 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001576 }
1577 }
1578 }
1579 }
1580
Marat Dukhan688f6d82020-07-14 17:02:11 -07001581 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_gt_2_small_kernel) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001582 for (uint32_t n = 3; n < 4; n++) {
1583 for (size_t k = 1; k <= 20; k += 5) {
1584 GemmMicrokernelTester()
1585 .mr(4)
1586 .nr(2)
1587 .kr(4)
1588 .sr(1)
1589 .m(4)
1590 .n(2)
1591 .k(k)
1592 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001593 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001594 }
1595 }
1596 }
1597
Marat Dukhan688f6d82020-07-14 17:02:11 -07001598 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, n_div_2_small_kernel) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001599 for (uint32_t n = 4; n <= 6; n += 2) {
1600 for (size_t k = 1; k <= 20; k += 5) {
1601 GemmMicrokernelTester()
1602 .mr(4)
1603 .nr(2)
1604 .kr(4)
1605 .sr(1)
1606 .m(4)
1607 .n(2)
1608 .k(k)
1609 .ks(3)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001610 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001611 }
1612 }
1613 }
1614
Marat Dukhan688f6d82020-07-14 17:02:11 -07001615 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, strided_cm_subtile) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001616 for (size_t k = 1; k <= 20; k += 5) {
1617 for (uint32_t m = 1; m <= 4; m++) {
1618 for (uint32_t n = 1; n <= 2; n++) {
1619 GemmMicrokernelTester()
1620 .mr(4)
1621 .nr(2)
1622 .kr(4)
1623 .sr(1)
1624 .m(m)
1625 .n(n)
1626 .k(k)
1627 .cm_stride(5)
1628 .iterations(1)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001629 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001630 }
1631 }
1632 }
1633 }
1634
Marat Dukhan688f6d82020-07-14 17:02:11 -07001635 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, a_offset) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001636 for (size_t k = 1; k <= 20; k += 5) {
1637 GemmMicrokernelTester()
1638 .mr(4)
1639 .nr(2)
1640 .kr(4)
1641 .sr(1)
1642 .m(4)
1643 .n(2)
1644 .k(k)
1645 .ks(3)
1646 .a_offset(83)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001647 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001648 }
1649 }
1650
Marat Dukhan688f6d82020-07-14 17:02:11 -07001651 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, zero) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001652 for (uint32_t mz = 0; mz < 4; mz++) {
1653 for (size_t k = 1; k <= 20; k += 5) {
1654 GemmMicrokernelTester()
1655 .mr(4)
1656 .nr(2)
1657 .kr(4)
1658 .sr(1)
1659 .m(4)
1660 .n(2)
1661 .k(k)
1662 .ks(3)
1663 .a_offset(83)
1664 .zero_index(mz)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001665 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001666 }
1667 }
1668 }
1669
Marat Dukhan688f6d82020-07-14 17:02:11 -07001670 TEST(F32_IGEMM_RELU_4X2C4__WASMSIMD, strided_cm) {
Marat Dukhane39e6462020-07-09 01:33:36 -07001671 GemmMicrokernelTester()
1672 .mr(4)
1673 .nr(2)
1674 .kr(4)
1675 .sr(1)
1676 .m(4)
1677 .n(2)
1678 .k(4)
1679 .cm_stride(5)
Marat Dukhan688f6d82020-07-14 17:02:11 -07001680 .Test(xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd);
Marat Dukhane39e6462020-07-09 01:33:36 -07001681 }
1682#endif // XNN_ARCH_WASMSIMD
1683
1684
Marat Dukhanfb5b20a2020-06-26 13:14:50 -07001685#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
Marat Dukhan467f6362020-05-22 23:21:55 -07001686 TEST(F32_IGEMM_RELU_1X4__WASM, k_eq_1) {
1687 GemmMicrokernelTester()
1688 .mr(1)
1689 .nr(4)
1690 .kr(1)
1691 .sr(1)
1692 .m(1)
1693 .n(4)
1694 .k(1)
1695 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1696 }
1697
1698 TEST(F32_IGEMM_RELU_1X4__WASM, strided_cn) {
1699 GemmMicrokernelTester()
1700 .mr(1)
1701 .nr(4)
1702 .kr(1)
1703 .sr(1)
1704 .m(1)
1705 .n(4)
1706 .k(1)
1707 .cn_stride(7)
1708 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1709 }
1710
1711 TEST(F32_IGEMM_RELU_1X4__WASM, k_eq_1_subtile) {
1712 for (uint32_t m = 1; m <= 1; m++) {
1713 for (uint32_t n = 1; n <= 4; n++) {
1714 GemmMicrokernelTester()
1715 .mr(1)
1716 .nr(4)
1717 .kr(1)
1718 .sr(1)
1719 .m(m)
1720 .n(n)
1721 .k(1)
1722 .iterations(1)
1723 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1724 }
1725 }
1726 }
1727
1728 TEST(F32_IGEMM_RELU_1X4__WASM, k_eq_1_subtile_m) {
1729 for (uint32_t m = 1; m <= 1; m++) {
1730 GemmMicrokernelTester()
1731 .mr(1)
1732 .nr(4)
1733 .kr(1)
1734 .sr(1)
1735 .m(m)
1736 .n(4)
1737 .k(1)
1738 .iterations(1)
1739 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1740 }
1741 }
1742
1743 TEST(F32_IGEMM_RELU_1X4__WASM, k_eq_1_subtile_n) {
1744 for (uint32_t n = 1; n <= 4; n++) {
1745 GemmMicrokernelTester()
1746 .mr(1)
1747 .nr(4)
1748 .kr(1)
1749 .sr(1)
1750 .m(1)
1751 .n(n)
1752 .k(1)
1753 .iterations(1)
1754 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1755 }
1756 }
1757
1758 TEST(F32_IGEMM_RELU_1X4__WASM, k_gt_1) {
1759 for (size_t k = 2; k < 10; k++) {
1760 GemmMicrokernelTester()
1761 .mr(1)
1762 .nr(4)
1763 .kr(1)
1764 .sr(1)
1765 .m(1)
1766 .n(4)
1767 .k(k)
1768 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1769 }
1770 }
1771
1772 TEST(F32_IGEMM_RELU_1X4__WASM, k_gt_1_subtile) {
1773 for (size_t k = 2; k < 10; k++) {
1774 for (uint32_t m = 1; m <= 1; m++) {
1775 for (uint32_t n = 1; n <= 4; n++) {
1776 GemmMicrokernelTester()
1777 .mr(1)
1778 .nr(4)
1779 .kr(1)
1780 .sr(1)
1781 .m(m)
1782 .n(n)
1783 .k(k)
1784 .iterations(1)
1785 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1786 }
1787 }
1788 }
1789 }
1790
1791 TEST(F32_IGEMM_RELU_1X4__WASM, n_gt_4) {
1792 for (uint32_t n = 5; n < 8; n++) {
1793 for (size_t k = 1; k <= 5; k += 2) {
1794 GemmMicrokernelTester()
1795 .mr(1)
1796 .nr(4)
1797 .kr(1)
1798 .sr(1)
1799 .m(1)
1800 .n(4)
1801 .k(k)
1802 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1803 }
1804 }
1805 }
1806
1807 TEST(F32_IGEMM_RELU_1X4__WASM, n_gt_4_strided_cn) {
1808 for (uint32_t n = 5; n < 8; n++) {
1809 for (size_t k = 1; k <= 5; k += 2) {
1810 GemmMicrokernelTester()
1811 .mr(1)
1812 .nr(4)
1813 .kr(1)
1814 .sr(1)
1815 .m(1)
1816 .n(4)
1817 .k(k)
1818 .cn_stride(7)
1819 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1820 }
1821 }
1822 }
1823
1824 TEST(F32_IGEMM_RELU_1X4__WASM, n_gt_4_subtile) {
1825 for (uint32_t n = 5; n < 8; n++) {
1826 for (size_t k = 1; k <= 5; k += 2) {
1827 for (uint32_t m = 1; m <= 1; m++) {
1828 GemmMicrokernelTester()
1829 .mr(1)
1830 .nr(4)
1831 .kr(1)
1832 .sr(1)
1833 .m(m)
1834 .n(n)
1835 .k(k)
1836 .iterations(1)
1837 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1838 }
1839 }
1840 }
1841 }
1842
1843 TEST(F32_IGEMM_RELU_1X4__WASM, n_div_4) {
1844 for (uint32_t n = 8; n <= 12; n += 4) {
1845 for (size_t k = 1; k <= 5; k += 2) {
1846 GemmMicrokernelTester()
1847 .mr(1)
1848 .nr(4)
1849 .kr(1)
1850 .sr(1)
1851 .m(1)
1852 .n(4)
1853 .k(k)
1854 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1855 }
1856 }
1857 }
1858
1859 TEST(F32_IGEMM_RELU_1X4__WASM, n_div_4_strided_cn) {
1860 for (uint32_t n = 8; n <= 12; n += 4) {
1861 for (size_t k = 1; k <= 5; k += 2) {
1862 GemmMicrokernelTester()
1863 .mr(1)
1864 .nr(4)
1865 .kr(1)
1866 .sr(1)
1867 .m(1)
1868 .n(n)
1869 .k(k)
1870 .cn_stride(7)
1871 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1872 }
1873 }
1874 }
1875
1876 TEST(F32_IGEMM_RELU_1X4__WASM, n_div_4_subtile) {
1877 for (uint32_t n = 8; n <= 12; n += 4) {
1878 for (size_t k = 1; k <= 5; k += 2) {
1879 for (uint32_t m = 1; m <= 1; m++) {
1880 GemmMicrokernelTester()
1881 .mr(1)
1882 .nr(4)
1883 .kr(1)
1884 .sr(1)
1885 .m(m)
1886 .n(n)
1887 .k(k)
1888 .iterations(1)
1889 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1890 }
1891 }
1892 }
1893 }
1894
1895 TEST(F32_IGEMM_RELU_1X4__WASM, small_kernel) {
1896 for (size_t k = 1; k <= 5; k += 2) {
1897 GemmMicrokernelTester()
1898 .mr(1)
1899 .nr(4)
1900 .kr(1)
1901 .sr(1)
1902 .m(1)
1903 .n(4)
1904 .k(k)
1905 .ks(3)
1906 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1907 }
1908 }
1909
1910 TEST(F32_IGEMM_RELU_1X4__WASM, small_kernel_subtile) {
1911 for (size_t k = 1; k <= 5; k += 2) {
1912 for (uint32_t m = 1; m <= 1; m++) {
1913 for (uint32_t n = 1; n <= 4; n++) {
1914 GemmMicrokernelTester()
1915 .mr(1)
1916 .nr(4)
1917 .kr(1)
1918 .sr(1)
1919 .m(m)
1920 .n(n)
1921 .k(k)
1922 .ks(3)
1923 .iterations(1)
1924 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1925 }
1926 }
1927 }
1928 }
1929
1930 TEST(F32_IGEMM_RELU_1X4__WASM, n_gt_4_small_kernel) {
1931 for (uint32_t n = 5; n < 8; n++) {
1932 for (size_t k = 1; k <= 5; k += 2) {
1933 GemmMicrokernelTester()
1934 .mr(1)
1935 .nr(4)
1936 .kr(1)
1937 .sr(1)
1938 .m(1)
1939 .n(4)
1940 .k(k)
1941 .ks(3)
1942 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1943 }
1944 }
1945 }
1946
1947 TEST(F32_IGEMM_RELU_1X4__WASM, n_div_4_small_kernel) {
1948 for (uint32_t n = 8; n <= 12; n += 4) {
1949 for (size_t k = 1; k <= 5; k += 2) {
1950 GemmMicrokernelTester()
1951 .mr(1)
1952 .nr(4)
1953 .kr(1)
1954 .sr(1)
1955 .m(1)
1956 .n(4)
1957 .k(k)
1958 .ks(3)
1959 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1960 }
1961 }
1962 }
1963
1964 TEST(F32_IGEMM_RELU_1X4__WASM, strided_cm_subtile) {
1965 for (size_t k = 1; k <= 5; k += 2) {
1966 for (uint32_t m = 1; m <= 1; m++) {
1967 for (uint32_t n = 1; n <= 4; n++) {
1968 GemmMicrokernelTester()
1969 .mr(1)
1970 .nr(4)
1971 .kr(1)
1972 .sr(1)
1973 .m(m)
1974 .n(n)
1975 .k(k)
1976 .cm_stride(7)
1977 .iterations(1)
1978 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1979 }
1980 }
1981 }
1982 }
1983
1984 TEST(F32_IGEMM_RELU_1X4__WASM, a_offset) {
1985 for (size_t k = 1; k <= 5; k += 2) {
1986 GemmMicrokernelTester()
1987 .mr(1)
1988 .nr(4)
1989 .kr(1)
1990 .sr(1)
1991 .m(1)
1992 .n(4)
1993 .k(k)
1994 .ks(3)
1995 .a_offset(7)
1996 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
1997 }
1998 }
1999
2000 TEST(F32_IGEMM_RELU_1X4__WASM, zero) {
2001 for (uint32_t mz = 0; mz < 1; mz++) {
2002 for (size_t k = 1; k <= 5; k += 2) {
2003 GemmMicrokernelTester()
2004 .mr(1)
2005 .nr(4)
2006 .kr(1)
2007 .sr(1)
2008 .m(1)
2009 .n(4)
2010 .k(k)
2011 .ks(3)
2012 .a_offset(7)
2013 .zero_index(mz)
2014 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
2015 }
2016 }
2017 }
2018
2019 TEST(F32_IGEMM_RELU_1X4__WASM, strided_cm) {
2020 GemmMicrokernelTester()
2021 .mr(1)
2022 .nr(4)
2023 .kr(1)
2024 .sr(1)
2025 .m(1)
2026 .n(4)
2027 .k(1)
2028 .cm_stride(7)
2029 .Test(xnn_f32_igemm_relu_ukernel_1x4__wasm);
2030 }
Marat Dukhanfb5b20a2020-06-26 13:14:50 -07002031#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
Marat Dukhan467f6362020-05-22 23:21:55 -07002032
2033
Marat Dukhanfb5b20a2020-06-26 13:14:50 -07002034#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
Marat Dukhan467f6362020-05-22 23:21:55 -07002035 TEST(F32_IGEMM_RELU_2X4__WASM, k_eq_1) {
2036 GemmMicrokernelTester()
2037 .mr(2)
2038 .nr(4)
2039 .kr(1)
2040 .sr(1)
2041 .m(2)
2042 .n(4)
2043 .k(1)
2044 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2045 }
2046
2047 TEST(F32_IGEMM_RELU_2X4__WASM, strided_cn) {
2048 GemmMicrokernelTester()
2049 .mr(2)
2050 .nr(4)
2051 .kr(1)
2052 .sr(1)
2053 .m(2)
2054 .n(4)
2055 .k(1)
2056 .cn_stride(7)
2057 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2058 }
2059
2060 TEST(F32_IGEMM_RELU_2X4__WASM, k_eq_1_subtile) {
2061 for (uint32_t m = 1; m <= 2; m++) {
2062 for (uint32_t n = 1; n <= 4; n++) {
2063 GemmMicrokernelTester()
2064 .mr(2)
2065 .nr(4)
2066 .kr(1)
2067 .sr(1)
2068 .m(m)
2069 .n(n)
2070 .k(1)
2071 .iterations(1)
2072 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2073 }
2074 }
2075 }
2076
2077 TEST(F32_IGEMM_RELU_2X4__WASM, k_eq_1_subtile_m) {
2078 for (uint32_t m = 1; m <= 2; m++) {
2079 GemmMicrokernelTester()
2080 .mr(2)
2081 .nr(4)
2082 .kr(1)
2083 .sr(1)
2084 .m(m)
2085 .n(4)
2086 .k(1)
2087 .iterations(1)
2088 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2089 }
2090 }
2091
2092 TEST(F32_IGEMM_RELU_2X4__WASM, k_eq_1_subtile_n) {
2093 for (uint32_t n = 1; n <= 4; n++) {
2094 GemmMicrokernelTester()
2095 .mr(2)
2096 .nr(4)
2097 .kr(1)
2098 .sr(1)
2099 .m(2)
2100 .n(n)
2101 .k(1)
2102 .iterations(1)
2103 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2104 }
2105 }
2106
2107 TEST(F32_IGEMM_RELU_2X4__WASM, k_gt_1) {
2108 for (size_t k = 2; k < 10; k++) {
2109 GemmMicrokernelTester()
2110 .mr(2)
2111 .nr(4)
2112 .kr(1)
2113 .sr(1)
2114 .m(2)
2115 .n(4)
2116 .k(k)
2117 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2118 }
2119 }
2120
2121 TEST(F32_IGEMM_RELU_2X4__WASM, k_gt_1_subtile) {
2122 for (size_t k = 2; k < 10; k++) {
2123 for (uint32_t m = 1; m <= 2; m++) {
2124 for (uint32_t n = 1; n <= 4; n++) {
2125 GemmMicrokernelTester()
2126 .mr(2)
2127 .nr(4)
2128 .kr(1)
2129 .sr(1)
2130 .m(m)
2131 .n(n)
2132 .k(k)
2133 .iterations(1)
2134 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2135 }
2136 }
2137 }
2138 }
2139
2140 TEST(F32_IGEMM_RELU_2X4__WASM, n_gt_4) {
2141 for (uint32_t n = 5; n < 8; n++) {
2142 for (size_t k = 1; k <= 5; k += 2) {
2143 GemmMicrokernelTester()
2144 .mr(2)
2145 .nr(4)
2146 .kr(1)
2147 .sr(1)
2148 .m(2)
2149 .n(4)
2150 .k(k)
2151 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2152 }
2153 }
2154 }
2155
2156 TEST(F32_IGEMM_RELU_2X4__WASM, n_gt_4_strided_cn) {
2157 for (uint32_t n = 5; n < 8; n++) {
2158 for (size_t k = 1; k <= 5; k += 2) {
2159 GemmMicrokernelTester()
2160 .mr(2)
2161 .nr(4)
2162 .kr(1)
2163 .sr(1)
2164 .m(2)
2165 .n(4)
2166 .k(k)
2167 .cn_stride(7)
2168 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2169 }
2170 }
2171 }
2172
2173 TEST(F32_IGEMM_RELU_2X4__WASM, n_gt_4_subtile) {
2174 for (uint32_t n = 5; n < 8; n++) {
2175 for (size_t k = 1; k <= 5; k += 2) {
2176 for (uint32_t m = 1; m <= 2; m++) {
2177 GemmMicrokernelTester()
2178 .mr(2)
2179 .nr(4)
2180 .kr(1)
2181 .sr(1)
2182 .m(m)
2183 .n(n)
2184 .k(k)
2185 .iterations(1)
2186 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2187 }
2188 }
2189 }
2190 }
2191
2192 TEST(F32_IGEMM_RELU_2X4__WASM, n_div_4) {
2193 for (uint32_t n = 8; n <= 12; n += 4) {
2194 for (size_t k = 1; k <= 5; k += 2) {
2195 GemmMicrokernelTester()
2196 .mr(2)
2197 .nr(4)
2198 .kr(1)
2199 .sr(1)
2200 .m(2)
2201 .n(4)
2202 .k(k)
2203 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2204 }
2205 }
2206 }
2207
2208 TEST(F32_IGEMM_RELU_2X4__WASM, n_div_4_strided_cn) {
2209 for (uint32_t n = 8; n <= 12; n += 4) {
2210 for (size_t k = 1; k <= 5; k += 2) {
2211 GemmMicrokernelTester()
2212 .mr(2)
2213 .nr(4)
2214 .kr(1)
2215 .sr(1)
2216 .m(2)
2217 .n(n)
2218 .k(k)
2219 .cn_stride(7)
2220 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2221 }
2222 }
2223 }
2224
2225 TEST(F32_IGEMM_RELU_2X4__WASM, n_div_4_subtile) {
2226 for (uint32_t n = 8; n <= 12; n += 4) {
2227 for (size_t k = 1; k <= 5; k += 2) {
2228 for (uint32_t m = 1; m <= 2; m++) {
2229 GemmMicrokernelTester()
2230 .mr(2)
2231 .nr(4)
2232 .kr(1)
2233 .sr(1)
2234 .m(m)
2235 .n(n)
2236 .k(k)
2237 .iterations(1)
2238 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2239 }
2240 }
2241 }
2242 }
2243
2244 TEST(F32_IGEMM_RELU_2X4__WASM, small_kernel) {
2245 for (size_t k = 1; k <= 5; k += 2) {
2246 GemmMicrokernelTester()
2247 .mr(2)
2248 .nr(4)
2249 .kr(1)
2250 .sr(1)
2251 .m(2)
2252 .n(4)
2253 .k(k)
2254 .ks(3)
2255 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2256 }
2257 }
2258
2259 TEST(F32_IGEMM_RELU_2X4__WASM, small_kernel_subtile) {
2260 for (size_t k = 1; k <= 5; k += 2) {
2261 for (uint32_t m = 1; m <= 2; m++) {
2262 for (uint32_t n = 1; n <= 4; n++) {
2263 GemmMicrokernelTester()
2264 .mr(2)
2265 .nr(4)
2266 .kr(1)
2267 .sr(1)
2268 .m(m)
2269 .n(n)
2270 .k(k)
2271 .ks(3)
2272 .iterations(1)
2273 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2274 }
2275 }
2276 }
2277 }
2278
2279 TEST(F32_IGEMM_RELU_2X4__WASM, n_gt_4_small_kernel) {
2280 for (uint32_t n = 5; n < 8; n++) {
2281 for (size_t k = 1; k <= 5; k += 2) {
2282 GemmMicrokernelTester()
2283 .mr(2)
2284 .nr(4)
2285 .kr(1)
2286 .sr(1)
2287 .m(2)
2288 .n(4)
2289 .k(k)
2290 .ks(3)
2291 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2292 }
2293 }
2294 }
2295
2296 TEST(F32_IGEMM_RELU_2X4__WASM, n_div_4_small_kernel) {
2297 for (uint32_t n = 8; n <= 12; n += 4) {
2298 for (size_t k = 1; k <= 5; k += 2) {
2299 GemmMicrokernelTester()
2300 .mr(2)
2301 .nr(4)
2302 .kr(1)
2303 .sr(1)
2304 .m(2)
2305 .n(4)
2306 .k(k)
2307 .ks(3)
2308 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2309 }
2310 }
2311 }
2312
2313 TEST(F32_IGEMM_RELU_2X4__WASM, strided_cm_subtile) {
2314 for (size_t k = 1; k <= 5; k += 2) {
2315 for (uint32_t m = 1; m <= 2; m++) {
2316 for (uint32_t n = 1; n <= 4; n++) {
2317 GemmMicrokernelTester()
2318 .mr(2)
2319 .nr(4)
2320 .kr(1)
2321 .sr(1)
2322 .m(m)
2323 .n(n)
2324 .k(k)
2325 .cm_stride(7)
2326 .iterations(1)
2327 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2328 }
2329 }
2330 }
2331 }
2332
2333 TEST(F32_IGEMM_RELU_2X4__WASM, a_offset) {
2334 for (size_t k = 1; k <= 5; k += 2) {
2335 GemmMicrokernelTester()
2336 .mr(2)
2337 .nr(4)
2338 .kr(1)
2339 .sr(1)
2340 .m(2)
2341 .n(4)
2342 .k(k)
2343 .ks(3)
2344 .a_offset(13)
2345 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2346 }
2347 }
2348
2349 TEST(F32_IGEMM_RELU_2X4__WASM, zero) {
2350 for (uint32_t mz = 0; mz < 2; mz++) {
2351 for (size_t k = 1; k <= 5; k += 2) {
2352 GemmMicrokernelTester()
2353 .mr(2)
2354 .nr(4)
2355 .kr(1)
2356 .sr(1)
2357 .m(2)
2358 .n(4)
2359 .k(k)
2360 .ks(3)
2361 .a_offset(13)
2362 .zero_index(mz)
2363 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2364 }
2365 }
2366 }
2367
2368 TEST(F32_IGEMM_RELU_2X4__WASM, strided_cm) {
2369 GemmMicrokernelTester()
2370 .mr(2)
2371 .nr(4)
2372 .kr(1)
2373 .sr(1)
2374 .m(2)
2375 .n(4)
2376 .k(1)
2377 .cm_stride(7)
2378 .Test(xnn_f32_igemm_relu_ukernel_2x4__wasm);
2379 }
Marat Dukhanfb5b20a2020-06-26 13:14:50 -07002380#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
Marat Dukhan467f6362020-05-22 23:21:55 -07002381
2382
Marat Dukhanfb5b20a2020-06-26 13:14:50 -07002383#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
Marat Dukhan467f6362020-05-22 23:21:55 -07002384 TEST(F32_IGEMM_RELU_4X4__WASM, k_eq_1) {
2385 GemmMicrokernelTester()
2386 .mr(4)
2387 .nr(4)
2388 .kr(1)
2389 .sr(1)
2390 .m(4)
2391 .n(4)
2392 .k(1)
2393 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2394 }
2395
2396 TEST(F32_IGEMM_RELU_4X4__WASM, strided_cn) {
2397 GemmMicrokernelTester()
2398 .mr(4)
2399 .nr(4)
2400 .kr(1)
2401 .sr(1)
2402 .m(4)
2403 .n(4)
2404 .k(1)
2405 .cn_stride(7)
2406 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2407 }
2408
2409 TEST(F32_IGEMM_RELU_4X4__WASM, k_eq_1_subtile) {
2410 for (uint32_t m = 1; m <= 4; m++) {
2411 for (uint32_t n = 1; n <= 4; n++) {
2412 GemmMicrokernelTester()
2413 .mr(4)
2414 .nr(4)
2415 .kr(1)
2416 .sr(1)
2417 .m(m)
2418 .n(n)
2419 .k(1)
2420 .iterations(1)
2421 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2422 }
2423 }
2424 }
2425
2426 TEST(F32_IGEMM_RELU_4X4__WASM, k_eq_1_subtile_m) {
2427 for (uint32_t m = 1; m <= 4; m++) {
2428 GemmMicrokernelTester()
2429 .mr(4)
2430 .nr(4)
2431 .kr(1)
2432 .sr(1)
2433 .m(m)
2434 .n(4)
2435 .k(1)
2436 .iterations(1)
2437 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2438 }
2439 }
2440
2441 TEST(F32_IGEMM_RELU_4X4__WASM, k_eq_1_subtile_n) {
2442 for (uint32_t n = 1; n <= 4; n++) {
2443 GemmMicrokernelTester()
2444 .mr(4)
2445 .nr(4)
2446 .kr(1)
2447 .sr(1)
2448 .m(4)
2449 .n(n)
2450 .k(1)
2451 .iterations(1)
2452 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2453 }
2454 }
2455
2456 TEST(F32_IGEMM_RELU_4X4__WASM, k_gt_1) {
2457 for (size_t k = 2; k < 10; k++) {
2458 GemmMicrokernelTester()
2459 .mr(4)
2460 .nr(4)
2461 .kr(1)
2462 .sr(1)
2463 .m(4)
2464 .n(4)
2465 .k(k)
2466 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2467 }
2468 }
2469
2470 TEST(F32_IGEMM_RELU_4X4__WASM, k_gt_1_subtile) {
2471 for (size_t k = 2; k < 10; k++) {
2472 for (uint32_t m = 1; m <= 4; m++) {
2473 for (uint32_t n = 1; n <= 4; n++) {
2474 GemmMicrokernelTester()
2475 .mr(4)
2476 .nr(4)
2477 .kr(1)
2478 .sr(1)
2479 .m(m)
2480 .n(n)
2481 .k(k)
2482 .iterations(1)
2483 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2484 }
2485 }
2486 }
2487 }
2488
2489 TEST(F32_IGEMM_RELU_4X4__WASM, n_gt_4) {
2490 for (uint32_t n = 5; n < 8; n++) {
2491 for (size_t k = 1; k <= 5; k += 2) {
2492 GemmMicrokernelTester()
2493 .mr(4)
2494 .nr(4)
2495 .kr(1)
2496 .sr(1)
2497 .m(4)
2498 .n(4)
2499 .k(k)
2500 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2501 }
2502 }
2503 }
2504
2505 TEST(F32_IGEMM_RELU_4X4__WASM, n_gt_4_strided_cn) {
2506 for (uint32_t n = 5; n < 8; n++) {
2507 for (size_t k = 1; k <= 5; k += 2) {
2508 GemmMicrokernelTester()
2509 .mr(4)
2510 .nr(4)
2511 .kr(1)
2512 .sr(1)
2513 .m(4)
2514 .n(4)
2515 .k(k)
2516 .cn_stride(7)
2517 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2518 }
2519 }
2520 }
2521
2522 TEST(F32_IGEMM_RELU_4X4__WASM, n_gt_4_subtile) {
2523 for (uint32_t n = 5; n < 8; n++) {
2524 for (size_t k = 1; k <= 5; k += 2) {
2525 for (uint32_t m = 1; m <= 4; m++) {
2526 GemmMicrokernelTester()
2527 .mr(4)
2528 .nr(4)
2529 .kr(1)
2530 .sr(1)
2531 .m(m)
2532 .n(n)
2533 .k(k)
2534 .iterations(1)
2535 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2536 }
2537 }
2538 }
2539 }
2540
2541 TEST(F32_IGEMM_RELU_4X4__WASM, n_div_4) {
2542 for (uint32_t n = 8; n <= 12; n += 4) {
2543 for (size_t k = 1; k <= 5; k += 2) {
2544 GemmMicrokernelTester()
2545 .mr(4)
2546 .nr(4)
2547 .kr(1)
2548 .sr(1)
2549 .m(4)
2550 .n(4)
2551 .k(k)
2552 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2553 }
2554 }
2555 }
2556
2557 TEST(F32_IGEMM_RELU_4X4__WASM, n_div_4_strided_cn) {
2558 for (uint32_t n = 8; n <= 12; n += 4) {
2559 for (size_t k = 1; k <= 5; k += 2) {
2560 GemmMicrokernelTester()
2561 .mr(4)
2562 .nr(4)
2563 .kr(1)
2564 .sr(1)
2565 .m(4)
2566 .n(n)
2567 .k(k)
2568 .cn_stride(7)
2569 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2570 }
2571 }
2572 }
2573
2574 TEST(F32_IGEMM_RELU_4X4__WASM, n_div_4_subtile) {
2575 for (uint32_t n = 8; n <= 12; n += 4) {
2576 for (size_t k = 1; k <= 5; k += 2) {
2577 for (uint32_t m = 1; m <= 4; m++) {
2578 GemmMicrokernelTester()
2579 .mr(4)
2580 .nr(4)
2581 .kr(1)
2582 .sr(1)
2583 .m(m)
2584 .n(n)
2585 .k(k)
2586 .iterations(1)
2587 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2588 }
2589 }
2590 }
2591 }
2592
2593 TEST(F32_IGEMM_RELU_4X4__WASM, small_kernel) {
2594 for (size_t k = 1; k <= 5; k += 2) {
2595 GemmMicrokernelTester()
2596 .mr(4)
2597 .nr(4)
2598 .kr(1)
2599 .sr(1)
2600 .m(4)
2601 .n(4)
2602 .k(k)
2603 .ks(3)
2604 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2605 }
2606 }
2607
2608 TEST(F32_IGEMM_RELU_4X4__WASM, small_kernel_subtile) {
2609 for (size_t k = 1; k <= 5; k += 2) {
2610 for (uint32_t m = 1; m <= 4; m++) {
2611 for (uint32_t n = 1; n <= 4; n++) {
2612 GemmMicrokernelTester()
2613 .mr(4)
2614 .nr(4)
2615 .kr(1)
2616 .sr(1)
2617 .m(m)
2618 .n(n)
2619 .k(k)
2620 .ks(3)
2621 .iterations(1)
2622 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2623 }
2624 }
2625 }
2626 }
2627
2628 TEST(F32_IGEMM_RELU_4X4__WASM, n_gt_4_small_kernel) {
2629 for (uint32_t n = 5; n < 8; n++) {
2630 for (size_t k = 1; k <= 5; k += 2) {
2631 GemmMicrokernelTester()
2632 .mr(4)
2633 .nr(4)
2634 .kr(1)
2635 .sr(1)
2636 .m(4)
2637 .n(4)
2638 .k(k)
2639 .ks(3)
2640 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2641 }
2642 }
2643 }
2644
2645 TEST(F32_IGEMM_RELU_4X4__WASM, n_div_4_small_kernel) {
2646 for (uint32_t n = 8; n <= 12; n += 4) {
2647 for (size_t k = 1; k <= 5; k += 2) {
2648 GemmMicrokernelTester()
2649 .mr(4)
2650 .nr(4)
2651 .kr(1)
2652 .sr(1)
2653 .m(4)
2654 .n(4)
2655 .k(k)
2656 .ks(3)
2657 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2658 }
2659 }
2660 }
2661
2662 TEST(F32_IGEMM_RELU_4X4__WASM, strided_cm_subtile) {
2663 for (size_t k = 1; k <= 5; k += 2) {
2664 for (uint32_t m = 1; m <= 4; m++) {
2665 for (uint32_t n = 1; n <= 4; n++) {
2666 GemmMicrokernelTester()
2667 .mr(4)
2668 .nr(4)
2669 .kr(1)
2670 .sr(1)
2671 .m(m)
2672 .n(n)
2673 .k(k)
2674 .cm_stride(7)
2675 .iterations(1)
2676 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2677 }
2678 }
2679 }
2680 }
2681
2682 TEST(F32_IGEMM_RELU_4X4__WASM, a_offset) {
2683 for (size_t k = 1; k <= 5; k += 2) {
2684 GemmMicrokernelTester()
2685 .mr(4)
2686 .nr(4)
2687 .kr(1)
2688 .sr(1)
2689 .m(4)
2690 .n(4)
2691 .k(k)
2692 .ks(3)
2693 .a_offset(23)
2694 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2695 }
2696 }
2697
2698 TEST(F32_IGEMM_RELU_4X4__WASM, zero) {
2699 for (uint32_t mz = 0; mz < 4; mz++) {
2700 for (size_t k = 1; k <= 5; k += 2) {
2701 GemmMicrokernelTester()
2702 .mr(4)
2703 .nr(4)
2704 .kr(1)
2705 .sr(1)
2706 .m(4)
2707 .n(4)
2708 .k(k)
2709 .ks(3)
2710 .a_offset(23)
2711 .zero_index(mz)
2712 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2713 }
2714 }
2715 }
2716
2717 TEST(F32_IGEMM_RELU_4X4__WASM, strided_cm) {
2718 GemmMicrokernelTester()
2719 .mr(4)
2720 .nr(4)
2721 .kr(1)
2722 .sr(1)
2723 .m(4)
2724 .n(4)
2725 .k(1)
2726 .cm_stride(7)
2727 .Test(xnn_f32_igemm_relu_ukernel_4x4__wasm);
2728 }
Marat Dukhanfb5b20a2020-06-26 13:14:50 -07002729#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
Marat Dukhan467f6362020-05-22 23:21:55 -07002730
2731
Marat Dukhanfb5b20a2020-06-26 13:14:50 -07002732#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
Marat Dukhan467f6362020-05-22 23:21:55 -07002733 TEST(F32_IGEMM_RELU_4X2__WASM, k_eq_1) {
2734 GemmMicrokernelTester()
2735 .mr(4)
2736 .nr(2)
2737 .kr(1)
2738 .sr(1)
2739 .m(4)
2740 .n(2)
2741 .k(1)
2742 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2743 }
2744
2745 TEST(F32_IGEMM_RELU_4X2__WASM, strided_cn) {
2746 GemmMicrokernelTester()
2747 .mr(4)
2748 .nr(2)
2749 .kr(1)
2750 .sr(1)
2751 .m(4)
2752 .n(2)
2753 .k(1)
2754 .cn_stride(5)
2755 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2756 }
2757
2758 TEST(F32_IGEMM_RELU_4X2__WASM, k_eq_1_subtile) {
2759 for (uint32_t m = 1; m <= 4; m++) {
2760 for (uint32_t n = 1; n <= 2; n++) {
2761 GemmMicrokernelTester()
2762 .mr(4)
2763 .nr(2)
2764 .kr(1)
2765 .sr(1)
2766 .m(m)
2767 .n(n)
2768 .k(1)
2769 .iterations(1)
2770 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2771 }
2772 }
2773 }
2774
2775 TEST(F32_IGEMM_RELU_4X2__WASM, k_eq_1_subtile_m) {
2776 for (uint32_t m = 1; m <= 4; m++) {
2777 GemmMicrokernelTester()
2778 .mr(4)
2779 .nr(2)
2780 .kr(1)
2781 .sr(1)
2782 .m(m)
2783 .n(2)
2784 .k(1)
2785 .iterations(1)
2786 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2787 }
2788 }
2789
2790 TEST(F32_IGEMM_RELU_4X2__WASM, k_eq_1_subtile_n) {
2791 for (uint32_t n = 1; n <= 2; n++) {
2792 GemmMicrokernelTester()
2793 .mr(4)
2794 .nr(2)
2795 .kr(1)
2796 .sr(1)
2797 .m(4)
2798 .n(n)
2799 .k(1)
2800 .iterations(1)
2801 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2802 }
2803 }
2804
2805 TEST(F32_IGEMM_RELU_4X2__WASM, k_gt_1) {
2806 for (size_t k = 2; k < 10; k++) {
2807 GemmMicrokernelTester()
2808 .mr(4)
2809 .nr(2)
2810 .kr(1)
2811 .sr(1)
2812 .m(4)
2813 .n(2)
2814 .k(k)
2815 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2816 }
2817 }
2818
2819 TEST(F32_IGEMM_RELU_4X2__WASM, k_gt_1_subtile) {
2820 for (size_t k = 2; k < 10; k++) {
2821 for (uint32_t m = 1; m <= 4; m++) {
2822 for (uint32_t n = 1; n <= 2; n++) {
2823 GemmMicrokernelTester()
2824 .mr(4)
2825 .nr(2)
2826 .kr(1)
2827 .sr(1)
2828 .m(m)
2829 .n(n)
2830 .k(k)
2831 .iterations(1)
2832 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2833 }
2834 }
2835 }
2836 }
2837
2838 TEST(F32_IGEMM_RELU_4X2__WASM, n_gt_2) {
2839 for (uint32_t n = 3; n < 4; n++) {
2840 for (size_t k = 1; k <= 5; k += 2) {
2841 GemmMicrokernelTester()
2842 .mr(4)
2843 .nr(2)
2844 .kr(1)
2845 .sr(1)
2846 .m(4)
2847 .n(2)
2848 .k(k)
2849 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2850 }
2851 }
2852 }
2853
2854 TEST(F32_IGEMM_RELU_4X2__WASM, n_gt_2_strided_cn) {
2855 for (uint32_t n = 3; n < 4; n++) {
2856 for (size_t k = 1; k <= 5; k += 2) {
2857 GemmMicrokernelTester()
2858 .mr(4)
2859 .nr(2)
2860 .kr(1)
2861 .sr(1)
2862 .m(4)
2863 .n(2)
2864 .k(k)
2865 .cn_stride(5)
2866 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2867 }
2868 }
2869 }
2870
2871 TEST(F32_IGEMM_RELU_4X2__WASM, n_gt_2_subtile) {
2872 for (uint32_t n = 3; n < 4; n++) {
2873 for (size_t k = 1; k <= 5; k += 2) {
2874 for (uint32_t m = 1; m <= 4; m++) {
2875 GemmMicrokernelTester()
2876 .mr(4)
2877 .nr(2)
2878 .kr(1)
2879 .sr(1)
2880 .m(m)
2881 .n(n)
2882 .k(k)
2883 .iterations(1)
2884 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2885 }
2886 }
2887 }
2888 }
2889
2890 TEST(F32_IGEMM_RELU_4X2__WASM, n_div_2) {
2891 for (uint32_t n = 4; n <= 6; n += 2) {
2892 for (size_t k = 1; k <= 5; k += 2) {
2893 GemmMicrokernelTester()
2894 .mr(4)
2895 .nr(2)
2896 .kr(1)
2897 .sr(1)
2898 .m(4)
2899 .n(2)
2900 .k(k)
2901 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2902 }
2903 }
2904 }
2905
2906 TEST(F32_IGEMM_RELU_4X2__WASM, n_div_2_strided_cn) {
2907 for (uint32_t n = 4; n <= 6; n += 2) {
2908 for (size_t k = 1; k <= 5; k += 2) {
2909 GemmMicrokernelTester()
2910 .mr(4)
2911 .nr(2)
2912 .kr(1)
2913 .sr(1)
2914 .m(4)
2915 .n(n)
2916 .k(k)
2917 .cn_stride(5)
2918 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2919 }
2920 }
2921 }
2922
2923 TEST(F32_IGEMM_RELU_4X2__WASM, n_div_2_subtile) {
2924 for (uint32_t n = 4; n <= 6; n += 2) {
2925 for (size_t k = 1; k <= 5; k += 2) {
2926 for (uint32_t m = 1; m <= 4; m++) {
2927 GemmMicrokernelTester()
2928 .mr(4)
2929 .nr(2)
2930 .kr(1)
2931 .sr(1)
2932 .m(m)
2933 .n(n)
2934 .k(k)
2935 .iterations(1)
2936 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2937 }
2938 }
2939 }
2940 }
2941
2942 TEST(F32_IGEMM_RELU_4X2__WASM, small_kernel) {
2943 for (size_t k = 1; k <= 5; k += 2) {
2944 GemmMicrokernelTester()
2945 .mr(4)
2946 .nr(2)
2947 .kr(1)
2948 .sr(1)
2949 .m(4)
2950 .n(2)
2951 .k(k)
2952 .ks(3)
2953 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2954 }
2955 }
2956
2957 TEST(F32_IGEMM_RELU_4X2__WASM, small_kernel_subtile) {
2958 for (size_t k = 1; k <= 5; k += 2) {
2959 for (uint32_t m = 1; m <= 4; m++) {
2960 for (uint32_t n = 1; n <= 2; n++) {
2961 GemmMicrokernelTester()
2962 .mr(4)
2963 .nr(2)
2964 .kr(1)
2965 .sr(1)
2966 .m(m)
2967 .n(n)
2968 .k(k)
2969 .ks(3)
2970 .iterations(1)
2971 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2972 }
2973 }
2974 }
2975 }
2976
2977 TEST(F32_IGEMM_RELU_4X2__WASM, n_gt_2_small_kernel) {
2978 for (uint32_t n = 3; n < 4; n++) {
2979 for (size_t k = 1; k <= 5; k += 2) {
2980 GemmMicrokernelTester()
2981 .mr(4)
2982 .nr(2)
2983 .kr(1)
2984 .sr(1)
2985 .m(4)
2986 .n(2)
2987 .k(k)
2988 .ks(3)
2989 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
2990 }
2991 }
2992 }
2993
2994 TEST(F32_IGEMM_RELU_4X2__WASM, n_div_2_small_kernel) {
2995 for (uint32_t n = 4; n <= 6; n += 2) {
2996 for (size_t k = 1; k <= 5; k += 2) {
2997 GemmMicrokernelTester()
2998 .mr(4)
2999 .nr(2)
3000 .kr(1)
3001 .sr(1)
3002 .m(4)
3003 .n(2)
3004 .k(k)
3005 .ks(3)
3006 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
3007 }
3008 }
3009 }
3010
3011 TEST(F32_IGEMM_RELU_4X2__WASM, strided_cm_subtile) {
3012 for (size_t k = 1; k <= 5; k += 2) {
3013 for (uint32_t m = 1; m <= 4; m++) {
3014 for (uint32_t n = 1; n <= 2; n++) {
3015 GemmMicrokernelTester()
3016 .mr(4)
3017 .nr(2)
3018 .kr(1)
3019 .sr(1)
3020 .m(m)
3021 .n(n)
3022 .k(k)
3023 .cm_stride(5)
3024 .iterations(1)
3025 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
3026 }
3027 }
3028 }
3029 }
3030
3031 TEST(F32_IGEMM_RELU_4X2__WASM, a_offset) {
3032 for (size_t k = 1; k <= 5; k += 2) {
3033 GemmMicrokernelTester()
3034 .mr(4)
3035 .nr(2)
3036 .kr(1)
3037 .sr(1)
3038 .m(4)
3039 .n(2)
3040 .k(k)
3041 .ks(3)
3042 .a_offset(23)
3043 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
3044 }
3045 }
3046
3047 TEST(F32_IGEMM_RELU_4X2__WASM, zero) {
3048 for (uint32_t mz = 0; mz < 4; mz++) {
3049 for (size_t k = 1; k <= 5; k += 2) {
3050 GemmMicrokernelTester()
3051 .mr(4)
3052 .nr(2)
3053 .kr(1)
3054 .sr(1)
3055 .m(4)
3056 .n(2)
3057 .k(k)
3058 .ks(3)
3059 .a_offset(23)
3060 .zero_index(mz)
3061 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
3062 }
3063 }
3064 }
3065
3066 TEST(F32_IGEMM_RELU_4X2__WASM, strided_cm) {
3067 GemmMicrokernelTester()
3068 .mr(4)
3069 .nr(2)
3070 .kr(1)
3071 .sr(1)
3072 .m(4)
3073 .n(2)
3074 .k(1)
3075 .cm_stride(5)
3076 .Test(xnn_f32_igemm_relu_ukernel_4x2__wasm);
3077 }
Marat Dukhanfb5b20a2020-06-26 13:14:50 -07003078#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
Marat Dukhan467f6362020-05-22 23:21:55 -07003079
3080
3081TEST(F32_IGEMM_RELU_1X4__SCALAR, k_eq_1) {
3082 GemmMicrokernelTester()
3083 .mr(1)
3084 .nr(4)
3085 .kr(1)
3086 .sr(1)
3087 .m(1)
3088 .n(4)
3089 .k(1)
3090 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3091}
3092
3093TEST(F32_IGEMM_RELU_1X4__SCALAR, strided_cn) {
3094 GemmMicrokernelTester()
3095 .mr(1)
3096 .nr(4)
3097 .kr(1)
3098 .sr(1)
3099 .m(1)
3100 .n(4)
3101 .k(1)
3102 .cn_stride(7)
3103 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3104}
3105
3106TEST(F32_IGEMM_RELU_1X4__SCALAR, k_eq_1_subtile) {
3107 for (uint32_t m = 1; m <= 1; m++) {
3108 for (uint32_t n = 1; n <= 4; n++) {
3109 GemmMicrokernelTester()
3110 .mr(1)
3111 .nr(4)
3112 .kr(1)
3113 .sr(1)
3114 .m(m)
3115 .n(n)
3116 .k(1)
3117 .iterations(1)
3118 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3119 }
3120 }
3121}
3122
3123TEST(F32_IGEMM_RELU_1X4__SCALAR, k_eq_1_subtile_m) {
3124 for (uint32_t m = 1; m <= 1; m++) {
3125 GemmMicrokernelTester()
3126 .mr(1)
3127 .nr(4)
3128 .kr(1)
3129 .sr(1)
3130 .m(m)
3131 .n(4)
3132 .k(1)
3133 .iterations(1)
3134 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3135 }
3136}
3137
3138TEST(F32_IGEMM_RELU_1X4__SCALAR, k_eq_1_subtile_n) {
3139 for (uint32_t n = 1; n <= 4; n++) {
3140 GemmMicrokernelTester()
3141 .mr(1)
3142 .nr(4)
3143 .kr(1)
3144 .sr(1)
3145 .m(1)
3146 .n(n)
3147 .k(1)
3148 .iterations(1)
3149 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3150 }
3151}
3152
3153TEST(F32_IGEMM_RELU_1X4__SCALAR, k_gt_1) {
3154 for (size_t k = 2; k < 10; k++) {
3155 GemmMicrokernelTester()
3156 .mr(1)
3157 .nr(4)
3158 .kr(1)
3159 .sr(1)
3160 .m(1)
3161 .n(4)
3162 .k(k)
3163 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3164 }
3165}
3166
3167TEST(F32_IGEMM_RELU_1X4__SCALAR, k_gt_1_subtile) {
3168 for (size_t k = 2; k < 10; k++) {
3169 for (uint32_t m = 1; m <= 1; m++) {
3170 for (uint32_t n = 1; n <= 4; n++) {
3171 GemmMicrokernelTester()
3172 .mr(1)
3173 .nr(4)
3174 .kr(1)
3175 .sr(1)
3176 .m(m)
3177 .n(n)
3178 .k(k)
3179 .iterations(1)
3180 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3181 }
3182 }
3183 }
3184}
3185
3186TEST(F32_IGEMM_RELU_1X4__SCALAR, n_gt_4) {
3187 for (uint32_t n = 5; n < 8; n++) {
3188 for (size_t k = 1; k <= 5; k += 2) {
3189 GemmMicrokernelTester()
3190 .mr(1)
3191 .nr(4)
3192 .kr(1)
3193 .sr(1)
3194 .m(1)
3195 .n(4)
3196 .k(k)
3197 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3198 }
3199 }
3200}
3201
3202TEST(F32_IGEMM_RELU_1X4__SCALAR, n_gt_4_strided_cn) {
3203 for (uint32_t n = 5; n < 8; n++) {
3204 for (size_t k = 1; k <= 5; k += 2) {
3205 GemmMicrokernelTester()
3206 .mr(1)
3207 .nr(4)
3208 .kr(1)
3209 .sr(1)
3210 .m(1)
3211 .n(4)
3212 .k(k)
3213 .cn_stride(7)
3214 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3215 }
3216 }
3217}
3218
3219TEST(F32_IGEMM_RELU_1X4__SCALAR, n_gt_4_subtile) {
3220 for (uint32_t n = 5; n < 8; n++) {
3221 for (size_t k = 1; k <= 5; k += 2) {
3222 for (uint32_t m = 1; m <= 1; m++) {
3223 GemmMicrokernelTester()
3224 .mr(1)
3225 .nr(4)
3226 .kr(1)
3227 .sr(1)
3228 .m(m)
3229 .n(n)
3230 .k(k)
3231 .iterations(1)
3232 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3233 }
3234 }
3235 }
3236}
3237
3238TEST(F32_IGEMM_RELU_1X4__SCALAR, n_div_4) {
3239 for (uint32_t n = 8; n <= 12; n += 4) {
3240 for (size_t k = 1; k <= 5; k += 2) {
3241 GemmMicrokernelTester()
3242 .mr(1)
3243 .nr(4)
3244 .kr(1)
3245 .sr(1)
3246 .m(1)
3247 .n(4)
3248 .k(k)
3249 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3250 }
3251 }
3252}
3253
3254TEST(F32_IGEMM_RELU_1X4__SCALAR, n_div_4_strided_cn) {
3255 for (uint32_t n = 8; n <= 12; n += 4) {
3256 for (size_t k = 1; k <= 5; k += 2) {
3257 GemmMicrokernelTester()
3258 .mr(1)
3259 .nr(4)
3260 .kr(1)
3261 .sr(1)
3262 .m(1)
3263 .n(n)
3264 .k(k)
3265 .cn_stride(7)
3266 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3267 }
3268 }
3269}
3270
3271TEST(F32_IGEMM_RELU_1X4__SCALAR, n_div_4_subtile) {
3272 for (uint32_t n = 8; n <= 12; n += 4) {
3273 for (size_t k = 1; k <= 5; k += 2) {
3274 for (uint32_t m = 1; m <= 1; m++) {
3275 GemmMicrokernelTester()
3276 .mr(1)
3277 .nr(4)
3278 .kr(1)
3279 .sr(1)
3280 .m(m)
3281 .n(n)
3282 .k(k)
3283 .iterations(1)
3284 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3285 }
3286 }
3287 }
3288}
3289
3290TEST(F32_IGEMM_RELU_1X4__SCALAR, small_kernel) {
3291 for (size_t k = 1; k <= 5; k += 2) {
3292 GemmMicrokernelTester()
3293 .mr(1)
3294 .nr(4)
3295 .kr(1)
3296 .sr(1)
3297 .m(1)
3298 .n(4)
3299 .k(k)
3300 .ks(3)
3301 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3302 }
3303}
3304
3305TEST(F32_IGEMM_RELU_1X4__SCALAR, small_kernel_subtile) {
3306 for (size_t k = 1; k <= 5; k += 2) {
3307 for (uint32_t m = 1; m <= 1; m++) {
3308 for (uint32_t n = 1; n <= 4; n++) {
3309 GemmMicrokernelTester()
3310 .mr(1)
3311 .nr(4)
3312 .kr(1)
3313 .sr(1)
3314 .m(m)
3315 .n(n)
3316 .k(k)
3317 .ks(3)
3318 .iterations(1)
3319 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3320 }
3321 }
3322 }
3323}
3324
3325TEST(F32_IGEMM_RELU_1X4__SCALAR, n_gt_4_small_kernel) {
3326 for (uint32_t n = 5; n < 8; n++) {
3327 for (size_t k = 1; k <= 5; k += 2) {
3328 GemmMicrokernelTester()
3329 .mr(1)
3330 .nr(4)
3331 .kr(1)
3332 .sr(1)
3333 .m(1)
3334 .n(4)
3335 .k(k)
3336 .ks(3)
3337 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3338 }
3339 }
3340}
3341
3342TEST(F32_IGEMM_RELU_1X4__SCALAR, n_div_4_small_kernel) {
3343 for (uint32_t n = 8; n <= 12; n += 4) {
3344 for (size_t k = 1; k <= 5; k += 2) {
3345 GemmMicrokernelTester()
3346 .mr(1)
3347 .nr(4)
3348 .kr(1)
3349 .sr(1)
3350 .m(1)
3351 .n(4)
3352 .k(k)
3353 .ks(3)
3354 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3355 }
3356 }
3357}
3358
3359TEST(F32_IGEMM_RELU_1X4__SCALAR, strided_cm_subtile) {
3360 for (size_t k = 1; k <= 5; k += 2) {
3361 for (uint32_t m = 1; m <= 1; m++) {
3362 for (uint32_t n = 1; n <= 4; n++) {
3363 GemmMicrokernelTester()
3364 .mr(1)
3365 .nr(4)
3366 .kr(1)
3367 .sr(1)
3368 .m(m)
3369 .n(n)
3370 .k(k)
3371 .cm_stride(7)
3372 .iterations(1)
3373 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3374 }
3375 }
3376 }
3377}
3378
3379TEST(F32_IGEMM_RELU_1X4__SCALAR, a_offset) {
3380 for (size_t k = 1; k <= 5; k += 2) {
3381 GemmMicrokernelTester()
3382 .mr(1)
3383 .nr(4)
3384 .kr(1)
3385 .sr(1)
3386 .m(1)
3387 .n(4)
3388 .k(k)
3389 .ks(3)
3390 .a_offset(7)
3391 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3392 }
3393}
3394
3395TEST(F32_IGEMM_RELU_1X4__SCALAR, zero) {
3396 for (uint32_t mz = 0; mz < 1; mz++) {
3397 for (size_t k = 1; k <= 5; k += 2) {
3398 GemmMicrokernelTester()
3399 .mr(1)
3400 .nr(4)
3401 .kr(1)
3402 .sr(1)
3403 .m(1)
3404 .n(4)
3405 .k(k)
3406 .ks(3)
3407 .a_offset(7)
3408 .zero_index(mz)
3409 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3410 }
3411 }
3412}
3413
3414TEST(F32_IGEMM_RELU_1X4__SCALAR, strided_cm) {
3415 GemmMicrokernelTester()
3416 .mr(1)
3417 .nr(4)
3418 .kr(1)
3419 .sr(1)
3420 .m(1)
3421 .n(4)
3422 .k(1)
3423 .cm_stride(7)
3424 .Test(xnn_f32_igemm_relu_ukernel_1x4__scalar);
3425}
3426
3427
3428TEST(F32_IGEMM_RELU_2X4__SCALAR, k_eq_1) {
3429 GemmMicrokernelTester()
3430 .mr(2)
3431 .nr(4)
3432 .kr(1)
3433 .sr(1)
3434 .m(2)
3435 .n(4)
3436 .k(1)
3437 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3438}
3439
3440TEST(F32_IGEMM_RELU_2X4__SCALAR, strided_cn) {
3441 GemmMicrokernelTester()
3442 .mr(2)
3443 .nr(4)
3444 .kr(1)
3445 .sr(1)
3446 .m(2)
3447 .n(4)
3448 .k(1)
3449 .cn_stride(7)
3450 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3451}
3452
3453TEST(F32_IGEMM_RELU_2X4__SCALAR, k_eq_1_subtile) {
3454 for (uint32_t m = 1; m <= 2; m++) {
3455 for (uint32_t n = 1; n <= 4; n++) {
3456 GemmMicrokernelTester()
3457 .mr(2)
3458 .nr(4)
3459 .kr(1)
3460 .sr(1)
3461 .m(m)
3462 .n(n)
3463 .k(1)
3464 .iterations(1)
3465 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3466 }
3467 }
3468}
3469
3470TEST(F32_IGEMM_RELU_2X4__SCALAR, k_eq_1_subtile_m) {
3471 for (uint32_t m = 1; m <= 2; m++) {
3472 GemmMicrokernelTester()
3473 .mr(2)
3474 .nr(4)
3475 .kr(1)
3476 .sr(1)
3477 .m(m)
3478 .n(4)
3479 .k(1)
3480 .iterations(1)
3481 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3482 }
3483}
3484
3485TEST(F32_IGEMM_RELU_2X4__SCALAR, k_eq_1_subtile_n) {
3486 for (uint32_t n = 1; n <= 4; n++) {
3487 GemmMicrokernelTester()
3488 .mr(2)
3489 .nr(4)
3490 .kr(1)
3491 .sr(1)
3492 .m(2)
3493 .n(n)
3494 .k(1)
3495 .iterations(1)
3496 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3497 }
3498}
3499
3500TEST(F32_IGEMM_RELU_2X4__SCALAR, k_gt_1) {
3501 for (size_t k = 2; k < 10; k++) {
3502 GemmMicrokernelTester()
3503 .mr(2)
3504 .nr(4)
3505 .kr(1)
3506 .sr(1)
3507 .m(2)
3508 .n(4)
3509 .k(k)
3510 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3511 }
3512}
3513
3514TEST(F32_IGEMM_RELU_2X4__SCALAR, k_gt_1_subtile) {
3515 for (size_t k = 2; k < 10; k++) {
3516 for (uint32_t m = 1; m <= 2; m++) {
3517 for (uint32_t n = 1; n <= 4; n++) {
3518 GemmMicrokernelTester()
3519 .mr(2)
3520 .nr(4)
3521 .kr(1)
3522 .sr(1)
3523 .m(m)
3524 .n(n)
3525 .k(k)
3526 .iterations(1)
3527 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3528 }
3529 }
3530 }
3531}
3532
3533TEST(F32_IGEMM_RELU_2X4__SCALAR, n_gt_4) {
3534 for (uint32_t n = 5; n < 8; n++) {
3535 for (size_t k = 1; k <= 5; k += 2) {
3536 GemmMicrokernelTester()
3537 .mr(2)
3538 .nr(4)
3539 .kr(1)
3540 .sr(1)
3541 .m(2)
3542 .n(4)
3543 .k(k)
3544 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3545 }
3546 }
3547}
3548
3549TEST(F32_IGEMM_RELU_2X4__SCALAR, n_gt_4_strided_cn) {
3550 for (uint32_t n = 5; n < 8; n++) {
3551 for (size_t k = 1; k <= 5; k += 2) {
3552 GemmMicrokernelTester()
3553 .mr(2)
3554 .nr(4)
3555 .kr(1)
3556 .sr(1)
3557 .m(2)
3558 .n(4)
3559 .k(k)
3560 .cn_stride(7)
3561 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3562 }
3563 }
3564}
3565
3566TEST(F32_IGEMM_RELU_2X4__SCALAR, n_gt_4_subtile) {
3567 for (uint32_t n = 5; n < 8; n++) {
3568 for (size_t k = 1; k <= 5; k += 2) {
3569 for (uint32_t m = 1; m <= 2; m++) {
3570 GemmMicrokernelTester()
3571 .mr(2)
3572 .nr(4)
3573 .kr(1)
3574 .sr(1)
3575 .m(m)
3576 .n(n)
3577 .k(k)
3578 .iterations(1)
3579 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3580 }
3581 }
3582 }
3583}
3584
3585TEST(F32_IGEMM_RELU_2X4__SCALAR, n_div_4) {
3586 for (uint32_t n = 8; n <= 12; n += 4) {
3587 for (size_t k = 1; k <= 5; k += 2) {
3588 GemmMicrokernelTester()
3589 .mr(2)
3590 .nr(4)
3591 .kr(1)
3592 .sr(1)
3593 .m(2)
3594 .n(4)
3595 .k(k)
3596 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3597 }
3598 }
3599}
3600
3601TEST(F32_IGEMM_RELU_2X4__SCALAR, n_div_4_strided_cn) {
3602 for (uint32_t n = 8; n <= 12; n += 4) {
3603 for (size_t k = 1; k <= 5; k += 2) {
3604 GemmMicrokernelTester()
3605 .mr(2)
3606 .nr(4)
3607 .kr(1)
3608 .sr(1)
3609 .m(2)
3610 .n(n)
3611 .k(k)
3612 .cn_stride(7)
3613 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3614 }
3615 }
3616}
3617
3618TEST(F32_IGEMM_RELU_2X4__SCALAR, n_div_4_subtile) {
3619 for (uint32_t n = 8; n <= 12; n += 4) {
3620 for (size_t k = 1; k <= 5; k += 2) {
3621 for (uint32_t m = 1; m <= 2; m++) {
3622 GemmMicrokernelTester()
3623 .mr(2)
3624 .nr(4)
3625 .kr(1)
3626 .sr(1)
3627 .m(m)
3628 .n(n)
3629 .k(k)
3630 .iterations(1)
3631 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3632 }
3633 }
3634 }
3635}
3636
3637TEST(F32_IGEMM_RELU_2X4__SCALAR, small_kernel) {
3638 for (size_t k = 1; k <= 5; k += 2) {
3639 GemmMicrokernelTester()
3640 .mr(2)
3641 .nr(4)
3642 .kr(1)
3643 .sr(1)
3644 .m(2)
3645 .n(4)
3646 .k(k)
3647 .ks(3)
3648 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3649 }
3650}
3651
3652TEST(F32_IGEMM_RELU_2X4__SCALAR, small_kernel_subtile) {
3653 for (size_t k = 1; k <= 5; k += 2) {
3654 for (uint32_t m = 1; m <= 2; m++) {
3655 for (uint32_t n = 1; n <= 4; n++) {
3656 GemmMicrokernelTester()
3657 .mr(2)
3658 .nr(4)
3659 .kr(1)
3660 .sr(1)
3661 .m(m)
3662 .n(n)
3663 .k(k)
3664 .ks(3)
3665 .iterations(1)
3666 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3667 }
3668 }
3669 }
3670}
3671
3672TEST(F32_IGEMM_RELU_2X4__SCALAR, n_gt_4_small_kernel) {
3673 for (uint32_t n = 5; n < 8; n++) {
3674 for (size_t k = 1; k <= 5; k += 2) {
3675 GemmMicrokernelTester()
3676 .mr(2)
3677 .nr(4)
3678 .kr(1)
3679 .sr(1)
3680 .m(2)
3681 .n(4)
3682 .k(k)
3683 .ks(3)
3684 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3685 }
3686 }
3687}
3688
3689TEST(F32_IGEMM_RELU_2X4__SCALAR, n_div_4_small_kernel) {
3690 for (uint32_t n = 8; n <= 12; n += 4) {
3691 for (size_t k = 1; k <= 5; k += 2) {
3692 GemmMicrokernelTester()
3693 .mr(2)
3694 .nr(4)
3695 .kr(1)
3696 .sr(1)
3697 .m(2)
3698 .n(4)
3699 .k(k)
3700 .ks(3)
3701 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3702 }
3703 }
3704}
3705
3706TEST(F32_IGEMM_RELU_2X4__SCALAR, strided_cm_subtile) {
3707 for (size_t k = 1; k <= 5; k += 2) {
3708 for (uint32_t m = 1; m <= 2; m++) {
3709 for (uint32_t n = 1; n <= 4; n++) {
3710 GemmMicrokernelTester()
3711 .mr(2)
3712 .nr(4)
3713 .kr(1)
3714 .sr(1)
3715 .m(m)
3716 .n(n)
3717 .k(k)
3718 .cm_stride(7)
3719 .iterations(1)
3720 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3721 }
3722 }
3723 }
3724}
3725
3726TEST(F32_IGEMM_RELU_2X4__SCALAR, a_offset) {
3727 for (size_t k = 1; k <= 5; k += 2) {
3728 GemmMicrokernelTester()
3729 .mr(2)
3730 .nr(4)
3731 .kr(1)
3732 .sr(1)
3733 .m(2)
3734 .n(4)
3735 .k(k)
3736 .ks(3)
3737 .a_offset(13)
3738 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3739 }
3740}
3741
3742TEST(F32_IGEMM_RELU_2X4__SCALAR, zero) {
3743 for (uint32_t mz = 0; mz < 2; mz++) {
3744 for (size_t k = 1; k <= 5; k += 2) {
3745 GemmMicrokernelTester()
3746 .mr(2)
3747 .nr(4)
3748 .kr(1)
3749 .sr(1)
3750 .m(2)
3751 .n(4)
3752 .k(k)
3753 .ks(3)
3754 .a_offset(13)
3755 .zero_index(mz)
3756 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3757 }
3758 }
3759}
3760
3761TEST(F32_IGEMM_RELU_2X4__SCALAR, strided_cm) {
3762 GemmMicrokernelTester()
3763 .mr(2)
3764 .nr(4)
3765 .kr(1)
3766 .sr(1)
3767 .m(2)
3768 .n(4)
3769 .k(1)
3770 .cm_stride(7)
3771 .Test(xnn_f32_igemm_relu_ukernel_2x4__scalar);
3772}
3773
3774
3775TEST(F32_IGEMM_RELU_4X4__SCALAR, k_eq_1) {
3776 GemmMicrokernelTester()
3777 .mr(4)
3778 .nr(4)
3779 .kr(1)
3780 .sr(1)
3781 .m(4)
3782 .n(4)
3783 .k(1)
3784 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3785}
3786
3787TEST(F32_IGEMM_RELU_4X4__SCALAR, strided_cn) {
3788 GemmMicrokernelTester()
3789 .mr(4)
3790 .nr(4)
3791 .kr(1)
3792 .sr(1)
3793 .m(4)
3794 .n(4)
3795 .k(1)
3796 .cn_stride(7)
3797 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3798}
3799
3800TEST(F32_IGEMM_RELU_4X4__SCALAR, k_eq_1_subtile) {
3801 for (uint32_t m = 1; m <= 4; m++) {
3802 for (uint32_t n = 1; n <= 4; n++) {
3803 GemmMicrokernelTester()
3804 .mr(4)
3805 .nr(4)
3806 .kr(1)
3807 .sr(1)
3808 .m(m)
3809 .n(n)
3810 .k(1)
3811 .iterations(1)
3812 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3813 }
3814 }
3815}
3816
3817TEST(F32_IGEMM_RELU_4X4__SCALAR, k_eq_1_subtile_m) {
3818 for (uint32_t m = 1; m <= 4; m++) {
3819 GemmMicrokernelTester()
3820 .mr(4)
3821 .nr(4)
3822 .kr(1)
3823 .sr(1)
3824 .m(m)
3825 .n(4)
3826 .k(1)
3827 .iterations(1)
3828 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3829 }
3830}
3831
3832TEST(F32_IGEMM_RELU_4X4__SCALAR, k_eq_1_subtile_n) {
3833 for (uint32_t n = 1; n <= 4; n++) {
3834 GemmMicrokernelTester()
3835 .mr(4)
3836 .nr(4)
3837 .kr(1)
3838 .sr(1)
3839 .m(4)
3840 .n(n)
3841 .k(1)
3842 .iterations(1)
3843 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3844 }
3845}
3846
3847TEST(F32_IGEMM_RELU_4X4__SCALAR, k_gt_1) {
3848 for (size_t k = 2; k < 10; k++) {
3849 GemmMicrokernelTester()
3850 .mr(4)
3851 .nr(4)
3852 .kr(1)
3853 .sr(1)
3854 .m(4)
3855 .n(4)
3856 .k(k)
3857 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3858 }
3859}
3860
3861TEST(F32_IGEMM_RELU_4X4__SCALAR, k_gt_1_subtile) {
3862 for (size_t k = 2; k < 10; k++) {
3863 for (uint32_t m = 1; m <= 4; m++) {
3864 for (uint32_t n = 1; n <= 4; n++) {
3865 GemmMicrokernelTester()
3866 .mr(4)
3867 .nr(4)
3868 .kr(1)
3869 .sr(1)
3870 .m(m)
3871 .n(n)
3872 .k(k)
3873 .iterations(1)
3874 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3875 }
3876 }
3877 }
3878}
3879
3880TEST(F32_IGEMM_RELU_4X4__SCALAR, n_gt_4) {
3881 for (uint32_t n = 5; n < 8; n++) {
3882 for (size_t k = 1; k <= 5; k += 2) {
3883 GemmMicrokernelTester()
3884 .mr(4)
3885 .nr(4)
3886 .kr(1)
3887 .sr(1)
3888 .m(4)
3889 .n(4)
3890 .k(k)
3891 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3892 }
3893 }
3894}
3895
3896TEST(F32_IGEMM_RELU_4X4__SCALAR, n_gt_4_strided_cn) {
3897 for (uint32_t n = 5; n < 8; n++) {
3898 for (size_t k = 1; k <= 5; k += 2) {
3899 GemmMicrokernelTester()
3900 .mr(4)
3901 .nr(4)
3902 .kr(1)
3903 .sr(1)
3904 .m(4)
3905 .n(4)
3906 .k(k)
3907 .cn_stride(7)
3908 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3909 }
3910 }
3911}
3912
3913TEST(F32_IGEMM_RELU_4X4__SCALAR, n_gt_4_subtile) {
3914 for (uint32_t n = 5; n < 8; n++) {
3915 for (size_t k = 1; k <= 5; k += 2) {
3916 for (uint32_t m = 1; m <= 4; m++) {
3917 GemmMicrokernelTester()
3918 .mr(4)
3919 .nr(4)
3920 .kr(1)
3921 .sr(1)
3922 .m(m)
3923 .n(n)
3924 .k(k)
3925 .iterations(1)
3926 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3927 }
3928 }
3929 }
3930}
3931
3932TEST(F32_IGEMM_RELU_4X4__SCALAR, n_div_4) {
3933 for (uint32_t n = 8; n <= 12; n += 4) {
3934 for (size_t k = 1; k <= 5; k += 2) {
3935 GemmMicrokernelTester()
3936 .mr(4)
3937 .nr(4)
3938 .kr(1)
3939 .sr(1)
3940 .m(4)
3941 .n(4)
3942 .k(k)
3943 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3944 }
3945 }
3946}
3947
3948TEST(F32_IGEMM_RELU_4X4__SCALAR, n_div_4_strided_cn) {
3949 for (uint32_t n = 8; n <= 12; n += 4) {
3950 for (size_t k = 1; k <= 5; k += 2) {
3951 GemmMicrokernelTester()
3952 .mr(4)
3953 .nr(4)
3954 .kr(1)
3955 .sr(1)
3956 .m(4)
3957 .n(n)
3958 .k(k)
3959 .cn_stride(7)
3960 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3961 }
3962 }
3963}
3964
3965TEST(F32_IGEMM_RELU_4X4__SCALAR, n_div_4_subtile) {
3966 for (uint32_t n = 8; n <= 12; n += 4) {
3967 for (size_t k = 1; k <= 5; k += 2) {
3968 for (uint32_t m = 1; m <= 4; m++) {
3969 GemmMicrokernelTester()
3970 .mr(4)
3971 .nr(4)
3972 .kr(1)
3973 .sr(1)
3974 .m(m)
3975 .n(n)
3976 .k(k)
3977 .iterations(1)
3978 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3979 }
3980 }
3981 }
3982}
3983
3984TEST(F32_IGEMM_RELU_4X4__SCALAR, small_kernel) {
3985 for (size_t k = 1; k <= 5; k += 2) {
3986 GemmMicrokernelTester()
3987 .mr(4)
3988 .nr(4)
3989 .kr(1)
3990 .sr(1)
3991 .m(4)
3992 .n(4)
3993 .k(k)
3994 .ks(3)
3995 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
3996 }
3997}
3998
3999TEST(F32_IGEMM_RELU_4X4__SCALAR, small_kernel_subtile) {
4000 for (size_t k = 1; k <= 5; k += 2) {
4001 for (uint32_t m = 1; m <= 4; m++) {
4002 for (uint32_t n = 1; n <= 4; n++) {
4003 GemmMicrokernelTester()
4004 .mr(4)
4005 .nr(4)
4006 .kr(1)
4007 .sr(1)
4008 .m(m)
4009 .n(n)
4010 .k(k)
4011 .ks(3)
4012 .iterations(1)
4013 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
4014 }
4015 }
4016 }
4017}
4018
4019TEST(F32_IGEMM_RELU_4X4__SCALAR, n_gt_4_small_kernel) {
4020 for (uint32_t n = 5; n < 8; n++) {
4021 for (size_t k = 1; k <= 5; k += 2) {
4022 GemmMicrokernelTester()
4023 .mr(4)
4024 .nr(4)
4025 .kr(1)
4026 .sr(1)
4027 .m(4)
4028 .n(4)
4029 .k(k)
4030 .ks(3)
4031 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
4032 }
4033 }
4034}
4035
4036TEST(F32_IGEMM_RELU_4X4__SCALAR, n_div_4_small_kernel) {
4037 for (uint32_t n = 8; n <= 12; n += 4) {
4038 for (size_t k = 1; k <= 5; k += 2) {
4039 GemmMicrokernelTester()
4040 .mr(4)
4041 .nr(4)
4042 .kr(1)
4043 .sr(1)
4044 .m(4)
4045 .n(4)
4046 .k(k)
4047 .ks(3)
4048 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
4049 }
4050 }
4051}
4052
4053TEST(F32_IGEMM_RELU_4X4__SCALAR, strided_cm_subtile) {
4054 for (size_t k = 1; k <= 5; k += 2) {
4055 for (uint32_t m = 1; m <= 4; m++) {
4056 for (uint32_t n = 1; n <= 4; n++) {
4057 GemmMicrokernelTester()
4058 .mr(4)
4059 .nr(4)
4060 .kr(1)
4061 .sr(1)
4062 .m(m)
4063 .n(n)
4064 .k(k)
4065 .cm_stride(7)
4066 .iterations(1)
4067 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
4068 }
4069 }
4070 }
4071}
4072
4073TEST(F32_IGEMM_RELU_4X4__SCALAR, a_offset) {
4074 for (size_t k = 1; k <= 5; k += 2) {
4075 GemmMicrokernelTester()
4076 .mr(4)
4077 .nr(4)
4078 .kr(1)
4079 .sr(1)
4080 .m(4)
4081 .n(4)
4082 .k(k)
4083 .ks(3)
4084 .a_offset(23)
4085 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
4086 }
4087}
4088
4089TEST(F32_IGEMM_RELU_4X4__SCALAR, zero) {
4090 for (uint32_t mz = 0; mz < 4; mz++) {
4091 for (size_t k = 1; k <= 5; k += 2) {
4092 GemmMicrokernelTester()
4093 .mr(4)
4094 .nr(4)
4095 .kr(1)
4096 .sr(1)
4097 .m(4)
4098 .n(4)
4099 .k(k)
4100 .ks(3)
4101 .a_offset(23)
4102 .zero_index(mz)
4103 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
4104 }
4105 }
4106}
4107
4108TEST(F32_IGEMM_RELU_4X4__SCALAR, strided_cm) {
4109 GemmMicrokernelTester()
4110 .mr(4)
4111 .nr(4)
4112 .kr(1)
4113 .sr(1)
4114 .m(4)
4115 .n(4)
4116 .k(1)
4117 .cm_stride(7)
4118 .Test(xnn_f32_igemm_relu_ukernel_4x4__scalar);
4119}
4120
4121
4122TEST(F32_IGEMM_RELU_4X2__SCALAR, k_eq_1) {
4123 GemmMicrokernelTester()
4124 .mr(4)
4125 .nr(2)
4126 .kr(1)
4127 .sr(1)
4128 .m(4)
4129 .n(2)
4130 .k(1)
4131 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4132}
4133
4134TEST(F32_IGEMM_RELU_4X2__SCALAR, strided_cn) {
4135 GemmMicrokernelTester()
4136 .mr(4)
4137 .nr(2)
4138 .kr(1)
4139 .sr(1)
4140 .m(4)
4141 .n(2)
4142 .k(1)
4143 .cn_stride(5)
4144 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4145}
4146
4147TEST(F32_IGEMM_RELU_4X2__SCALAR, k_eq_1_subtile) {
4148 for (uint32_t m = 1; m <= 4; m++) {
4149 for (uint32_t n = 1; n <= 2; n++) {
4150 GemmMicrokernelTester()
4151 .mr(4)
4152 .nr(2)
4153 .kr(1)
4154 .sr(1)
4155 .m(m)
4156 .n(n)
4157 .k(1)
4158 .iterations(1)
4159 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4160 }
4161 }
4162}
4163
4164TEST(F32_IGEMM_RELU_4X2__SCALAR, k_eq_1_subtile_m) {
4165 for (uint32_t m = 1; m <= 4; m++) {
4166 GemmMicrokernelTester()
4167 .mr(4)
4168 .nr(2)
4169 .kr(1)
4170 .sr(1)
4171 .m(m)
4172 .n(2)
4173 .k(1)
4174 .iterations(1)
4175 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4176 }
4177}
4178
4179TEST(F32_IGEMM_RELU_4X2__SCALAR, k_eq_1_subtile_n) {
4180 for (uint32_t n = 1; n <= 2; n++) {
4181 GemmMicrokernelTester()
4182 .mr(4)
4183 .nr(2)
4184 .kr(1)
4185 .sr(1)
4186 .m(4)
4187 .n(n)
4188 .k(1)
4189 .iterations(1)
4190 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4191 }
4192}
4193
4194TEST(F32_IGEMM_RELU_4X2__SCALAR, k_gt_1) {
4195 for (size_t k = 2; k < 10; k++) {
4196 GemmMicrokernelTester()
4197 .mr(4)
4198 .nr(2)
4199 .kr(1)
4200 .sr(1)
4201 .m(4)
4202 .n(2)
4203 .k(k)
4204 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4205 }
4206}
4207
4208TEST(F32_IGEMM_RELU_4X2__SCALAR, k_gt_1_subtile) {
4209 for (size_t k = 2; k < 10; k++) {
4210 for (uint32_t m = 1; m <= 4; m++) {
4211 for (uint32_t n = 1; n <= 2; n++) {
4212 GemmMicrokernelTester()
4213 .mr(4)
4214 .nr(2)
4215 .kr(1)
4216 .sr(1)
4217 .m(m)
4218 .n(n)
4219 .k(k)
4220 .iterations(1)
4221 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4222 }
4223 }
4224 }
4225}
4226
4227TEST(F32_IGEMM_RELU_4X2__SCALAR, n_gt_2) {
4228 for (uint32_t n = 3; n < 4; n++) {
4229 for (size_t k = 1; k <= 5; k += 2) {
4230 GemmMicrokernelTester()
4231 .mr(4)
4232 .nr(2)
4233 .kr(1)
4234 .sr(1)
4235 .m(4)
4236 .n(2)
4237 .k(k)
4238 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4239 }
4240 }
4241}
4242
4243TEST(F32_IGEMM_RELU_4X2__SCALAR, n_gt_2_strided_cn) {
4244 for (uint32_t n = 3; n < 4; n++) {
4245 for (size_t k = 1; k <= 5; k += 2) {
4246 GemmMicrokernelTester()
4247 .mr(4)
4248 .nr(2)
4249 .kr(1)
4250 .sr(1)
4251 .m(4)
4252 .n(2)
4253 .k(k)
4254 .cn_stride(5)
4255 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4256 }
4257 }
4258}
4259
4260TEST(F32_IGEMM_RELU_4X2__SCALAR, n_gt_2_subtile) {
4261 for (uint32_t n = 3; n < 4; n++) {
4262 for (size_t k = 1; k <= 5; k += 2) {
4263 for (uint32_t m = 1; m <= 4; m++) {
4264 GemmMicrokernelTester()
4265 .mr(4)
4266 .nr(2)
4267 .kr(1)
4268 .sr(1)
4269 .m(m)
4270 .n(n)
4271 .k(k)
4272 .iterations(1)
4273 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4274 }
4275 }
4276 }
4277}
4278
4279TEST(F32_IGEMM_RELU_4X2__SCALAR, n_div_2) {
4280 for (uint32_t n = 4; n <= 6; n += 2) {
4281 for (size_t k = 1; k <= 5; k += 2) {
4282 GemmMicrokernelTester()
4283 .mr(4)
4284 .nr(2)
4285 .kr(1)
4286 .sr(1)
4287 .m(4)
4288 .n(2)
4289 .k(k)
4290 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4291 }
4292 }
4293}
4294
4295TEST(F32_IGEMM_RELU_4X2__SCALAR, n_div_2_strided_cn) {
4296 for (uint32_t n = 4; n <= 6; n += 2) {
4297 for (size_t k = 1; k <= 5; k += 2) {
4298 GemmMicrokernelTester()
4299 .mr(4)
4300 .nr(2)
4301 .kr(1)
4302 .sr(1)
4303 .m(4)
4304 .n(n)
4305 .k(k)
4306 .cn_stride(5)
4307 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4308 }
4309 }
4310}
4311
4312TEST(F32_IGEMM_RELU_4X2__SCALAR, n_div_2_subtile) {
4313 for (uint32_t n = 4; n <= 6; n += 2) {
4314 for (size_t k = 1; k <= 5; k += 2) {
4315 for (uint32_t m = 1; m <= 4; m++) {
4316 GemmMicrokernelTester()
4317 .mr(4)
4318 .nr(2)
4319 .kr(1)
4320 .sr(1)
4321 .m(m)
4322 .n(n)
4323 .k(k)
4324 .iterations(1)
4325 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4326 }
4327 }
4328 }
4329}
4330
4331TEST(F32_IGEMM_RELU_4X2__SCALAR, small_kernel) {
4332 for (size_t k = 1; k <= 5; k += 2) {
4333 GemmMicrokernelTester()
4334 .mr(4)
4335 .nr(2)
4336 .kr(1)
4337 .sr(1)
4338 .m(4)
4339 .n(2)
4340 .k(k)
4341 .ks(3)
4342 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4343 }
4344}
4345
4346TEST(F32_IGEMM_RELU_4X2__SCALAR, small_kernel_subtile) {
4347 for (size_t k = 1; k <= 5; k += 2) {
4348 for (uint32_t m = 1; m <= 4; m++) {
4349 for (uint32_t n = 1; n <= 2; n++) {
4350 GemmMicrokernelTester()
4351 .mr(4)
4352 .nr(2)
4353 .kr(1)
4354 .sr(1)
4355 .m(m)
4356 .n(n)
4357 .k(k)
4358 .ks(3)
4359 .iterations(1)
4360 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4361 }
4362 }
4363 }
4364}
4365
4366TEST(F32_IGEMM_RELU_4X2__SCALAR, n_gt_2_small_kernel) {
4367 for (uint32_t n = 3; n < 4; n++) {
4368 for (size_t k = 1; k <= 5; k += 2) {
4369 GemmMicrokernelTester()
4370 .mr(4)
4371 .nr(2)
4372 .kr(1)
4373 .sr(1)
4374 .m(4)
4375 .n(2)
4376 .k(k)
4377 .ks(3)
4378 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4379 }
4380 }
4381}
4382
4383TEST(F32_IGEMM_RELU_4X2__SCALAR, n_div_2_small_kernel) {
4384 for (uint32_t n = 4; n <= 6; n += 2) {
4385 for (size_t k = 1; k <= 5; k += 2) {
4386 GemmMicrokernelTester()
4387 .mr(4)
4388 .nr(2)
4389 .kr(1)
4390 .sr(1)
4391 .m(4)
4392 .n(2)
4393 .k(k)
4394 .ks(3)
4395 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4396 }
4397 }
4398}
4399
4400TEST(F32_IGEMM_RELU_4X2__SCALAR, strided_cm_subtile) {
4401 for (size_t k = 1; k <= 5; k += 2) {
4402 for (uint32_t m = 1; m <= 4; m++) {
4403 for (uint32_t n = 1; n <= 2; n++) {
4404 GemmMicrokernelTester()
4405 .mr(4)
4406 .nr(2)
4407 .kr(1)
4408 .sr(1)
4409 .m(m)
4410 .n(n)
4411 .k(k)
4412 .cm_stride(5)
4413 .iterations(1)
4414 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4415 }
4416 }
4417 }
4418}
4419
4420TEST(F32_IGEMM_RELU_4X2__SCALAR, a_offset) {
4421 for (size_t k = 1; k <= 5; k += 2) {
4422 GemmMicrokernelTester()
4423 .mr(4)
4424 .nr(2)
4425 .kr(1)
4426 .sr(1)
4427 .m(4)
4428 .n(2)
4429 .k(k)
4430 .ks(3)
4431 .a_offset(23)
4432 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4433 }
4434}
4435
4436TEST(F32_IGEMM_RELU_4X2__SCALAR, zero) {
4437 for (uint32_t mz = 0; mz < 4; mz++) {
4438 for (size_t k = 1; k <= 5; k += 2) {
4439 GemmMicrokernelTester()
4440 .mr(4)
4441 .nr(2)
4442 .kr(1)
4443 .sr(1)
4444 .m(4)
4445 .n(2)
4446 .k(k)
4447 .ks(3)
4448 .a_offset(23)
4449 .zero_index(mz)
4450 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4451 }
4452 }
4453}
4454
4455TEST(F32_IGEMM_RELU_4X2__SCALAR, strided_cm) {
4456 GemmMicrokernelTester()
4457 .mr(4)
4458 .nr(2)
4459 .kr(1)
4460 .sr(1)
4461 .m(4)
4462 .n(2)
4463 .k(1)
4464 .cm_stride(5)
4465 .Test(xnn_f32_igemm_relu_ukernel_4x2__scalar);
4466}