blob: f1a3105454f2da2e19886bd5ffa7eecc3e4ff758 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-prelu.yaml
// Generator: tools/generate-prelu-test.py
9
XNNPACK Teamb455b122019-09-27 18:10:33 -070010
XNNPACK Teamb455b122019-09-27 18:10:33 -070011#include <gtest/gtest.h>
12
Marat Dukhan1dadbf72019-10-01 10:46:20 -070013#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <xnnpack/isa-checks.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070015
Marat Dukhan1dadbf72019-10-01 10:46:20 -070016#include <xnnpack/prelu.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070017#include "prelu-microkernel-tester.h"
18
19
Marat Dukhan69c3f2c2019-11-06 12:30:01 -080020#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharda5316982020-07-23 13:19:28 -070021 TEST(F32_PRELU__NEON_1X4, channels_eq_4) {
22 TEST_REQUIRES_ARM_NEON;
23 PReLUMicrokernelTester()
24 .rows(1)
25 .channels(4)
26 .Test(xnn_f32_prelu_ukernel__neon_1x4);
27 }
28
29 TEST(F32_PRELU__NEON_1X4, channels_div_4) {
30 TEST_REQUIRES_ARM_NEON;
31 for (size_t channels = 8; channels < 40; channels += 4) {
32 PReLUMicrokernelTester()
33 .rows(1)
34 .channels(channels)
35 .Test(xnn_f32_prelu_ukernel__neon_1x4);
36 }
37 }
38
39 TEST(F32_PRELU__NEON_1X4, channels_lt_4) {
40 TEST_REQUIRES_ARM_NEON;
41 for (size_t channels = 1; channels < 4; channels++) {
42 PReLUMicrokernelTester()
43 .rows(1)
44 .channels(channels)
45 .Test(xnn_f32_prelu_ukernel__neon_1x4);
46 }
47 }
48
49 TEST(F32_PRELU__NEON_1X4, channels_gt_4) {
50 TEST_REQUIRES_ARM_NEON;
51 for (size_t channels = 5; channels < 8; channels++) {
52 PReLUMicrokernelTester()
53 .rows(1)
54 .channels(channels)
55 .Test(xnn_f32_prelu_ukernel__neon_1x4);
56 }
57 }
58
59 TEST(F32_PRELU__NEON_1X4, rows_gt_1) {
60 TEST_REQUIRES_ARM_NEON;
61 for (size_t rows = 2; rows < 2; rows++) {
62 for (size_t channels = 1; channels <= 20; channels += 3) {
63 PReLUMicrokernelTester()
64 .rows(rows)
65 .channels(channels)
66 .Test(xnn_f32_prelu_ukernel__neon_1x4);
67 }
68 }
69 }
70
71 TEST(F32_PRELU__NEON_1X4, input_stride) {
72 TEST_REQUIRES_ARM_NEON;
73 for (size_t rows = 1; rows <= 3; rows += 1) {
74 for (size_t channels = 1; channels <= 20; channels += 3) {
75 PReLUMicrokernelTester()
76 .rows(rows)
77 .channels(channels)
78 .input_stride(23)
79 .iterations(1)
80 .Test(xnn_f32_prelu_ukernel__neon_1x4);
81 }
82 }
83 }
84
85 TEST(F32_PRELU__NEON_1X4, output_stride) {
86 TEST_REQUIRES_ARM_NEON;
87 for (size_t rows = 1; rows <= 3; rows += 1) {
88 for (size_t channels = 1; channels <= 20; channels += 3) {
89 PReLUMicrokernelTester()
90 .rows(rows)
91 .channels(channels)
92 .output_stride(23)
93 .iterations(1)
94 .Test(xnn_f32_prelu_ukernel__neon_1x4);
95 }
96 }
97 }
98
99 TEST(F32_PRELU__NEON_1X4, inplace) {
100 TEST_REQUIRES_ARM_NEON;
101 for (size_t rows = 1; rows <= 3; rows += 1) {
102 for (size_t channels = 1; channels <= 20; channels += 3) {
103 PReLUMicrokernelTester()
104 .rows(rows)
105 .channels(channels)
106 .inplace(true)
107 .iterations(1)
108 .Test(xnn_f32_prelu_ukernel__neon_1x4);
109 }
110 }
111 }
112#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
113
114
115#if XNN_ARCH_ARM || XNN_ARCH_ARM64
116 TEST(F32_PRELU__NEON_1X8, channels_eq_8) {
117 TEST_REQUIRES_ARM_NEON;
118 PReLUMicrokernelTester()
119 .rows(1)
120 .channels(8)
121 .Test(xnn_f32_prelu_ukernel__neon_1x8);
122 }
123
124 TEST(F32_PRELU__NEON_1X8, channels_div_8) {
125 TEST_REQUIRES_ARM_NEON;
126 for (size_t channels = 16; channels < 80; channels += 8) {
127 PReLUMicrokernelTester()
128 .rows(1)
129 .channels(channels)
130 .Test(xnn_f32_prelu_ukernel__neon_1x8);
131 }
132 }
133
134 TEST(F32_PRELU__NEON_1X8, channels_lt_8) {
135 TEST_REQUIRES_ARM_NEON;
136 for (size_t channels = 1; channels < 8; channels++) {
137 PReLUMicrokernelTester()
138 .rows(1)
139 .channels(channels)
140 .Test(xnn_f32_prelu_ukernel__neon_1x8);
141 }
142 }
143
144 TEST(F32_PRELU__NEON_1X8, channels_gt_8) {
145 TEST_REQUIRES_ARM_NEON;
146 for (size_t channels = 9; channels < 16; channels++) {
147 PReLUMicrokernelTester()
148 .rows(1)
149 .channels(channels)
150 .Test(xnn_f32_prelu_ukernel__neon_1x8);
151 }
152 }
153
154 TEST(F32_PRELU__NEON_1X8, rows_gt_1) {
155 TEST_REQUIRES_ARM_NEON;
156 for (size_t rows = 2; rows < 2; rows++) {
157 for (size_t channels = 1; channels <= 40; channels += 7) {
158 PReLUMicrokernelTester()
159 .rows(rows)
160 .channels(channels)
161 .Test(xnn_f32_prelu_ukernel__neon_1x8);
162 }
163 }
164 }
165
166 TEST(F32_PRELU__NEON_1X8, input_stride) {
167 TEST_REQUIRES_ARM_NEON;
168 for (size_t rows = 1; rows <= 3; rows += 1) {
169 for (size_t channels = 1; channels <= 40; channels += 7) {
170 PReLUMicrokernelTester()
171 .rows(rows)
172 .channels(channels)
173 .input_stride(43)
174 .iterations(1)
175 .Test(xnn_f32_prelu_ukernel__neon_1x8);
176 }
177 }
178 }
179
180 TEST(F32_PRELU__NEON_1X8, output_stride) {
181 TEST_REQUIRES_ARM_NEON;
182 for (size_t rows = 1; rows <= 3; rows += 1) {
183 for (size_t channels = 1; channels <= 40; channels += 7) {
184 PReLUMicrokernelTester()
185 .rows(rows)
186 .channels(channels)
187 .output_stride(43)
188 .iterations(1)
189 .Test(xnn_f32_prelu_ukernel__neon_1x8);
190 }
191 }
192 }
193
194 TEST(F32_PRELU__NEON_1X8, inplace) {
195 TEST_REQUIRES_ARM_NEON;
196 for (size_t rows = 1; rows <= 3; rows += 1) {
197 for (size_t channels = 1; channels <= 40; channels += 7) {
198 PReLUMicrokernelTester()
199 .rows(rows)
200 .channels(channels)
201 .inplace(true)
202 .iterations(1)
203 .Test(xnn_f32_prelu_ukernel__neon_1x8);
204 }
205 }
206 }
207#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
208
209
210#if XNN_ARCH_ARM || XNN_ARCH_ARM64
211 TEST(F32_PRELU__NEON_1X16, channels_eq_16) {
212 TEST_REQUIRES_ARM_NEON;
213 PReLUMicrokernelTester()
214 .rows(1)
215 .channels(16)
216 .Test(xnn_f32_prelu_ukernel__neon_1x16);
217 }
218
219 TEST(F32_PRELU__NEON_1X16, channels_div_16) {
220 TEST_REQUIRES_ARM_NEON;
221 for (size_t channels = 32; channels < 160; channels += 16) {
222 PReLUMicrokernelTester()
223 .rows(1)
224 .channels(channels)
225 .Test(xnn_f32_prelu_ukernel__neon_1x16);
226 }
227 }
228
229 TEST(F32_PRELU__NEON_1X16, channels_lt_16) {
230 TEST_REQUIRES_ARM_NEON;
231 for (size_t channels = 1; channels < 16; channels++) {
232 PReLUMicrokernelTester()
233 .rows(1)
234 .channels(channels)
235 .Test(xnn_f32_prelu_ukernel__neon_1x16);
236 }
237 }
238
239 TEST(F32_PRELU__NEON_1X16, channels_gt_16) {
240 TEST_REQUIRES_ARM_NEON;
241 for (size_t channels = 17; channels < 32; channels++) {
242 PReLUMicrokernelTester()
243 .rows(1)
244 .channels(channels)
245 .Test(xnn_f32_prelu_ukernel__neon_1x16);
246 }
247 }
248
249 TEST(F32_PRELU__NEON_1X16, rows_gt_1) {
250 TEST_REQUIRES_ARM_NEON;
251 for (size_t rows = 2; rows < 2; rows++) {
252 for (size_t channels = 1; channels <= 80; channels += 15) {
253 PReLUMicrokernelTester()
254 .rows(rows)
255 .channels(channels)
256 .Test(xnn_f32_prelu_ukernel__neon_1x16);
257 }
258 }
259 }
260
261 TEST(F32_PRELU__NEON_1X16, input_stride) {
262 TEST_REQUIRES_ARM_NEON;
263 for (size_t rows = 1; rows <= 3; rows += 1) {
264 for (size_t channels = 1; channels <= 80; channels += 15) {
265 PReLUMicrokernelTester()
266 .rows(rows)
267 .channels(channels)
268 .input_stride(83)
269 .iterations(1)
270 .Test(xnn_f32_prelu_ukernel__neon_1x16);
271 }
272 }
273 }
274
275 TEST(F32_PRELU__NEON_1X16, output_stride) {
276 TEST_REQUIRES_ARM_NEON;
277 for (size_t rows = 1; rows <= 3; rows += 1) {
278 for (size_t channels = 1; channels <= 80; channels += 15) {
279 PReLUMicrokernelTester()
280 .rows(rows)
281 .channels(channels)
282 .output_stride(83)
283 .iterations(1)
284 .Test(xnn_f32_prelu_ukernel__neon_1x16);
285 }
286 }
287 }
288
289 TEST(F32_PRELU__NEON_1X16, inplace) {
290 TEST_REQUIRES_ARM_NEON;
291 for (size_t rows = 1; rows <= 3; rows += 1) {
292 for (size_t channels = 1; channels <= 80; channels += 15) {
293 PReLUMicrokernelTester()
294 .rows(rows)
295 .channels(channels)
296 .inplace(true)
297 .iterations(1)
298 .Test(xnn_f32_prelu_ukernel__neon_1x16);
299 }
300 }
301 }
302#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
303
304
305#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800306 TEST(F32_PRELU__NEON_2X4, channels_eq_4) {
307 TEST_REQUIRES_ARM_NEON;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700308 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800309 .rows(2)
310 .channels(4)
311 .Test(xnn_f32_prelu_ukernel__neon_2x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700312 }
313
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800314 TEST(F32_PRELU__NEON_2X4, channels_div_4) {
315 TEST_REQUIRES_ARM_NEON;
316 for (size_t channels = 8; channels < 40; channels += 4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700317 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800318 .rows(2)
319 .channels(channels)
320 .Test(xnn_f32_prelu_ukernel__neon_2x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700321 }
322 }
323
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800324 TEST(F32_PRELU__NEON_2X4, channels_lt_4) {
325 TEST_REQUIRES_ARM_NEON;
326 for (size_t channels = 1; channels < 4; channels++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700327 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800328 .rows(2)
329 .channels(channels)
330 .Test(xnn_f32_prelu_ukernel__neon_2x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700331 }
332 }
333
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800334 TEST(F32_PRELU__NEON_2X4, channels_gt_4) {
335 TEST_REQUIRES_ARM_NEON;
336 for (size_t channels = 5; channels < 8; channels++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700337 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800338 .rows(2)
339 .channels(channels)
340 .Test(xnn_f32_prelu_ukernel__neon_2x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700341 }
342 }
343
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800344 TEST(F32_PRELU__NEON_2X4, rows_lt_2) {
345 TEST_REQUIRES_ARM_NEON;
346 for (size_t rows = 1; rows < 2; rows++) {
347 for (size_t channels = 1; channels <= 20; channels += 3) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700348 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800349 .rows(rows)
350 .channels(channels)
351 .Test(xnn_f32_prelu_ukernel__neon_2x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700352 }
353 }
354 }
355
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800356 TEST(F32_PRELU__NEON_2X4, rows_div_2) {
357 TEST_REQUIRES_ARM_NEON;
358 for (size_t rows = 4; rows <= 8; rows += 2) {
359 for (size_t channels = 1; channels <= 20; channels += 3) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700360 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800361 .rows(rows)
362 .channels(channels)
363 .Test(xnn_f32_prelu_ukernel__neon_2x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700364 }
365 }
366 }
367
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800368 TEST(F32_PRELU__NEON_2X4, rows_gt_2) {
369 TEST_REQUIRES_ARM_NEON;
370 for (size_t rows = 3; rows < 4; rows++) {
371 for (size_t channels = 1; channels <= 20; channels += 3) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700372 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800373 .rows(rows)
374 .channels(channels)
375 .Test(xnn_f32_prelu_ukernel__neon_2x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700376 }
377 }
378 }
379
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800380 TEST(F32_PRELU__NEON_2X4, input_stride) {
381 TEST_REQUIRES_ARM_NEON;
382 for (size_t rows = 1; rows <= 6; rows += 1) {
383 for (size_t channels = 1; channels <= 20; channels += 3) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700384 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800385 .rows(rows)
386 .channels(channels)
387 .input_stride(23)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700388 .iterations(1)
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800389 .Test(xnn_f32_prelu_ukernel__neon_2x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700390 }
391 }
392 }
393
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800394 TEST(F32_PRELU__NEON_2X4, output_stride) {
395 TEST_REQUIRES_ARM_NEON;
396 for (size_t rows = 1; rows <= 6; rows += 1) {
397 for (size_t channels = 1; channels <= 20; channels += 3) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700398 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800399 .rows(rows)
400 .channels(channels)
401 .output_stride(23)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700402 .iterations(1)
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800403 .Test(xnn_f32_prelu_ukernel__neon_2x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700404 }
405 }
406 }
407
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800408 TEST(F32_PRELU__NEON_2X4, inplace) {
409 TEST_REQUIRES_ARM_NEON;
410 for (size_t rows = 1; rows <= 6; rows += 1) {
411 for (size_t channels = 1; channels <= 20; channels += 3) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700412 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800413 .rows(rows)
414 .channels(channels)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700415 .inplace(true)
416 .iterations(1)
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800417 .Test(xnn_f32_prelu_ukernel__neon_2x4);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700418 }
419 }
420 }
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800421#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
422
423
424#if XNN_ARCH_ARM || XNN_ARCH_ARM64
425 TEST(F32_PRELU__NEON_2X8, channels_eq_8) {
426 TEST_REQUIRES_ARM_NEON;
427 PReLUMicrokernelTester()
428 .rows(2)
429 .channels(8)
430 .Test(xnn_f32_prelu_ukernel__neon_2x8);
431 }
432
433 TEST(F32_PRELU__NEON_2X8, channels_div_8) {
434 TEST_REQUIRES_ARM_NEON;
435 for (size_t channels = 16; channels < 80; channels += 8) {
436 PReLUMicrokernelTester()
437 .rows(2)
438 .channels(channels)
439 .Test(xnn_f32_prelu_ukernel__neon_2x8);
440 }
441 }
442
443 TEST(F32_PRELU__NEON_2X8, channels_lt_8) {
444 TEST_REQUIRES_ARM_NEON;
445 for (size_t channels = 1; channels < 8; channels++) {
446 PReLUMicrokernelTester()
447 .rows(2)
448 .channels(channels)
449 .Test(xnn_f32_prelu_ukernel__neon_2x8);
450 }
451 }
452
453 TEST(F32_PRELU__NEON_2X8, channels_gt_8) {
454 TEST_REQUIRES_ARM_NEON;
455 for (size_t channels = 9; channels < 16; channels++) {
456 PReLUMicrokernelTester()
457 .rows(2)
458 .channels(channels)
459 .Test(xnn_f32_prelu_ukernel__neon_2x8);
460 }
461 }
462
463 TEST(F32_PRELU__NEON_2X8, rows_lt_2) {
464 TEST_REQUIRES_ARM_NEON;
465 for (size_t rows = 1; rows < 2; rows++) {
466 for (size_t channels = 1; channels <= 40; channels += 7) {
467 PReLUMicrokernelTester()
468 .rows(rows)
469 .channels(channels)
470 .Test(xnn_f32_prelu_ukernel__neon_2x8);
471 }
472 }
473 }
474
475 TEST(F32_PRELU__NEON_2X8, rows_div_2) {
476 TEST_REQUIRES_ARM_NEON;
477 for (size_t rows = 4; rows <= 8; rows += 2) {
478 for (size_t channels = 1; channels <= 40; channels += 7) {
479 PReLUMicrokernelTester()
480 .rows(rows)
481 .channels(channels)
482 .Test(xnn_f32_prelu_ukernel__neon_2x8);
483 }
484 }
485 }
486
487 TEST(F32_PRELU__NEON_2X8, rows_gt_2) {
488 TEST_REQUIRES_ARM_NEON;
489 for (size_t rows = 3; rows < 4; rows++) {
490 for (size_t channels = 1; channels <= 40; channels += 7) {
491 PReLUMicrokernelTester()
492 .rows(rows)
493 .channels(channels)
494 .Test(xnn_f32_prelu_ukernel__neon_2x8);
495 }
496 }
497 }
498
499 TEST(F32_PRELU__NEON_2X8, input_stride) {
500 TEST_REQUIRES_ARM_NEON;
501 for (size_t rows = 1; rows <= 6; rows += 1) {
502 for (size_t channels = 1; channels <= 40; channels += 7) {
503 PReLUMicrokernelTester()
504 .rows(rows)
505 .channels(channels)
506 .input_stride(43)
507 .iterations(1)
508 .Test(xnn_f32_prelu_ukernel__neon_2x8);
509 }
510 }
511 }
512
513 TEST(F32_PRELU__NEON_2X8, output_stride) {
514 TEST_REQUIRES_ARM_NEON;
515 for (size_t rows = 1; rows <= 6; rows += 1) {
516 for (size_t channels = 1; channels <= 40; channels += 7) {
517 PReLUMicrokernelTester()
518 .rows(rows)
519 .channels(channels)
520 .output_stride(43)
521 .iterations(1)
522 .Test(xnn_f32_prelu_ukernel__neon_2x8);
523 }
524 }
525 }
526
527 TEST(F32_PRELU__NEON_2X8, inplace) {
528 TEST_REQUIRES_ARM_NEON;
529 for (size_t rows = 1; rows <= 6; rows += 1) {
530 for (size_t channels = 1; channels <= 40; channels += 7) {
531 PReLUMicrokernelTester()
532 .rows(rows)
533 .channels(channels)
534 .inplace(true)
535 .iterations(1)
536 .Test(xnn_f32_prelu_ukernel__neon_2x8);
537 }
538 }
539 }
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800540#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
541
542
Frank Barcharda5316982020-07-23 13:19:28 -0700543#if XNN_ARCH_ARM || XNN_ARCH_ARM64
544 TEST(F32_PRELU__NEON_2X16, channels_eq_16) {
545 TEST_REQUIRES_ARM_NEON;
546 PReLUMicrokernelTester()
547 .rows(2)
548 .channels(16)
549 .Test(xnn_f32_prelu_ukernel__neon_2x16);
550 }
551
552 TEST(F32_PRELU__NEON_2X16, channels_div_16) {
553 TEST_REQUIRES_ARM_NEON;
554 for (size_t channels = 32; channels < 160; channels += 16) {
555 PReLUMicrokernelTester()
556 .rows(2)
557 .channels(channels)
558 .Test(xnn_f32_prelu_ukernel__neon_2x16);
559 }
560 }
561
562 TEST(F32_PRELU__NEON_2X16, channels_lt_16) {
563 TEST_REQUIRES_ARM_NEON;
564 for (size_t channels = 1; channels < 16; channels++) {
565 PReLUMicrokernelTester()
566 .rows(2)
567 .channels(channels)
568 .Test(xnn_f32_prelu_ukernel__neon_2x16);
569 }
570 }
571
572 TEST(F32_PRELU__NEON_2X16, channels_gt_16) {
573 TEST_REQUIRES_ARM_NEON;
574 for (size_t channels = 17; channels < 32; channels++) {
575 PReLUMicrokernelTester()
576 .rows(2)
577 .channels(channels)
578 .Test(xnn_f32_prelu_ukernel__neon_2x16);
579 }
580 }
581
582 TEST(F32_PRELU__NEON_2X16, rows_lt_2) {
583 TEST_REQUIRES_ARM_NEON;
584 for (size_t rows = 1; rows < 2; rows++) {
585 for (size_t channels = 1; channels <= 80; channels += 15) {
586 PReLUMicrokernelTester()
587 .rows(rows)
588 .channels(channels)
589 .Test(xnn_f32_prelu_ukernel__neon_2x16);
590 }
591 }
592 }
593
594 TEST(F32_PRELU__NEON_2X16, rows_div_2) {
595 TEST_REQUIRES_ARM_NEON;
596 for (size_t rows = 4; rows <= 8; rows += 2) {
597 for (size_t channels = 1; channels <= 80; channels += 15) {
598 PReLUMicrokernelTester()
599 .rows(rows)
600 .channels(channels)
601 .Test(xnn_f32_prelu_ukernel__neon_2x16);
602 }
603 }
604 }
605
606 TEST(F32_PRELU__NEON_2X16, rows_gt_2) {
607 TEST_REQUIRES_ARM_NEON;
608 for (size_t rows = 3; rows < 4; rows++) {
609 for (size_t channels = 1; channels <= 80; channels += 15) {
610 PReLUMicrokernelTester()
611 .rows(rows)
612 .channels(channels)
613 .Test(xnn_f32_prelu_ukernel__neon_2x16);
614 }
615 }
616 }
617
618 TEST(F32_PRELU__NEON_2X16, input_stride) {
619 TEST_REQUIRES_ARM_NEON;
620 for (size_t rows = 1; rows <= 6; rows += 1) {
621 for (size_t channels = 1; channels <= 80; channels += 15) {
622 PReLUMicrokernelTester()
623 .rows(rows)
624 .channels(channels)
625 .input_stride(83)
626 .iterations(1)
627 .Test(xnn_f32_prelu_ukernel__neon_2x16);
628 }
629 }
630 }
631
632 TEST(F32_PRELU__NEON_2X16, output_stride) {
633 TEST_REQUIRES_ARM_NEON;
634 for (size_t rows = 1; rows <= 6; rows += 1) {
635 for (size_t channels = 1; channels <= 80; channels += 15) {
636 PReLUMicrokernelTester()
637 .rows(rows)
638 .channels(channels)
639 .output_stride(83)
640 .iterations(1)
641 .Test(xnn_f32_prelu_ukernel__neon_2x16);
642 }
643 }
644 }
645
646 TEST(F32_PRELU__NEON_2X16, inplace) {
647 TEST_REQUIRES_ARM_NEON;
648 for (size_t rows = 1; rows <= 6; rows += 1) {
649 for (size_t channels = 1; channels <= 80; channels += 15) {
650 PReLUMicrokernelTester()
651 .rows(rows)
652 .channels(channels)
653 .inplace(true)
654 .iterations(1)
655 .Test(xnn_f32_prelu_ukernel__neon_2x16);
656 }
657 }
658 }
659#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
660
661
662#if XNN_ARCH_ARM || XNN_ARCH_ARM64
663 TEST(F32_PRELU__NEON_4X4, channels_eq_4) {
664 TEST_REQUIRES_ARM_NEON;
665 PReLUMicrokernelTester()
666 .rows(4)
667 .channels(4)
668 .Test(xnn_f32_prelu_ukernel__neon_4x4);
669 }
670
671 TEST(F32_PRELU__NEON_4X4, channels_div_4) {
672 TEST_REQUIRES_ARM_NEON;
673 for (size_t channels = 8; channels < 40; channels += 4) {
674 PReLUMicrokernelTester()
675 .rows(4)
676 .channels(channels)
677 .Test(xnn_f32_prelu_ukernel__neon_4x4);
678 }
679 }
680
681 TEST(F32_PRELU__NEON_4X4, channels_lt_4) {
682 TEST_REQUIRES_ARM_NEON;
683 for (size_t channels = 1; channels < 4; channels++) {
684 PReLUMicrokernelTester()
685 .rows(4)
686 .channels(channels)
687 .Test(xnn_f32_prelu_ukernel__neon_4x4);
688 }
689 }
690
691 TEST(F32_PRELU__NEON_4X4, channels_gt_4) {
692 TEST_REQUIRES_ARM_NEON;
693 for (size_t channels = 5; channels < 8; channels++) {
694 PReLUMicrokernelTester()
695 .rows(4)
696 .channels(channels)
697 .Test(xnn_f32_prelu_ukernel__neon_4x4);
698 }
699 }
700
701 TEST(F32_PRELU__NEON_4X4, rows_lt_4) {
702 TEST_REQUIRES_ARM_NEON;
703 for (size_t rows = 1; rows < 4; rows++) {
704 for (size_t channels = 1; channels <= 20; channels += 3) {
705 PReLUMicrokernelTester()
706 .rows(rows)
707 .channels(channels)
708 .Test(xnn_f32_prelu_ukernel__neon_4x4);
709 }
710 }
711 }
712
713 TEST(F32_PRELU__NEON_4X4, rows_div_4) {
714 TEST_REQUIRES_ARM_NEON;
715 for (size_t rows = 8; rows <= 16; rows += 4) {
716 for (size_t channels = 1; channels <= 20; channels += 3) {
717 PReLUMicrokernelTester()
718 .rows(rows)
719 .channels(channels)
720 .Test(xnn_f32_prelu_ukernel__neon_4x4);
721 }
722 }
723 }
724
725 TEST(F32_PRELU__NEON_4X4, rows_gt_4) {
726 TEST_REQUIRES_ARM_NEON;
727 for (size_t rows = 5; rows < 8; rows++) {
728 for (size_t channels = 1; channels <= 20; channels += 3) {
729 PReLUMicrokernelTester()
730 .rows(rows)
731 .channels(channels)
732 .Test(xnn_f32_prelu_ukernel__neon_4x4);
733 }
734 }
735 }
736
737 TEST(F32_PRELU__NEON_4X4, input_stride) {
738 TEST_REQUIRES_ARM_NEON;
739 for (size_t rows = 1; rows <= 12; rows += 3) {
740 for (size_t channels = 1; channels <= 20; channels += 3) {
741 PReLUMicrokernelTester()
742 .rows(rows)
743 .channels(channels)
744 .input_stride(23)
745 .iterations(1)
746 .Test(xnn_f32_prelu_ukernel__neon_4x4);
747 }
748 }
749 }
750
751 TEST(F32_PRELU__NEON_4X4, output_stride) {
752 TEST_REQUIRES_ARM_NEON;
753 for (size_t rows = 1; rows <= 12; rows += 3) {
754 for (size_t channels = 1; channels <= 20; channels += 3) {
755 PReLUMicrokernelTester()
756 .rows(rows)
757 .channels(channels)
758 .output_stride(23)
759 .iterations(1)
760 .Test(xnn_f32_prelu_ukernel__neon_4x4);
761 }
762 }
763 }
764
765 TEST(F32_PRELU__NEON_4X4, inplace) {
766 TEST_REQUIRES_ARM_NEON;
767 for (size_t rows = 1; rows <= 12; rows += 3) {
768 for (size_t channels = 1; channels <= 20; channels += 3) {
769 PReLUMicrokernelTester()
770 .rows(rows)
771 .channels(channels)
772 .inplace(true)
773 .iterations(1)
774 .Test(xnn_f32_prelu_ukernel__neon_4x4);
775 }
776 }
777 }
778#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
779
780
781#if XNN_ARCH_ARM || XNN_ARCH_ARM64
782 TEST(F32_PRELU__NEON_4X8, channels_eq_8) {
783 TEST_REQUIRES_ARM_NEON;
784 PReLUMicrokernelTester()
785 .rows(4)
786 .channels(8)
787 .Test(xnn_f32_prelu_ukernel__neon_4x8);
788 }
789
790 TEST(F32_PRELU__NEON_4X8, channels_div_8) {
791 TEST_REQUIRES_ARM_NEON;
792 for (size_t channels = 16; channels < 80; channels += 8) {
793 PReLUMicrokernelTester()
794 .rows(4)
795 .channels(channels)
796 .Test(xnn_f32_prelu_ukernel__neon_4x8);
797 }
798 }
799
800 TEST(F32_PRELU__NEON_4X8, channels_lt_8) {
801 TEST_REQUIRES_ARM_NEON;
802 for (size_t channels = 1; channels < 8; channels++) {
803 PReLUMicrokernelTester()
804 .rows(4)
805 .channels(channels)
806 .Test(xnn_f32_prelu_ukernel__neon_4x8);
807 }
808 }
809
810 TEST(F32_PRELU__NEON_4X8, channels_gt_8) {
811 TEST_REQUIRES_ARM_NEON;
812 for (size_t channels = 9; channels < 16; channels++) {
813 PReLUMicrokernelTester()
814 .rows(4)
815 .channels(channels)
816 .Test(xnn_f32_prelu_ukernel__neon_4x8);
817 }
818 }
819
820 TEST(F32_PRELU__NEON_4X8, rows_lt_4) {
821 TEST_REQUIRES_ARM_NEON;
822 for (size_t rows = 1; rows < 4; rows++) {
823 for (size_t channels = 1; channels <= 40; channels += 7) {
824 PReLUMicrokernelTester()
825 .rows(rows)
826 .channels(channels)
827 .Test(xnn_f32_prelu_ukernel__neon_4x8);
828 }
829 }
830 }
831
832 TEST(F32_PRELU__NEON_4X8, rows_div_4) {
833 TEST_REQUIRES_ARM_NEON;
834 for (size_t rows = 8; rows <= 16; rows += 4) {
835 for (size_t channels = 1; channels <= 40; channels += 7) {
836 PReLUMicrokernelTester()
837 .rows(rows)
838 .channels(channels)
839 .Test(xnn_f32_prelu_ukernel__neon_4x8);
840 }
841 }
842 }
843
844 TEST(F32_PRELU__NEON_4X8, rows_gt_4) {
845 TEST_REQUIRES_ARM_NEON;
846 for (size_t rows = 5; rows < 8; rows++) {
847 for (size_t channels = 1; channels <= 40; channels += 7) {
848 PReLUMicrokernelTester()
849 .rows(rows)
850 .channels(channels)
851 .Test(xnn_f32_prelu_ukernel__neon_4x8);
852 }
853 }
854 }
855
856 TEST(F32_PRELU__NEON_4X8, input_stride) {
857 TEST_REQUIRES_ARM_NEON;
858 for (size_t rows = 1; rows <= 12; rows += 3) {
859 for (size_t channels = 1; channels <= 40; channels += 7) {
860 PReLUMicrokernelTester()
861 .rows(rows)
862 .channels(channels)
863 .input_stride(43)
864 .iterations(1)
865 .Test(xnn_f32_prelu_ukernel__neon_4x8);
866 }
867 }
868 }
869
870 TEST(F32_PRELU__NEON_4X8, output_stride) {
871 TEST_REQUIRES_ARM_NEON;
872 for (size_t rows = 1; rows <= 12; rows += 3) {
873 for (size_t channels = 1; channels <= 40; channels += 7) {
874 PReLUMicrokernelTester()
875 .rows(rows)
876 .channels(channels)
877 .output_stride(43)
878 .iterations(1)
879 .Test(xnn_f32_prelu_ukernel__neon_4x8);
880 }
881 }
882 }
883
884 TEST(F32_PRELU__NEON_4X8, inplace) {
885 TEST_REQUIRES_ARM_NEON;
886 for (size_t rows = 1; rows <= 12; rows += 3) {
887 for (size_t channels = 1; channels <= 40; channels += 7) {
888 PReLUMicrokernelTester()
889 .rows(rows)
890 .channels(channels)
891 .inplace(true)
892 .iterations(1)
893 .Test(xnn_f32_prelu_ukernel__neon_4x8);
894 }
895 }
896 }
897#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
898
899
900#if XNN_ARCH_ARM || XNN_ARCH_ARM64
901 TEST(F32_PRELU__NEON_4X16, channels_eq_16) {
902 TEST_REQUIRES_ARM_NEON;
903 PReLUMicrokernelTester()
904 .rows(4)
905 .channels(16)
906 .Test(xnn_f32_prelu_ukernel__neon_4x16);
907 }
908
909 TEST(F32_PRELU__NEON_4X16, channels_div_16) {
910 TEST_REQUIRES_ARM_NEON;
911 for (size_t channels = 32; channels < 160; channels += 16) {
912 PReLUMicrokernelTester()
913 .rows(4)
914 .channels(channels)
915 .Test(xnn_f32_prelu_ukernel__neon_4x16);
916 }
917 }
918
919 TEST(F32_PRELU__NEON_4X16, channels_lt_16) {
920 TEST_REQUIRES_ARM_NEON;
921 for (size_t channels = 1; channels < 16; channels++) {
922 PReLUMicrokernelTester()
923 .rows(4)
924 .channels(channels)
925 .Test(xnn_f32_prelu_ukernel__neon_4x16);
926 }
927 }
928
929 TEST(F32_PRELU__NEON_4X16, channels_gt_16) {
930 TEST_REQUIRES_ARM_NEON;
931 for (size_t channels = 17; channels < 32; channels++) {
932 PReLUMicrokernelTester()
933 .rows(4)
934 .channels(channels)
935 .Test(xnn_f32_prelu_ukernel__neon_4x16);
936 }
937 }
938
939 TEST(F32_PRELU__NEON_4X16, rows_lt_4) {
940 TEST_REQUIRES_ARM_NEON;
941 for (size_t rows = 1; rows < 4; rows++) {
942 for (size_t channels = 1; channels <= 80; channels += 15) {
943 PReLUMicrokernelTester()
944 .rows(rows)
945 .channels(channels)
946 .Test(xnn_f32_prelu_ukernel__neon_4x16);
947 }
948 }
949 }
950
951 TEST(F32_PRELU__NEON_4X16, rows_div_4) {
952 TEST_REQUIRES_ARM_NEON;
953 for (size_t rows = 8; rows <= 16; rows += 4) {
954 for (size_t channels = 1; channels <= 80; channels += 15) {
955 PReLUMicrokernelTester()
956 .rows(rows)
957 .channels(channels)
958 .Test(xnn_f32_prelu_ukernel__neon_4x16);
959 }
960 }
961 }
962
963 TEST(F32_PRELU__NEON_4X16, rows_gt_4) {
964 TEST_REQUIRES_ARM_NEON;
965 for (size_t rows = 5; rows < 8; rows++) {
966 for (size_t channels = 1; channels <= 80; channels += 15) {
967 PReLUMicrokernelTester()
968 .rows(rows)
969 .channels(channels)
970 .Test(xnn_f32_prelu_ukernel__neon_4x16);
971 }
972 }
973 }
974
975 TEST(F32_PRELU__NEON_4X16, input_stride) {
976 TEST_REQUIRES_ARM_NEON;
977 for (size_t rows = 1; rows <= 12; rows += 3) {
978 for (size_t channels = 1; channels <= 80; channels += 15) {
979 PReLUMicrokernelTester()
980 .rows(rows)
981 .channels(channels)
982 .input_stride(83)
983 .iterations(1)
984 .Test(xnn_f32_prelu_ukernel__neon_4x16);
985 }
986 }
987 }
988
989 TEST(F32_PRELU__NEON_4X16, output_stride) {
990 TEST_REQUIRES_ARM_NEON;
991 for (size_t rows = 1; rows <= 12; rows += 3) {
992 for (size_t channels = 1; channels <= 80; channels += 15) {
993 PReLUMicrokernelTester()
994 .rows(rows)
995 .channels(channels)
996 .output_stride(83)
997 .iterations(1)
998 .Test(xnn_f32_prelu_ukernel__neon_4x16);
999 }
1000 }
1001 }
1002
1003 TEST(F32_PRELU__NEON_4X16, inplace) {
1004 TEST_REQUIRES_ARM_NEON;
1005 for (size_t rows = 1; rows <= 12; rows += 3) {
1006 for (size_t channels = 1; channels <= 80; channels += 15) {
1007 PReLUMicrokernelTester()
1008 .rows(rows)
1009 .channels(channels)
1010 .inplace(true)
1011 .iterations(1)
1012 .Test(xnn_f32_prelu_ukernel__neon_4x16);
1013 }
1014 }
1015 }
1016#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1017
1018
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001019#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan39b5e942020-06-24 15:03:48 -07001020 TEST(F32_PRELU__SSE_2X4, channels_eq_4) {
1021 TEST_REQUIRES_X86_SSE;
1022 PReLUMicrokernelTester()
1023 .rows(2)
1024 .channels(4)
1025 .Test(xnn_f32_prelu_ukernel__sse_2x4);
1026 }
1027
1028 TEST(F32_PRELU__SSE_2X4, channels_div_4) {
1029 TEST_REQUIRES_X86_SSE;
1030 for (size_t channels = 8; channels < 40; channels += 4) {
1031 PReLUMicrokernelTester()
1032 .rows(2)
1033 .channels(channels)
1034 .Test(xnn_f32_prelu_ukernel__sse_2x4);
1035 }
1036 }
1037
1038 TEST(F32_PRELU__SSE_2X4, channels_lt_4) {
1039 TEST_REQUIRES_X86_SSE;
1040 for (size_t channels = 1; channels < 4; channels++) {
1041 PReLUMicrokernelTester()
1042 .rows(2)
1043 .channels(channels)
1044 .Test(xnn_f32_prelu_ukernel__sse_2x4);
1045 }
1046 }
1047
1048 TEST(F32_PRELU__SSE_2X4, channels_gt_4) {
1049 TEST_REQUIRES_X86_SSE;
1050 for (size_t channels = 5; channels < 8; channels++) {
1051 PReLUMicrokernelTester()
1052 .rows(2)
1053 .channels(channels)
1054 .Test(xnn_f32_prelu_ukernel__sse_2x4);
1055 }
1056 }
1057
1058 TEST(F32_PRELU__SSE_2X4, rows_lt_2) {
1059 TEST_REQUIRES_X86_SSE;
1060 for (size_t rows = 1; rows < 2; rows++) {
1061 for (size_t channels = 1; channels <= 20; channels += 3) {
1062 PReLUMicrokernelTester()
1063 .rows(rows)
1064 .channels(channels)
1065 .Test(xnn_f32_prelu_ukernel__sse_2x4);
1066 }
1067 }
1068 }
1069
1070 TEST(F32_PRELU__SSE_2X4, rows_div_2) {
1071 TEST_REQUIRES_X86_SSE;
1072 for (size_t rows = 4; rows <= 8; rows += 2) {
1073 for (size_t channels = 1; channels <= 20; channels += 3) {
1074 PReLUMicrokernelTester()
1075 .rows(rows)
1076 .channels(channels)
1077 .Test(xnn_f32_prelu_ukernel__sse_2x4);
1078 }
1079 }
1080 }
1081
1082 TEST(F32_PRELU__SSE_2X4, rows_gt_2) {
1083 TEST_REQUIRES_X86_SSE;
1084 for (size_t rows = 3; rows < 4; rows++) {
1085 for (size_t channels = 1; channels <= 20; channels += 3) {
1086 PReLUMicrokernelTester()
1087 .rows(rows)
1088 .channels(channels)
1089 .Test(xnn_f32_prelu_ukernel__sse_2x4);
1090 }
1091 }
1092 }
1093
1094 TEST(F32_PRELU__SSE_2X4, input_stride) {
1095 TEST_REQUIRES_X86_SSE;
1096 for (size_t rows = 1; rows <= 6; rows += 1) {
1097 for (size_t channels = 1; channels <= 20; channels += 3) {
1098 PReLUMicrokernelTester()
1099 .rows(rows)
1100 .channels(channels)
1101 .input_stride(23)
1102 .iterations(1)
1103 .Test(xnn_f32_prelu_ukernel__sse_2x4);
1104 }
1105 }
1106 }
1107
1108 TEST(F32_PRELU__SSE_2X4, output_stride) {
1109 TEST_REQUIRES_X86_SSE;
1110 for (size_t rows = 1; rows <= 6; rows += 1) {
1111 for (size_t channels = 1; channels <= 20; channels += 3) {
1112 PReLUMicrokernelTester()
1113 .rows(rows)
1114 .channels(channels)
1115 .output_stride(23)
1116 .iterations(1)
1117 .Test(xnn_f32_prelu_ukernel__sse_2x4);
1118 }
1119 }
1120 }
1121
1122 TEST(F32_PRELU__SSE_2X4, inplace) {
1123 TEST_REQUIRES_X86_SSE;
1124 for (size_t rows = 1; rows <= 6; rows += 1) {
1125 for (size_t channels = 1; channels <= 20; channels += 3) {
1126 PReLUMicrokernelTester()
1127 .rows(rows)
1128 .channels(channels)
1129 .inplace(true)
1130 .iterations(1)
1131 .Test(xnn_f32_prelu_ukernel__sse_2x4);
1132 }
1133 }
1134 }
1135#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1136
1137
1138#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1139 TEST(F32_PRELU__SSE_2X8, channels_eq_8) {
1140 TEST_REQUIRES_X86_SSE;
1141 PReLUMicrokernelTester()
1142 .rows(2)
1143 .channels(8)
1144 .Test(xnn_f32_prelu_ukernel__sse_2x8);
1145 }
1146
1147 TEST(F32_PRELU__SSE_2X8, channels_div_8) {
1148 TEST_REQUIRES_X86_SSE;
1149 for (size_t channels = 16; channels < 80; channels += 8) {
1150 PReLUMicrokernelTester()
1151 .rows(2)
1152 .channels(channels)
1153 .Test(xnn_f32_prelu_ukernel__sse_2x8);
1154 }
1155 }
1156
1157 TEST(F32_PRELU__SSE_2X8, channels_lt_8) {
1158 TEST_REQUIRES_X86_SSE;
1159 for (size_t channels = 1; channels < 8; channels++) {
1160 PReLUMicrokernelTester()
1161 .rows(2)
1162 .channels(channels)
1163 .Test(xnn_f32_prelu_ukernel__sse_2x8);
1164 }
1165 }
1166
1167 TEST(F32_PRELU__SSE_2X8, channels_gt_8) {
1168 TEST_REQUIRES_X86_SSE;
1169 for (size_t channels = 9; channels < 16; channels++) {
1170 PReLUMicrokernelTester()
1171 .rows(2)
1172 .channels(channels)
1173 .Test(xnn_f32_prelu_ukernel__sse_2x8);
1174 }
1175 }
1176
1177 TEST(F32_PRELU__SSE_2X8, rows_lt_2) {
1178 TEST_REQUIRES_X86_SSE;
1179 for (size_t rows = 1; rows < 2; rows++) {
1180 for (size_t channels = 1; channels <= 40; channels += 7) {
1181 PReLUMicrokernelTester()
1182 .rows(rows)
1183 .channels(channels)
1184 .Test(xnn_f32_prelu_ukernel__sse_2x8);
1185 }
1186 }
1187 }
1188
1189 TEST(F32_PRELU__SSE_2X8, rows_div_2) {
1190 TEST_REQUIRES_X86_SSE;
1191 for (size_t rows = 4; rows <= 8; rows += 2) {
1192 for (size_t channels = 1; channels <= 40; channels += 7) {
1193 PReLUMicrokernelTester()
1194 .rows(rows)
1195 .channels(channels)
1196 .Test(xnn_f32_prelu_ukernel__sse_2x8);
1197 }
1198 }
1199 }
1200
1201 TEST(F32_PRELU__SSE_2X8, rows_gt_2) {
1202 TEST_REQUIRES_X86_SSE;
1203 for (size_t rows = 3; rows < 4; rows++) {
1204 for (size_t channels = 1; channels <= 40; channels += 7) {
1205 PReLUMicrokernelTester()
1206 .rows(rows)
1207 .channels(channels)
1208 .Test(xnn_f32_prelu_ukernel__sse_2x8);
1209 }
1210 }
1211 }
1212
1213 TEST(F32_PRELU__SSE_2X8, input_stride) {
1214 TEST_REQUIRES_X86_SSE;
1215 for (size_t rows = 1; rows <= 6; rows += 1) {
1216 for (size_t channels = 1; channels <= 40; channels += 7) {
1217 PReLUMicrokernelTester()
1218 .rows(rows)
1219 .channels(channels)
1220 .input_stride(43)
1221 .iterations(1)
1222 .Test(xnn_f32_prelu_ukernel__sse_2x8);
1223 }
1224 }
1225 }
1226
1227 TEST(F32_PRELU__SSE_2X8, output_stride) {
1228 TEST_REQUIRES_X86_SSE;
1229 for (size_t rows = 1; rows <= 6; rows += 1) {
1230 for (size_t channels = 1; channels <= 40; channels += 7) {
1231 PReLUMicrokernelTester()
1232 .rows(rows)
1233 .channels(channels)
1234 .output_stride(43)
1235 .iterations(1)
1236 .Test(xnn_f32_prelu_ukernel__sse_2x8);
1237 }
1238 }
1239 }
1240
1241 TEST(F32_PRELU__SSE_2X8, inplace) {
1242 TEST_REQUIRES_X86_SSE;
1243 for (size_t rows = 1; rows <= 6; rows += 1) {
1244 for (size_t channels = 1; channels <= 40; channels += 7) {
1245 PReLUMicrokernelTester()
1246 .rows(rows)
1247 .channels(channels)
1248 .inplace(true)
1249 .iterations(1)
1250 .Test(xnn_f32_prelu_ukernel__sse_2x8);
1251 }
1252 }
1253 }
1254#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1255
1256
1257#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001258 TEST(F32_PRELU__SSE2_2X4, channels_eq_4) {
1259 TEST_REQUIRES_X86_SSE2;
1260 PReLUMicrokernelTester()
1261 .rows(2)
1262 .channels(4)
1263 .Test(xnn_f32_prelu_ukernel__sse2_2x4);
1264 }
1265
1266 TEST(F32_PRELU__SSE2_2X4, channels_div_4) {
1267 TEST_REQUIRES_X86_SSE2;
1268 for (size_t channels = 8; channels < 40; channels += 4) {
1269 PReLUMicrokernelTester()
1270 .rows(2)
1271 .channels(channels)
1272 .Test(xnn_f32_prelu_ukernel__sse2_2x4);
1273 }
1274 }
1275
1276 TEST(F32_PRELU__SSE2_2X4, channels_lt_4) {
1277 TEST_REQUIRES_X86_SSE2;
1278 for (size_t channels = 1; channels < 4; channels++) {
1279 PReLUMicrokernelTester()
1280 .rows(2)
1281 .channels(channels)
1282 .Test(xnn_f32_prelu_ukernel__sse2_2x4);
1283 }
1284 }
1285
1286 TEST(F32_PRELU__SSE2_2X4, channels_gt_4) {
1287 TEST_REQUIRES_X86_SSE2;
1288 for (size_t channels = 5; channels < 8; channels++) {
1289 PReLUMicrokernelTester()
1290 .rows(2)
1291 .channels(channels)
1292 .Test(xnn_f32_prelu_ukernel__sse2_2x4);
1293 }
1294 }
1295
1296 TEST(F32_PRELU__SSE2_2X4, rows_lt_2) {
1297 TEST_REQUIRES_X86_SSE2;
1298 for (size_t rows = 1; rows < 2; rows++) {
1299 for (size_t channels = 1; channels <= 20; channels += 3) {
1300 PReLUMicrokernelTester()
1301 .rows(rows)
1302 .channels(channels)
1303 .Test(xnn_f32_prelu_ukernel__sse2_2x4);
1304 }
1305 }
1306 }
1307
1308 TEST(F32_PRELU__SSE2_2X4, rows_div_2) {
1309 TEST_REQUIRES_X86_SSE2;
1310 for (size_t rows = 4; rows <= 8; rows += 2) {
1311 for (size_t channels = 1; channels <= 20; channels += 3) {
1312 PReLUMicrokernelTester()
1313 .rows(rows)
1314 .channels(channels)
1315 .Test(xnn_f32_prelu_ukernel__sse2_2x4);
1316 }
1317 }
1318 }
1319
1320 TEST(F32_PRELU__SSE2_2X4, rows_gt_2) {
1321 TEST_REQUIRES_X86_SSE2;
1322 for (size_t rows = 3; rows < 4; rows++) {
1323 for (size_t channels = 1; channels <= 20; channels += 3) {
1324 PReLUMicrokernelTester()
1325 .rows(rows)
1326 .channels(channels)
1327 .Test(xnn_f32_prelu_ukernel__sse2_2x4);
1328 }
1329 }
1330 }
1331
1332 TEST(F32_PRELU__SSE2_2X4, input_stride) {
1333 TEST_REQUIRES_X86_SSE2;
1334 for (size_t rows = 1; rows <= 6; rows += 1) {
1335 for (size_t channels = 1; channels <= 20; channels += 3) {
1336 PReLUMicrokernelTester()
1337 .rows(rows)
1338 .channels(channels)
1339 .input_stride(23)
1340 .iterations(1)
1341 .Test(xnn_f32_prelu_ukernel__sse2_2x4);
1342 }
1343 }
1344 }
1345
1346 TEST(F32_PRELU__SSE2_2X4, output_stride) {
1347 TEST_REQUIRES_X86_SSE2;
1348 for (size_t rows = 1; rows <= 6; rows += 1) {
1349 for (size_t channels = 1; channels <= 20; channels += 3) {
1350 PReLUMicrokernelTester()
1351 .rows(rows)
1352 .channels(channels)
1353 .output_stride(23)
1354 .iterations(1)
1355 .Test(xnn_f32_prelu_ukernel__sse2_2x4);
1356 }
1357 }
1358 }
1359
1360 TEST(F32_PRELU__SSE2_2X4, inplace) {
1361 TEST_REQUIRES_X86_SSE2;
1362 for (size_t rows = 1; rows <= 6; rows += 1) {
1363 for (size_t channels = 1; channels <= 20; channels += 3) {
1364 PReLUMicrokernelTester()
1365 .rows(rows)
1366 .channels(channels)
1367 .inplace(true)
1368 .iterations(1)
1369 .Test(xnn_f32_prelu_ukernel__sse2_2x4);
1370 }
1371 }
1372 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001373#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001374
1375
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001376#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1377 TEST(F32_PRELU__SSE2_2X8, channels_eq_8) {
1378 TEST_REQUIRES_X86_SSE2;
XNNPACK Teamb455b122019-09-27 18:10:33 -07001379 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001380 .rows(2)
1381 .channels(8)
1382 .Test(xnn_f32_prelu_ukernel__sse2_2x8);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001383 }
1384
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001385 TEST(F32_PRELU__SSE2_2X8, channels_div_8) {
1386 TEST_REQUIRES_X86_SSE2;
1387 for (size_t channels = 16; channels < 80; channels += 8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001388 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001389 .rows(2)
1390 .channels(channels)
1391 .Test(xnn_f32_prelu_ukernel__sse2_2x8);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001392 }
1393 }
1394
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001395 TEST(F32_PRELU__SSE2_2X8, channels_lt_8) {
1396 TEST_REQUIRES_X86_SSE2;
1397 for (size_t channels = 1; channels < 8; channels++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001398 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001399 .rows(2)
1400 .channels(channels)
1401 .Test(xnn_f32_prelu_ukernel__sse2_2x8);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001402 }
1403 }
1404
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001405 TEST(F32_PRELU__SSE2_2X8, channels_gt_8) {
1406 TEST_REQUIRES_X86_SSE2;
1407 for (size_t channels = 9; channels < 16; channels++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001408 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001409 .rows(2)
1410 .channels(channels)
1411 .Test(xnn_f32_prelu_ukernel__sse2_2x8);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001412 }
1413 }
1414
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001415 TEST(F32_PRELU__SSE2_2X8, rows_lt_2) {
1416 TEST_REQUIRES_X86_SSE2;
1417 for (size_t rows = 1; rows < 2; rows++) {
1418 for (size_t channels = 1; channels <= 40; channels += 7) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001419 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001420 .rows(rows)
1421 .channels(channels)
1422 .Test(xnn_f32_prelu_ukernel__sse2_2x8);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001423 }
1424 }
1425 }
1426
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001427 TEST(F32_PRELU__SSE2_2X8, rows_div_2) {
1428 TEST_REQUIRES_X86_SSE2;
1429 for (size_t rows = 4; rows <= 8; rows += 2) {
1430 for (size_t channels = 1; channels <= 40; channels += 7) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001431 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001432 .rows(rows)
1433 .channels(channels)
1434 .Test(xnn_f32_prelu_ukernel__sse2_2x8);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001435 }
1436 }
1437 }
1438
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001439 TEST(F32_PRELU__SSE2_2X8, rows_gt_2) {
1440 TEST_REQUIRES_X86_SSE2;
1441 for (size_t rows = 3; rows < 4; rows++) {
1442 for (size_t channels = 1; channels <= 40; channels += 7) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001443 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001444 .rows(rows)
1445 .channels(channels)
1446 .Test(xnn_f32_prelu_ukernel__sse2_2x8);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001447 }
1448 }
1449 }
1450
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001451 TEST(F32_PRELU__SSE2_2X8, input_stride) {
1452 TEST_REQUIRES_X86_SSE2;
1453 for (size_t rows = 1; rows <= 6; rows += 1) {
1454 for (size_t channels = 1; channels <= 40; channels += 7) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001455 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001456 .rows(rows)
1457 .channels(channels)
1458 .input_stride(43)
XNNPACK Teamb455b122019-09-27 18:10:33 -07001459 .iterations(1)
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001460 .Test(xnn_f32_prelu_ukernel__sse2_2x8);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001461 }
1462 }
1463 }
1464
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001465 TEST(F32_PRELU__SSE2_2X8, output_stride) {
1466 TEST_REQUIRES_X86_SSE2;
1467 for (size_t rows = 1; rows <= 6; rows += 1) {
1468 for (size_t channels = 1; channels <= 40; channels += 7) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001469 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001470 .rows(rows)
1471 .channels(channels)
1472 .output_stride(43)
XNNPACK Teamb455b122019-09-27 18:10:33 -07001473 .iterations(1)
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001474 .Test(xnn_f32_prelu_ukernel__sse2_2x8);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001475 }
1476 }
1477 }
1478
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001479 TEST(F32_PRELU__SSE2_2X8, inplace) {
1480 TEST_REQUIRES_X86_SSE2;
1481 for (size_t rows = 1; rows <= 6; rows += 1) {
1482 for (size_t channels = 1; channels <= 40; channels += 7) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001483 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001484 .rows(rows)
1485 .channels(channels)
XNNPACK Teamb455b122019-09-27 18:10:33 -07001486 .inplace(true)
1487 .iterations(1)
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001488 .Test(xnn_f32_prelu_ukernel__sse2_2x8);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001489 }
1490 }
1491 }
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001492#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001493
1494
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001495#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1496 TEST(F32_PRELU__SSE41_2X4, channels_eq_4) {
1497 TEST_REQUIRES_X86_SSE41;
1498 PReLUMicrokernelTester()
1499 .rows(2)
1500 .channels(4)
1501 .Test(xnn_f32_prelu_ukernel__sse41_2x4);
1502 }
1503
1504 TEST(F32_PRELU__SSE41_2X4, channels_div_4) {
1505 TEST_REQUIRES_X86_SSE41;
1506 for (size_t channels = 8; channels < 40; channels += 4) {
1507 PReLUMicrokernelTester()
1508 .rows(2)
1509 .channels(channels)
1510 .Test(xnn_f32_prelu_ukernel__sse41_2x4);
1511 }
1512 }
1513
1514 TEST(F32_PRELU__SSE41_2X4, channels_lt_4) {
1515 TEST_REQUIRES_X86_SSE41;
1516 for (size_t channels = 1; channels < 4; channels++) {
1517 PReLUMicrokernelTester()
1518 .rows(2)
1519 .channels(channels)
1520 .Test(xnn_f32_prelu_ukernel__sse41_2x4);
1521 }
1522 }
1523
1524 TEST(F32_PRELU__SSE41_2X4, channels_gt_4) {
1525 TEST_REQUIRES_X86_SSE41;
1526 for (size_t channels = 5; channels < 8; channels++) {
1527 PReLUMicrokernelTester()
1528 .rows(2)
1529 .channels(channels)
1530 .Test(xnn_f32_prelu_ukernel__sse41_2x4);
1531 }
1532 }
1533
1534 TEST(F32_PRELU__SSE41_2X4, rows_lt_2) {
1535 TEST_REQUIRES_X86_SSE41;
1536 for (size_t rows = 1; rows < 2; rows++) {
1537 for (size_t channels = 1; channels <= 20; channels += 3) {
1538 PReLUMicrokernelTester()
1539 .rows(rows)
1540 .channels(channels)
1541 .Test(xnn_f32_prelu_ukernel__sse41_2x4);
1542 }
1543 }
1544 }
1545
1546 TEST(F32_PRELU__SSE41_2X4, rows_div_2) {
1547 TEST_REQUIRES_X86_SSE41;
1548 for (size_t rows = 4; rows <= 8; rows += 2) {
1549 for (size_t channels = 1; channels <= 20; channels += 3) {
1550 PReLUMicrokernelTester()
1551 .rows(rows)
1552 .channels(channels)
1553 .Test(xnn_f32_prelu_ukernel__sse41_2x4);
1554 }
1555 }
1556 }
1557
1558 TEST(F32_PRELU__SSE41_2X4, rows_gt_2) {
1559 TEST_REQUIRES_X86_SSE41;
1560 for (size_t rows = 3; rows < 4; rows++) {
1561 for (size_t channels = 1; channels <= 20; channels += 3) {
1562 PReLUMicrokernelTester()
1563 .rows(rows)
1564 .channels(channels)
1565 .Test(xnn_f32_prelu_ukernel__sse41_2x4);
1566 }
1567 }
1568 }
1569
1570 TEST(F32_PRELU__SSE41_2X4, input_stride) {
1571 TEST_REQUIRES_X86_SSE41;
1572 for (size_t rows = 1; rows <= 6; rows += 1) {
1573 for (size_t channels = 1; channels <= 20; channels += 3) {
1574 PReLUMicrokernelTester()
1575 .rows(rows)
1576 .channels(channels)
1577 .input_stride(23)
1578 .iterations(1)
1579 .Test(xnn_f32_prelu_ukernel__sse41_2x4);
1580 }
1581 }
1582 }
1583
1584 TEST(F32_PRELU__SSE41_2X4, output_stride) {
1585 TEST_REQUIRES_X86_SSE41;
1586 for (size_t rows = 1; rows <= 6; rows += 1) {
1587 for (size_t channels = 1; channels <= 20; channels += 3) {
1588 PReLUMicrokernelTester()
1589 .rows(rows)
1590 .channels(channels)
1591 .output_stride(23)
1592 .iterations(1)
1593 .Test(xnn_f32_prelu_ukernel__sse41_2x4);
1594 }
1595 }
1596 }
1597
1598 TEST(F32_PRELU__SSE41_2X4, inplace) {
1599 TEST_REQUIRES_X86_SSE41;
1600 for (size_t rows = 1; rows <= 6; rows += 1) {
1601 for (size_t channels = 1; channels <= 20; channels += 3) {
1602 PReLUMicrokernelTester()
1603 .rows(rows)
1604 .channels(channels)
1605 .inplace(true)
1606 .iterations(1)
1607 .Test(xnn_f32_prelu_ukernel__sse41_2x4);
1608 }
1609 }
1610 }
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001611#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1612
1613
1614#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1615 TEST(F32_PRELU__SSE41_2X8, channels_eq_8) {
1616 TEST_REQUIRES_X86_SSE41;
1617 PReLUMicrokernelTester()
1618 .rows(2)
1619 .channels(8)
1620 .Test(xnn_f32_prelu_ukernel__sse41_2x8);
1621 }
1622
1623 TEST(F32_PRELU__SSE41_2X8, channels_div_8) {
1624 TEST_REQUIRES_X86_SSE41;
1625 for (size_t channels = 16; channels < 80; channels += 8) {
1626 PReLUMicrokernelTester()
1627 .rows(2)
1628 .channels(channels)
1629 .Test(xnn_f32_prelu_ukernel__sse41_2x8);
1630 }
1631 }
1632
1633 TEST(F32_PRELU__SSE41_2X8, channels_lt_8) {
1634 TEST_REQUIRES_X86_SSE41;
1635 for (size_t channels = 1; channels < 8; channels++) {
1636 PReLUMicrokernelTester()
1637 .rows(2)
1638 .channels(channels)
1639 .Test(xnn_f32_prelu_ukernel__sse41_2x8);
1640 }
1641 }
1642
1643 TEST(F32_PRELU__SSE41_2X8, channels_gt_8) {
1644 TEST_REQUIRES_X86_SSE41;
1645 for (size_t channels = 9; channels < 16; channels++) {
1646 PReLUMicrokernelTester()
1647 .rows(2)
1648 .channels(channels)
1649 .Test(xnn_f32_prelu_ukernel__sse41_2x8);
1650 }
1651 }
1652
1653 TEST(F32_PRELU__SSE41_2X8, rows_lt_2) {
1654 TEST_REQUIRES_X86_SSE41;
1655 for (size_t rows = 1; rows < 2; rows++) {
1656 for (size_t channels = 1; channels <= 40; channels += 7) {
1657 PReLUMicrokernelTester()
1658 .rows(rows)
1659 .channels(channels)
1660 .Test(xnn_f32_prelu_ukernel__sse41_2x8);
1661 }
1662 }
1663 }
1664
1665 TEST(F32_PRELU__SSE41_2X8, rows_div_2) {
1666 TEST_REQUIRES_X86_SSE41;
1667 for (size_t rows = 4; rows <= 8; rows += 2) {
1668 for (size_t channels = 1; channels <= 40; channels += 7) {
1669 PReLUMicrokernelTester()
1670 .rows(rows)
1671 .channels(channels)
1672 .Test(xnn_f32_prelu_ukernel__sse41_2x8);
1673 }
1674 }
1675 }
1676
1677 TEST(F32_PRELU__SSE41_2X8, rows_gt_2) {
1678 TEST_REQUIRES_X86_SSE41;
1679 for (size_t rows = 3; rows < 4; rows++) {
1680 for (size_t channels = 1; channels <= 40; channels += 7) {
1681 PReLUMicrokernelTester()
1682 .rows(rows)
1683 .channels(channels)
1684 .Test(xnn_f32_prelu_ukernel__sse41_2x8);
1685 }
1686 }
1687 }
1688
1689 TEST(F32_PRELU__SSE41_2X8, input_stride) {
1690 TEST_REQUIRES_X86_SSE41;
1691 for (size_t rows = 1; rows <= 6; rows += 1) {
1692 for (size_t channels = 1; channels <= 40; channels += 7) {
1693 PReLUMicrokernelTester()
1694 .rows(rows)
1695 .channels(channels)
1696 .input_stride(43)
1697 .iterations(1)
1698 .Test(xnn_f32_prelu_ukernel__sse41_2x8);
1699 }
1700 }
1701 }
1702
1703 TEST(F32_PRELU__SSE41_2X8, output_stride) {
1704 TEST_REQUIRES_X86_SSE41;
1705 for (size_t rows = 1; rows <= 6; rows += 1) {
1706 for (size_t channels = 1; channels <= 40; channels += 7) {
1707 PReLUMicrokernelTester()
1708 .rows(rows)
1709 .channels(channels)
1710 .output_stride(43)
1711 .iterations(1)
1712 .Test(xnn_f32_prelu_ukernel__sse41_2x8);
1713 }
1714 }
1715 }
1716
1717 TEST(F32_PRELU__SSE41_2X8, inplace) {
1718 TEST_REQUIRES_X86_SSE41;
1719 for (size_t rows = 1; rows <= 6; rows += 1) {
1720 for (size_t channels = 1; channels <= 40; channels += 7) {
1721 PReLUMicrokernelTester()
1722 .rows(rows)
1723 .channels(channels)
1724 .inplace(true)
1725 .iterations(1)
1726 .Test(xnn_f32_prelu_ukernel__sse41_2x8);
1727 }
1728 }
1729 }
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08001730#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1731
1732
Marat Dukhan90eca0a2020-03-11 00:52:23 -07001733#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1734 TEST(F32_PRELU__AVX_2X8, channels_eq_8) {
1735 TEST_REQUIRES_X86_AVX;
1736 PReLUMicrokernelTester()
1737 .rows(2)
1738 .channels(8)
1739 .Test(xnn_f32_prelu_ukernel__avx_2x8);
1740 }
1741
1742 TEST(F32_PRELU__AVX_2X8, channels_div_8) {
1743 TEST_REQUIRES_X86_AVX;
1744 for (size_t channels = 16; channels < 80; channels += 8) {
1745 PReLUMicrokernelTester()
1746 .rows(2)
1747 .channels(channels)
1748 .Test(xnn_f32_prelu_ukernel__avx_2x8);
1749 }
1750 }
1751
1752 TEST(F32_PRELU__AVX_2X8, channels_lt_8) {
1753 TEST_REQUIRES_X86_AVX;
1754 for (size_t channels = 1; channels < 8; channels++) {
1755 PReLUMicrokernelTester()
1756 .rows(2)
1757 .channels(channels)
1758 .Test(xnn_f32_prelu_ukernel__avx_2x8);
1759 }
1760 }
1761
1762 TEST(F32_PRELU__AVX_2X8, channels_gt_8) {
1763 TEST_REQUIRES_X86_AVX;
1764 for (size_t channels = 9; channels < 16; channels++) {
1765 PReLUMicrokernelTester()
1766 .rows(2)
1767 .channels(channels)
1768 .Test(xnn_f32_prelu_ukernel__avx_2x8);
1769 }
1770 }
1771
1772 TEST(F32_PRELU__AVX_2X8, rows_lt_2) {
1773 TEST_REQUIRES_X86_AVX;
1774 for (size_t rows = 1; rows < 2; rows++) {
1775 for (size_t channels = 1; channels <= 40; channels += 7) {
1776 PReLUMicrokernelTester()
1777 .rows(rows)
1778 .channels(channels)
1779 .Test(xnn_f32_prelu_ukernel__avx_2x8);
1780 }
1781 }
1782 }
1783
1784 TEST(F32_PRELU__AVX_2X8, rows_div_2) {
1785 TEST_REQUIRES_X86_AVX;
1786 for (size_t rows = 4; rows <= 8; rows += 2) {
1787 for (size_t channels = 1; channels <= 40; channels += 7) {
1788 PReLUMicrokernelTester()
1789 .rows(rows)
1790 .channels(channels)
1791 .Test(xnn_f32_prelu_ukernel__avx_2x8);
1792 }
1793 }
1794 }
1795
1796 TEST(F32_PRELU__AVX_2X8, rows_gt_2) {
1797 TEST_REQUIRES_X86_AVX;
1798 for (size_t rows = 3; rows < 4; rows++) {
1799 for (size_t channels = 1; channels <= 40; channels += 7) {
1800 PReLUMicrokernelTester()
1801 .rows(rows)
1802 .channels(channels)
1803 .Test(xnn_f32_prelu_ukernel__avx_2x8);
1804 }
1805 }
1806 }
1807
1808 TEST(F32_PRELU__AVX_2X8, input_stride) {
1809 TEST_REQUIRES_X86_AVX;
1810 for (size_t rows = 1; rows <= 6; rows += 1) {
1811 for (size_t channels = 1; channels <= 40; channels += 7) {
1812 PReLUMicrokernelTester()
1813 .rows(rows)
1814 .channels(channels)
1815 .input_stride(43)
1816 .iterations(1)
1817 .Test(xnn_f32_prelu_ukernel__avx_2x8);
1818 }
1819 }
1820 }
1821
1822 TEST(F32_PRELU__AVX_2X8, output_stride) {
1823 TEST_REQUIRES_X86_AVX;
1824 for (size_t rows = 1; rows <= 6; rows += 1) {
1825 for (size_t channels = 1; channels <= 40; channels += 7) {
1826 PReLUMicrokernelTester()
1827 .rows(rows)
1828 .channels(channels)
1829 .output_stride(43)
1830 .iterations(1)
1831 .Test(xnn_f32_prelu_ukernel__avx_2x8);
1832 }
1833 }
1834 }
1835
1836 TEST(F32_PRELU__AVX_2X8, inplace) {
1837 TEST_REQUIRES_X86_AVX;
1838 for (size_t rows = 1; rows <= 6; rows += 1) {
1839 for (size_t channels = 1; channels <= 40; channels += 7) {
1840 PReLUMicrokernelTester()
1841 .rows(rows)
1842 .channels(channels)
1843 .inplace(true)
1844 .iterations(1)
1845 .Test(xnn_f32_prelu_ukernel__avx_2x8);
1846 }
1847 }
1848 }
1849#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1850
1851
1852#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1853 TEST(F32_PRELU__AVX_2X16, channels_eq_16) {
1854 TEST_REQUIRES_X86_AVX;
1855 PReLUMicrokernelTester()
1856 .rows(2)
1857 .channels(16)
1858 .Test(xnn_f32_prelu_ukernel__avx_2x16);
1859 }
1860
1861 TEST(F32_PRELU__AVX_2X16, channels_div_16) {
1862 TEST_REQUIRES_X86_AVX;
1863 for (size_t channels = 32; channels < 160; channels += 16) {
1864 PReLUMicrokernelTester()
1865 .rows(2)
1866 .channels(channels)
1867 .Test(xnn_f32_prelu_ukernel__avx_2x16);
1868 }
1869 }
1870
1871 TEST(F32_PRELU__AVX_2X16, channels_lt_16) {
1872 TEST_REQUIRES_X86_AVX;
1873 for (size_t channels = 1; channels < 16; channels++) {
1874 PReLUMicrokernelTester()
1875 .rows(2)
1876 .channels(channels)
1877 .Test(xnn_f32_prelu_ukernel__avx_2x16);
1878 }
1879 }
1880
1881 TEST(F32_PRELU__AVX_2X16, channels_gt_16) {
1882 TEST_REQUIRES_X86_AVX;
1883 for (size_t channels = 17; channels < 32; channels++) {
1884 PReLUMicrokernelTester()
1885 .rows(2)
1886 .channels(channels)
1887 .Test(xnn_f32_prelu_ukernel__avx_2x16);
1888 }
1889 }
1890
1891 TEST(F32_PRELU__AVX_2X16, rows_lt_2) {
1892 TEST_REQUIRES_X86_AVX;
1893 for (size_t rows = 1; rows < 2; rows++) {
1894 for (size_t channels = 1; channels <= 80; channels += 15) {
1895 PReLUMicrokernelTester()
1896 .rows(rows)
1897 .channels(channels)
1898 .Test(xnn_f32_prelu_ukernel__avx_2x16);
1899 }
1900 }
1901 }
1902
1903 TEST(F32_PRELU__AVX_2X16, rows_div_2) {
1904 TEST_REQUIRES_X86_AVX;
1905 for (size_t rows = 4; rows <= 8; rows += 2) {
1906 for (size_t channels = 1; channels <= 80; channels += 15) {
1907 PReLUMicrokernelTester()
1908 .rows(rows)
1909 .channels(channels)
1910 .Test(xnn_f32_prelu_ukernel__avx_2x16);
1911 }
1912 }
1913 }
1914
1915 TEST(F32_PRELU__AVX_2X16, rows_gt_2) {
1916 TEST_REQUIRES_X86_AVX;
1917 for (size_t rows = 3; rows < 4; rows++) {
1918 for (size_t channels = 1; channels <= 80; channels += 15) {
1919 PReLUMicrokernelTester()
1920 .rows(rows)
1921 .channels(channels)
1922 .Test(xnn_f32_prelu_ukernel__avx_2x16);
1923 }
1924 }
1925 }
1926
1927 TEST(F32_PRELU__AVX_2X16, input_stride) {
1928 TEST_REQUIRES_X86_AVX;
1929 for (size_t rows = 1; rows <= 6; rows += 1) {
1930 for (size_t channels = 1; channels <= 80; channels += 15) {
1931 PReLUMicrokernelTester()
1932 .rows(rows)
1933 .channels(channels)
1934 .input_stride(83)
1935 .iterations(1)
1936 .Test(xnn_f32_prelu_ukernel__avx_2x16);
1937 }
1938 }
1939 }
1940
1941 TEST(F32_PRELU__AVX_2X16, output_stride) {
1942 TEST_REQUIRES_X86_AVX;
1943 for (size_t rows = 1; rows <= 6; rows += 1) {
1944 for (size_t channels = 1; channels <= 80; channels += 15) {
1945 PReLUMicrokernelTester()
1946 .rows(rows)
1947 .channels(channels)
1948 .output_stride(83)
1949 .iterations(1)
1950 .Test(xnn_f32_prelu_ukernel__avx_2x16);
1951 }
1952 }
1953 }
1954
1955 TEST(F32_PRELU__AVX_2X16, inplace) {
1956 TEST_REQUIRES_X86_AVX;
1957 for (size_t rows = 1; rows <= 6; rows += 1) {
1958 for (size_t channels = 1; channels <= 80; channels += 15) {
1959 PReLUMicrokernelTester()
1960 .rows(rows)
1961 .channels(channels)
1962 .inplace(true)
1963 .iterations(1)
1964 .Test(xnn_f32_prelu_ukernel__avx_2x16);
1965 }
1966 }
1967 }
1968#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1969
1970
1971#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1972 TEST(F32_PRELU__AVX512F_2X16, channels_eq_16) {
1973 TEST_REQUIRES_X86_AVX512F;
1974 PReLUMicrokernelTester()
1975 .rows(2)
1976 .channels(16)
1977 .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
1978 }
1979
1980 TEST(F32_PRELU__AVX512F_2X16, channels_div_16) {
1981 TEST_REQUIRES_X86_AVX512F;
1982 for (size_t channels = 32; channels < 160; channels += 16) {
1983 PReLUMicrokernelTester()
1984 .rows(2)
1985 .channels(channels)
1986 .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
1987 }
1988 }
1989
1990 TEST(F32_PRELU__AVX512F_2X16, channels_lt_16) {
1991 TEST_REQUIRES_X86_AVX512F;
1992 for (size_t channels = 1; channels < 16; channels++) {
1993 PReLUMicrokernelTester()
1994 .rows(2)
1995 .channels(channels)
1996 .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
1997 }
1998 }
1999
2000 TEST(F32_PRELU__AVX512F_2X16, channels_gt_16) {
2001 TEST_REQUIRES_X86_AVX512F;
2002 for (size_t channels = 17; channels < 32; channels++) {
2003 PReLUMicrokernelTester()
2004 .rows(2)
2005 .channels(channels)
2006 .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
2007 }
2008 }
2009
2010 TEST(F32_PRELU__AVX512F_2X16, rows_lt_2) {
2011 TEST_REQUIRES_X86_AVX512F;
2012 for (size_t rows = 1; rows < 2; rows++) {
2013 for (size_t channels = 1; channels <= 80; channels += 15) {
2014 PReLUMicrokernelTester()
2015 .rows(rows)
2016 .channels(channels)
2017 .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
2018 }
2019 }
2020 }
2021
2022 TEST(F32_PRELU__AVX512F_2X16, rows_div_2) {
2023 TEST_REQUIRES_X86_AVX512F;
2024 for (size_t rows = 4; rows <= 8; rows += 2) {
2025 for (size_t channels = 1; channels <= 80; channels += 15) {
2026 PReLUMicrokernelTester()
2027 .rows(rows)
2028 .channels(channels)
2029 .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
2030 }
2031 }
2032 }
2033
2034 TEST(F32_PRELU__AVX512F_2X16, rows_gt_2) {
2035 TEST_REQUIRES_X86_AVX512F;
2036 for (size_t rows = 3; rows < 4; rows++) {
2037 for (size_t channels = 1; channels <= 80; channels += 15) {
2038 PReLUMicrokernelTester()
2039 .rows(rows)
2040 .channels(channels)
2041 .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
2042 }
2043 }
2044 }
2045
2046 TEST(F32_PRELU__AVX512F_2X16, input_stride) {
2047 TEST_REQUIRES_X86_AVX512F;
2048 for (size_t rows = 1; rows <= 6; rows += 1) {
2049 for (size_t channels = 1; channels <= 80; channels += 15) {
2050 PReLUMicrokernelTester()
2051 .rows(rows)
2052 .channels(channels)
2053 .input_stride(83)
2054 .iterations(1)
2055 .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
2056 }
2057 }
2058 }
2059
2060 TEST(F32_PRELU__AVX512F_2X16, output_stride) {
2061 TEST_REQUIRES_X86_AVX512F;
2062 for (size_t rows = 1; rows <= 6; rows += 1) {
2063 for (size_t channels = 1; channels <= 80; channels += 15) {
2064 PReLUMicrokernelTester()
2065 .rows(rows)
2066 .channels(channels)
2067 .output_stride(83)
2068 .iterations(1)
2069 .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
2070 }
2071 }
2072 }
2073
2074 TEST(F32_PRELU__AVX512F_2X16, inplace) {
2075 TEST_REQUIRES_X86_AVX512F;
2076 for (size_t rows = 1; rows <= 6; rows += 1) {
2077 for (size_t channels = 1; channels <= 80; channels += 15) {
2078 PReLUMicrokernelTester()
2079 .rows(rows)
2080 .channels(channels)
2081 .inplace(true)
2082 .iterations(1)
2083 .Test(xnn_f32_prelu_ukernel__avx512f_2x16);
2084 }
2085 }
2086 }
2087#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2088
2089
2090#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2091 TEST(F32_PRELU__AVX512F_2X32, channels_eq_32) {
2092 TEST_REQUIRES_X86_AVX512F;
2093 PReLUMicrokernelTester()
2094 .rows(2)
2095 .channels(32)
2096 .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
2097 }
2098
2099 TEST(F32_PRELU__AVX512F_2X32, channels_div_32) {
2100 TEST_REQUIRES_X86_AVX512F;
2101 for (size_t channels = 64; channels < 320; channels += 32) {
2102 PReLUMicrokernelTester()
2103 .rows(2)
2104 .channels(channels)
2105 .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
2106 }
2107 }
2108
2109 TEST(F32_PRELU__AVX512F_2X32, channels_lt_32) {
2110 TEST_REQUIRES_X86_AVX512F;
2111 for (size_t channels = 1; channels < 32; channels++) {
2112 PReLUMicrokernelTester()
2113 .rows(2)
2114 .channels(channels)
2115 .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
2116 }
2117 }
2118
2119 TEST(F32_PRELU__AVX512F_2X32, channels_gt_32) {
2120 TEST_REQUIRES_X86_AVX512F;
2121 for (size_t channels = 33; channels < 64; channels++) {
2122 PReLUMicrokernelTester()
2123 .rows(2)
2124 .channels(channels)
2125 .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
2126 }
2127 }
2128
2129 TEST(F32_PRELU__AVX512F_2X32, rows_lt_2) {
2130 TEST_REQUIRES_X86_AVX512F;
2131 for (size_t rows = 1; rows < 2; rows++) {
2132 for (size_t channels = 1; channels <= 160; channels += 31) {
2133 PReLUMicrokernelTester()
2134 .rows(rows)
2135 .channels(channels)
2136 .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
2137 }
2138 }
2139 }
2140
2141 TEST(F32_PRELU__AVX512F_2X32, rows_div_2) {
2142 TEST_REQUIRES_X86_AVX512F;
2143 for (size_t rows = 4; rows <= 8; rows += 2) {
2144 for (size_t channels = 1; channels <= 160; channels += 31) {
2145 PReLUMicrokernelTester()
2146 .rows(rows)
2147 .channels(channels)
2148 .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
2149 }
2150 }
2151 }
2152
2153 TEST(F32_PRELU__AVX512F_2X32, rows_gt_2) {
2154 TEST_REQUIRES_X86_AVX512F;
2155 for (size_t rows = 3; rows < 4; rows++) {
2156 for (size_t channels = 1; channels <= 160; channels += 31) {
2157 PReLUMicrokernelTester()
2158 .rows(rows)
2159 .channels(channels)
2160 .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
2161 }
2162 }
2163 }
2164
2165 TEST(F32_PRELU__AVX512F_2X32, input_stride) {
2166 TEST_REQUIRES_X86_AVX512F;
2167 for (size_t rows = 1; rows <= 6; rows += 1) {
2168 for (size_t channels = 1; channels <= 160; channels += 31) {
2169 PReLUMicrokernelTester()
2170 .rows(rows)
2171 .channels(channels)
2172 .input_stride(163)
2173 .iterations(1)
2174 .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
2175 }
2176 }
2177 }
2178
2179 TEST(F32_PRELU__AVX512F_2X32, output_stride) {
2180 TEST_REQUIRES_X86_AVX512F;
2181 for (size_t rows = 1; rows <= 6; rows += 1) {
2182 for (size_t channels = 1; channels <= 160; channels += 31) {
2183 PReLUMicrokernelTester()
2184 .rows(rows)
2185 .channels(channels)
2186 .output_stride(163)
2187 .iterations(1)
2188 .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
2189 }
2190 }
2191 }
2192
2193 TEST(F32_PRELU__AVX512F_2X32, inplace) {
2194 TEST_REQUIRES_X86_AVX512F;
2195 for (size_t rows = 1; rows <= 6; rows += 1) {
2196 for (size_t channels = 1; channels <= 160; channels += 31) {
2197 PReLUMicrokernelTester()
2198 .rows(rows)
2199 .channels(channels)
2200 .inplace(true)
2201 .iterations(1)
2202 .Test(xnn_f32_prelu_ukernel__avx512f_2x32);
2203 }
2204 }
2205 }
2206#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2207
2208
Marat Dukhan195f8eb2020-06-25 12:50:57 -07002209#if XNN_ARCH_WASMSIMD
Frank Barcharda5316982020-07-23 13:19:28 -07002210 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, channels_eq_4) {
2211 PReLUMicrokernelTester()
2212 .rows(1)
2213 .channels(4)
2214 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
2215 }
2216
2217 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, channels_div_4) {
2218 for (size_t channels = 8; channels < 40; channels += 4) {
2219 PReLUMicrokernelTester()
2220 .rows(1)
2221 .channels(channels)
2222 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
2223 }
2224 }
2225
2226 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, channels_lt_4) {
2227 for (size_t channels = 1; channels < 4; channels++) {
2228 PReLUMicrokernelTester()
2229 .rows(1)
2230 .channels(channels)
2231 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
2232 }
2233 }
2234
2235 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, channels_gt_4) {
2236 for (size_t channels = 5; channels < 8; channels++) {
2237 PReLUMicrokernelTester()
2238 .rows(1)
2239 .channels(channels)
2240 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
2241 }
2242 }
2243
2244 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, rows_gt_1) {
2245 for (size_t rows = 2; rows < 2; rows++) {
2246 for (size_t channels = 1; channels <= 20; channels += 3) {
2247 PReLUMicrokernelTester()
2248 .rows(rows)
2249 .channels(channels)
2250 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
2251 }
2252 }
2253 }
2254
2255 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, input_stride) {
2256 for (size_t rows = 1; rows <= 3; rows += 1) {
2257 for (size_t channels = 1; channels <= 20; channels += 3) {
2258 PReLUMicrokernelTester()
2259 .rows(rows)
2260 .channels(channels)
2261 .input_stride(23)
2262 .iterations(1)
2263 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
2264 }
2265 }
2266 }
2267
2268 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, output_stride) {
2269 for (size_t rows = 1; rows <= 3; rows += 1) {
2270 for (size_t channels = 1; channels <= 20; channels += 3) {
2271 PReLUMicrokernelTester()
2272 .rows(rows)
2273 .channels(channels)
2274 .output_stride(23)
2275 .iterations(1)
2276 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
2277 }
2278 }
2279 }
2280
2281 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X4, inplace) {
2282 for (size_t rows = 1; rows <= 3; rows += 1) {
2283 for (size_t channels = 1; channels <= 20; channels += 3) {
2284 PReLUMicrokernelTester()
2285 .rows(rows)
2286 .channels(channels)
2287 .inplace(true)
2288 .iterations(1)
2289 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4);
2290 }
2291 }
2292 }
2293#endif // XNN_ARCH_WASMSIMD
2294
2295
2296#if XNN_ARCH_WASMSIMD
2297 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, channels_eq_8) {
2298 PReLUMicrokernelTester()
2299 .rows(1)
2300 .channels(8)
2301 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
2302 }
2303
2304 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, channels_div_8) {
2305 for (size_t channels = 16; channels < 80; channels += 8) {
2306 PReLUMicrokernelTester()
2307 .rows(1)
2308 .channels(channels)
2309 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
2310 }
2311 }
2312
2313 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, channels_lt_8) {
2314 for (size_t channels = 1; channels < 8; channels++) {
2315 PReLUMicrokernelTester()
2316 .rows(1)
2317 .channels(channels)
2318 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
2319 }
2320 }
2321
2322 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, channels_gt_8) {
2323 for (size_t channels = 9; channels < 16; channels++) {
2324 PReLUMicrokernelTester()
2325 .rows(1)
2326 .channels(channels)
2327 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
2328 }
2329 }
2330
2331 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, rows_gt_1) {
2332 for (size_t rows = 2; rows < 2; rows++) {
2333 for (size_t channels = 1; channels <= 40; channels += 7) {
2334 PReLUMicrokernelTester()
2335 .rows(rows)
2336 .channels(channels)
2337 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
2338 }
2339 }
2340 }
2341
2342 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, input_stride) {
2343 for (size_t rows = 1; rows <= 3; rows += 1) {
2344 for (size_t channels = 1; channels <= 40; channels += 7) {
2345 PReLUMicrokernelTester()
2346 .rows(rows)
2347 .channels(channels)
2348 .input_stride(43)
2349 .iterations(1)
2350 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
2351 }
2352 }
2353 }
2354
2355 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, output_stride) {
2356 for (size_t rows = 1; rows <= 3; rows += 1) {
2357 for (size_t channels = 1; channels <= 40; channels += 7) {
2358 PReLUMicrokernelTester()
2359 .rows(rows)
2360 .channels(channels)
2361 .output_stride(43)
2362 .iterations(1)
2363 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
2364 }
2365 }
2366 }
2367
2368 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X8, inplace) {
2369 for (size_t rows = 1; rows <= 3; rows += 1) {
2370 for (size_t channels = 1; channels <= 40; channels += 7) {
2371 PReLUMicrokernelTester()
2372 .rows(rows)
2373 .channels(channels)
2374 .inplace(true)
2375 .iterations(1)
2376 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8);
2377 }
2378 }
2379 }
2380#endif // XNN_ARCH_WASMSIMD
2381
2382
2383#if XNN_ARCH_WASMSIMD
2384 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, channels_eq_16) {
2385 PReLUMicrokernelTester()
2386 .rows(1)
2387 .channels(16)
2388 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
2389 }
2390
2391 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, channels_div_16) {
2392 for (size_t channels = 32; channels < 160; channels += 16) {
2393 PReLUMicrokernelTester()
2394 .rows(1)
2395 .channels(channels)
2396 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
2397 }
2398 }
2399
2400 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, channels_lt_16) {
2401 for (size_t channels = 1; channels < 16; channels++) {
2402 PReLUMicrokernelTester()
2403 .rows(1)
2404 .channels(channels)
2405 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
2406 }
2407 }
2408
2409 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, channels_gt_16) {
2410 for (size_t channels = 17; channels < 32; channels++) {
2411 PReLUMicrokernelTester()
2412 .rows(1)
2413 .channels(channels)
2414 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
2415 }
2416 }
2417
2418 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, rows_gt_1) {
2419 for (size_t rows = 2; rows < 2; rows++) {
2420 for (size_t channels = 1; channels <= 80; channels += 15) {
2421 PReLUMicrokernelTester()
2422 .rows(rows)
2423 .channels(channels)
2424 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
2425 }
2426 }
2427 }
2428
2429 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, input_stride) {
2430 for (size_t rows = 1; rows <= 3; rows += 1) {
2431 for (size_t channels = 1; channels <= 80; channels += 15) {
2432 PReLUMicrokernelTester()
2433 .rows(rows)
2434 .channels(channels)
2435 .input_stride(83)
2436 .iterations(1)
2437 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
2438 }
2439 }
2440 }
2441
2442 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, output_stride) {
2443 for (size_t rows = 1; rows <= 3; rows += 1) {
2444 for (size_t channels = 1; channels <= 80; channels += 15) {
2445 PReLUMicrokernelTester()
2446 .rows(rows)
2447 .channels(channels)
2448 .output_stride(83)
2449 .iterations(1)
2450 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
2451 }
2452 }
2453 }
2454
2455 TEST(F32_PRELU__WASMSIMD_BITSELECT_1X16, inplace) {
2456 for (size_t rows = 1; rows <= 3; rows += 1) {
2457 for (size_t channels = 1; channels <= 80; channels += 15) {
2458 PReLUMicrokernelTester()
2459 .rows(rows)
2460 .channels(channels)
2461 .inplace(true)
2462 .iterations(1)
2463 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16);
2464 }
2465 }
2466 }
2467#endif // XNN_ARCH_WASMSIMD
2468
2469
2470#if XNN_ARCH_WASMSIMD
Marat Dukhan195f8eb2020-06-25 12:50:57 -07002471 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, channels_eq_4) {
2472 PReLUMicrokernelTester()
2473 .rows(2)
2474 .channels(4)
2475 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
2476 }
2477
2478 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, channels_div_4) {
2479 for (size_t channels = 8; channels < 40; channels += 4) {
2480 PReLUMicrokernelTester()
2481 .rows(2)
2482 .channels(channels)
2483 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
2484 }
2485 }
2486
2487 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, channels_lt_4) {
2488 for (size_t channels = 1; channels < 4; channels++) {
2489 PReLUMicrokernelTester()
2490 .rows(2)
2491 .channels(channels)
2492 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
2493 }
2494 }
2495
2496 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, channels_gt_4) {
2497 for (size_t channels = 5; channels < 8; channels++) {
2498 PReLUMicrokernelTester()
2499 .rows(2)
2500 .channels(channels)
2501 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
2502 }
2503 }
2504
2505 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, rows_lt_2) {
2506 for (size_t rows = 1; rows < 2; rows++) {
2507 for (size_t channels = 1; channels <= 20; channels += 3) {
2508 PReLUMicrokernelTester()
2509 .rows(rows)
2510 .channels(channels)
2511 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
2512 }
2513 }
2514 }
2515
2516 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, rows_div_2) {
2517 for (size_t rows = 4; rows <= 8; rows += 2) {
2518 for (size_t channels = 1; channels <= 20; channels += 3) {
2519 PReLUMicrokernelTester()
2520 .rows(rows)
2521 .channels(channels)
2522 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
2523 }
2524 }
2525 }
2526
2527 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, rows_gt_2) {
2528 for (size_t rows = 3; rows < 4; rows++) {
2529 for (size_t channels = 1; channels <= 20; channels += 3) {
2530 PReLUMicrokernelTester()
2531 .rows(rows)
2532 .channels(channels)
2533 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
2534 }
2535 }
2536 }
2537
2538 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, input_stride) {
2539 for (size_t rows = 1; rows <= 6; rows += 1) {
2540 for (size_t channels = 1; channels <= 20; channels += 3) {
2541 PReLUMicrokernelTester()
2542 .rows(rows)
2543 .channels(channels)
2544 .input_stride(23)
2545 .iterations(1)
2546 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
2547 }
2548 }
2549 }
2550
2551 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, output_stride) {
2552 for (size_t rows = 1; rows <= 6; rows += 1) {
2553 for (size_t channels = 1; channels <= 20; channels += 3) {
2554 PReLUMicrokernelTester()
2555 .rows(rows)
2556 .channels(channels)
2557 .output_stride(23)
2558 .iterations(1)
2559 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
2560 }
2561 }
2562 }
2563
2564 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X4, inplace) {
2565 for (size_t rows = 1; rows <= 6; rows += 1) {
2566 for (size_t channels = 1; channels <= 20; channels += 3) {
2567 PReLUMicrokernelTester()
2568 .rows(rows)
2569 .channels(channels)
2570 .inplace(true)
2571 .iterations(1)
2572 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4);
2573 }
2574 }
2575 }
2576#endif // XNN_ARCH_WASMSIMD
2577
2578
2579#if XNN_ARCH_WASMSIMD
2580 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, channels_eq_8) {
2581 PReLUMicrokernelTester()
2582 .rows(2)
2583 .channels(8)
2584 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
2585 }
2586
2587 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, channels_div_8) {
2588 for (size_t channels = 16; channels < 80; channels += 8) {
2589 PReLUMicrokernelTester()
2590 .rows(2)
2591 .channels(channels)
2592 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
2593 }
2594 }
2595
2596 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, channels_lt_8) {
2597 for (size_t channels = 1; channels < 8; channels++) {
2598 PReLUMicrokernelTester()
2599 .rows(2)
2600 .channels(channels)
2601 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
2602 }
2603 }
2604
2605 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, channels_gt_8) {
2606 for (size_t channels = 9; channels < 16; channels++) {
2607 PReLUMicrokernelTester()
2608 .rows(2)
2609 .channels(channels)
2610 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
2611 }
2612 }
2613
2614 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, rows_lt_2) {
2615 for (size_t rows = 1; rows < 2; rows++) {
2616 for (size_t channels = 1; channels <= 40; channels += 7) {
2617 PReLUMicrokernelTester()
2618 .rows(rows)
2619 .channels(channels)
2620 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
2621 }
2622 }
2623 }
2624
2625 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, rows_div_2) {
2626 for (size_t rows = 4; rows <= 8; rows += 2) {
2627 for (size_t channels = 1; channels <= 40; channels += 7) {
2628 PReLUMicrokernelTester()
2629 .rows(rows)
2630 .channels(channels)
2631 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
2632 }
2633 }
2634 }
2635
2636 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, rows_gt_2) {
2637 for (size_t rows = 3; rows < 4; rows++) {
2638 for (size_t channels = 1; channels <= 40; channels += 7) {
2639 PReLUMicrokernelTester()
2640 .rows(rows)
2641 .channels(channels)
2642 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
2643 }
2644 }
2645 }
2646
2647 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, input_stride) {
2648 for (size_t rows = 1; rows <= 6; rows += 1) {
2649 for (size_t channels = 1; channels <= 40; channels += 7) {
2650 PReLUMicrokernelTester()
2651 .rows(rows)
2652 .channels(channels)
2653 .input_stride(43)
2654 .iterations(1)
2655 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
2656 }
2657 }
2658 }
2659
2660 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, output_stride) {
2661 for (size_t rows = 1; rows <= 6; rows += 1) {
2662 for (size_t channels = 1; channels <= 40; channels += 7) {
2663 PReLUMicrokernelTester()
2664 .rows(rows)
2665 .channels(channels)
2666 .output_stride(43)
2667 .iterations(1)
2668 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
2669 }
2670 }
2671 }
2672
2673 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X8, inplace) {
2674 for (size_t rows = 1; rows <= 6; rows += 1) {
2675 for (size_t channels = 1; channels <= 40; channels += 7) {
2676 PReLUMicrokernelTester()
2677 .rows(rows)
2678 .channels(channels)
2679 .inplace(true)
2680 .iterations(1)
2681 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8);
2682 }
2683 }
2684 }
2685#endif // XNN_ARCH_WASMSIMD
2686
2687
2688#if XNN_ARCH_WASMSIMD
Frank Barcharda5316982020-07-23 13:19:28 -07002689 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, channels_eq_16) {
2690 PReLUMicrokernelTester()
2691 .rows(2)
2692 .channels(16)
2693 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
2694 }
2695
2696 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, channels_div_16) {
2697 for (size_t channels = 32; channels < 160; channels += 16) {
2698 PReLUMicrokernelTester()
2699 .rows(2)
2700 .channels(channels)
2701 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
2702 }
2703 }
2704
2705 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, channels_lt_16) {
2706 for (size_t channels = 1; channels < 16; channels++) {
2707 PReLUMicrokernelTester()
2708 .rows(2)
2709 .channels(channels)
2710 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
2711 }
2712 }
2713
2714 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, channels_gt_16) {
2715 for (size_t channels = 17; channels < 32; channels++) {
2716 PReLUMicrokernelTester()
2717 .rows(2)
2718 .channels(channels)
2719 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
2720 }
2721 }
2722
2723 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, rows_lt_2) {
2724 for (size_t rows = 1; rows < 2; rows++) {
2725 for (size_t channels = 1; channels <= 80; channels += 15) {
2726 PReLUMicrokernelTester()
2727 .rows(rows)
2728 .channels(channels)
2729 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
2730 }
2731 }
2732 }
2733
2734 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, rows_div_2) {
2735 for (size_t rows = 4; rows <= 8; rows += 2) {
2736 for (size_t channels = 1; channels <= 80; channels += 15) {
2737 PReLUMicrokernelTester()
2738 .rows(rows)
2739 .channels(channels)
2740 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
2741 }
2742 }
2743 }
2744
2745 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, rows_gt_2) {
2746 for (size_t rows = 3; rows < 4; rows++) {
2747 for (size_t channels = 1; channels <= 80; channels += 15) {
2748 PReLUMicrokernelTester()
2749 .rows(rows)
2750 .channels(channels)
2751 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
2752 }
2753 }
2754 }
2755
2756 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, input_stride) {
2757 for (size_t rows = 1; rows <= 6; rows += 1) {
2758 for (size_t channels = 1; channels <= 80; channels += 15) {
2759 PReLUMicrokernelTester()
2760 .rows(rows)
2761 .channels(channels)
2762 .input_stride(83)
2763 .iterations(1)
2764 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
2765 }
2766 }
2767 }
2768
2769 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, output_stride) {
2770 for (size_t rows = 1; rows <= 6; rows += 1) {
2771 for (size_t channels = 1; channels <= 80; channels += 15) {
2772 PReLUMicrokernelTester()
2773 .rows(rows)
2774 .channels(channels)
2775 .output_stride(83)
2776 .iterations(1)
2777 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
2778 }
2779 }
2780 }
2781
2782 TEST(F32_PRELU__WASMSIMD_BITSELECT_2X16, inplace) {
2783 for (size_t rows = 1; rows <= 6; rows += 1) {
2784 for (size_t channels = 1; channels <= 80; channels += 15) {
2785 PReLUMicrokernelTester()
2786 .rows(rows)
2787 .channels(channels)
2788 .inplace(true)
2789 .iterations(1)
2790 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16);
2791 }
2792 }
2793 }
2794#endif // XNN_ARCH_WASMSIMD
2795
2796
2797#if XNN_ARCH_WASMSIMD
2798 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, channels_eq_4) {
2799 PReLUMicrokernelTester()
2800 .rows(4)
2801 .channels(4)
2802 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
2803 }
2804
2805 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, channels_div_4) {
2806 for (size_t channels = 8; channels < 40; channels += 4) {
2807 PReLUMicrokernelTester()
2808 .rows(4)
2809 .channels(channels)
2810 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
2811 }
2812 }
2813
2814 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, channels_lt_4) {
2815 for (size_t channels = 1; channels < 4; channels++) {
2816 PReLUMicrokernelTester()
2817 .rows(4)
2818 .channels(channels)
2819 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
2820 }
2821 }
2822
2823 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, channels_gt_4) {
2824 for (size_t channels = 5; channels < 8; channels++) {
2825 PReLUMicrokernelTester()
2826 .rows(4)
2827 .channels(channels)
2828 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
2829 }
2830 }
2831
2832 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, rows_lt_4) {
2833 for (size_t rows = 1; rows < 4; rows++) {
2834 for (size_t channels = 1; channels <= 20; channels += 3) {
2835 PReLUMicrokernelTester()
2836 .rows(rows)
2837 .channels(channels)
2838 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
2839 }
2840 }
2841 }
2842
2843 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, rows_div_4) {
2844 for (size_t rows = 8; rows <= 16; rows += 4) {
2845 for (size_t channels = 1; channels <= 20; channels += 3) {
2846 PReLUMicrokernelTester()
2847 .rows(rows)
2848 .channels(channels)
2849 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
2850 }
2851 }
2852 }
2853
2854 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, rows_gt_4) {
2855 for (size_t rows = 5; rows < 8; rows++) {
2856 for (size_t channels = 1; channels <= 20; channels += 3) {
2857 PReLUMicrokernelTester()
2858 .rows(rows)
2859 .channels(channels)
2860 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
2861 }
2862 }
2863 }
2864
2865 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, input_stride) {
2866 for (size_t rows = 1; rows <= 12; rows += 3) {
2867 for (size_t channels = 1; channels <= 20; channels += 3) {
2868 PReLUMicrokernelTester()
2869 .rows(rows)
2870 .channels(channels)
2871 .input_stride(23)
2872 .iterations(1)
2873 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
2874 }
2875 }
2876 }
2877
2878 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, output_stride) {
2879 for (size_t rows = 1; rows <= 12; rows += 3) {
2880 for (size_t channels = 1; channels <= 20; channels += 3) {
2881 PReLUMicrokernelTester()
2882 .rows(rows)
2883 .channels(channels)
2884 .output_stride(23)
2885 .iterations(1)
2886 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
2887 }
2888 }
2889 }
2890
2891 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X4, inplace) {
2892 for (size_t rows = 1; rows <= 12; rows += 3) {
2893 for (size_t channels = 1; channels <= 20; channels += 3) {
2894 PReLUMicrokernelTester()
2895 .rows(rows)
2896 .channels(channels)
2897 .inplace(true)
2898 .iterations(1)
2899 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4);
2900 }
2901 }
2902 }
2903#endif // XNN_ARCH_WASMSIMD
2904
2905
2906#if XNN_ARCH_WASMSIMD
2907 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, channels_eq_8) {
2908 PReLUMicrokernelTester()
2909 .rows(4)
2910 .channels(8)
2911 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
2912 }
2913
2914 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, channels_div_8) {
2915 for (size_t channels = 16; channels < 80; channels += 8) {
2916 PReLUMicrokernelTester()
2917 .rows(4)
2918 .channels(channels)
2919 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
2920 }
2921 }
2922
2923 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, channels_lt_8) {
2924 for (size_t channels = 1; channels < 8; channels++) {
2925 PReLUMicrokernelTester()
2926 .rows(4)
2927 .channels(channels)
2928 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
2929 }
2930 }
2931
2932 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, channels_gt_8) {
2933 for (size_t channels = 9; channels < 16; channels++) {
2934 PReLUMicrokernelTester()
2935 .rows(4)
2936 .channels(channels)
2937 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
2938 }
2939 }
2940
2941 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, rows_lt_4) {
2942 for (size_t rows = 1; rows < 4; rows++) {
2943 for (size_t channels = 1; channels <= 40; channels += 7) {
2944 PReLUMicrokernelTester()
2945 .rows(rows)
2946 .channels(channels)
2947 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
2948 }
2949 }
2950 }
2951
2952 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, rows_div_4) {
2953 for (size_t rows = 8; rows <= 16; rows += 4) {
2954 for (size_t channels = 1; channels <= 40; channels += 7) {
2955 PReLUMicrokernelTester()
2956 .rows(rows)
2957 .channels(channels)
2958 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
2959 }
2960 }
2961 }
2962
2963 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, rows_gt_4) {
2964 for (size_t rows = 5; rows < 8; rows++) {
2965 for (size_t channels = 1; channels <= 40; channels += 7) {
2966 PReLUMicrokernelTester()
2967 .rows(rows)
2968 .channels(channels)
2969 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
2970 }
2971 }
2972 }
2973
2974 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, input_stride) {
2975 for (size_t rows = 1; rows <= 12; rows += 3) {
2976 for (size_t channels = 1; channels <= 40; channels += 7) {
2977 PReLUMicrokernelTester()
2978 .rows(rows)
2979 .channels(channels)
2980 .input_stride(43)
2981 .iterations(1)
2982 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
2983 }
2984 }
2985 }
2986
2987 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, output_stride) {
2988 for (size_t rows = 1; rows <= 12; rows += 3) {
2989 for (size_t channels = 1; channels <= 40; channels += 7) {
2990 PReLUMicrokernelTester()
2991 .rows(rows)
2992 .channels(channels)
2993 .output_stride(43)
2994 .iterations(1)
2995 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
2996 }
2997 }
2998 }
2999
3000 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X8, inplace) {
3001 for (size_t rows = 1; rows <= 12; rows += 3) {
3002 for (size_t channels = 1; channels <= 40; channels += 7) {
3003 PReLUMicrokernelTester()
3004 .rows(rows)
3005 .channels(channels)
3006 .inplace(true)
3007 .iterations(1)
3008 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8);
3009 }
3010 }
3011 }
3012#endif // XNN_ARCH_WASMSIMD
3013
3014
3015#if XNN_ARCH_WASMSIMD
3016 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, channels_eq_16) {
3017 PReLUMicrokernelTester()
3018 .rows(4)
3019 .channels(16)
3020 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
3021 }
3022
3023 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, channels_div_16) {
3024 for (size_t channels = 32; channels < 160; channels += 16) {
3025 PReLUMicrokernelTester()
3026 .rows(4)
3027 .channels(channels)
3028 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
3029 }
3030 }
3031
3032 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, channels_lt_16) {
3033 for (size_t channels = 1; channels < 16; channels++) {
3034 PReLUMicrokernelTester()
3035 .rows(4)
3036 .channels(channels)
3037 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
3038 }
3039 }
3040
3041 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, channels_gt_16) {
3042 for (size_t channels = 17; channels < 32; channels++) {
3043 PReLUMicrokernelTester()
3044 .rows(4)
3045 .channels(channels)
3046 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
3047 }
3048 }
3049
3050 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, rows_lt_4) {
3051 for (size_t rows = 1; rows < 4; rows++) {
3052 for (size_t channels = 1; channels <= 80; channels += 15) {
3053 PReLUMicrokernelTester()
3054 .rows(rows)
3055 .channels(channels)
3056 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
3057 }
3058 }
3059 }
3060
3061 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, rows_div_4) {
3062 for (size_t rows = 8; rows <= 16; rows += 4) {
3063 for (size_t channels = 1; channels <= 80; channels += 15) {
3064 PReLUMicrokernelTester()
3065 .rows(rows)
3066 .channels(channels)
3067 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
3068 }
3069 }
3070 }
3071
3072 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, rows_gt_4) {
3073 for (size_t rows = 5; rows < 8; rows++) {
3074 for (size_t channels = 1; channels <= 80; channels += 15) {
3075 PReLUMicrokernelTester()
3076 .rows(rows)
3077 .channels(channels)
3078 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
3079 }
3080 }
3081 }
3082
3083 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, input_stride) {
3084 for (size_t rows = 1; rows <= 12; rows += 3) {
3085 for (size_t channels = 1; channels <= 80; channels += 15) {
3086 PReLUMicrokernelTester()
3087 .rows(rows)
3088 .channels(channels)
3089 .input_stride(83)
3090 .iterations(1)
3091 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
3092 }
3093 }
3094 }
3095
3096 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, output_stride) {
3097 for (size_t rows = 1; rows <= 12; rows += 3) {
3098 for (size_t channels = 1; channels <= 80; channels += 15) {
3099 PReLUMicrokernelTester()
3100 .rows(rows)
3101 .channels(channels)
3102 .output_stride(83)
3103 .iterations(1)
3104 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
3105 }
3106 }
3107 }
3108
3109 TEST(F32_PRELU__WASMSIMD_BITSELECT_4X16, inplace) {
3110 for (size_t rows = 1; rows <= 12; rows += 3) {
3111 for (size_t channels = 1; channels <= 80; channels += 15) {
3112 PReLUMicrokernelTester()
3113 .rows(rows)
3114 .channels(channels)
3115 .inplace(true)
3116 .iterations(1)
3117 .Test(xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16);
3118 }
3119 }
3120 }
3121#endif // XNN_ARCH_WASMSIMD
3122
3123
3124#if XNN_ARCH_WASMSIMD
3125 TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, channels_eq_4) {
3126 PReLUMicrokernelTester()
3127 .rows(1)
3128 .channels(4)
3129 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
3130 }
3131
3132 TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, channels_div_4) {
3133 for (size_t channels = 8; channels < 40; channels += 4) {
3134 PReLUMicrokernelTester()
3135 .rows(1)
3136 .channels(channels)
3137 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
3138 }
3139 }
3140
3141 TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, channels_lt_4) {
3142 for (size_t channels = 1; channels < 4; channels++) {
3143 PReLUMicrokernelTester()
3144 .rows(1)
3145 .channels(channels)
3146 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
3147 }
3148 }
3149
3150 TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, channels_gt_4) {
3151 for (size_t channels = 5; channels < 8; channels++) {
3152 PReLUMicrokernelTester()
3153 .rows(1)
3154 .channels(channels)
3155 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
3156 }
3157 }
3158
3159 TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, rows_gt_1) {
3160 for (size_t rows = 2; rows < 2; rows++) {
3161 for (size_t channels = 1; channels <= 20; channels += 3) {
3162 PReLUMicrokernelTester()
3163 .rows(rows)
3164 .channels(channels)
3165 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
3166 }
3167 }
3168 }
3169
3170 TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, input_stride) {
3171 for (size_t rows = 1; rows <= 3; rows += 1) {
3172 for (size_t channels = 1; channels <= 20; channels += 3) {
3173 PReLUMicrokernelTester()
3174 .rows(rows)
3175 .channels(channels)
3176 .input_stride(23)
3177 .iterations(1)
3178 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
3179 }
3180 }
3181 }
3182
3183 TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, output_stride) {
3184 for (size_t rows = 1; rows <= 3; rows += 1) {
3185 for (size_t channels = 1; channels <= 20; channels += 3) {
3186 PReLUMicrokernelTester()
3187 .rows(rows)
3188 .channels(channels)
3189 .output_stride(23)
3190 .iterations(1)
3191 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
3192 }
3193 }
3194 }
3195
3196 TEST(F32_PRELU__WASMSIMD_MINMAX_1X4, inplace) {
3197 for (size_t rows = 1; rows <= 3; rows += 1) {
3198 for (size_t channels = 1; channels <= 20; channels += 3) {
3199 PReLUMicrokernelTester()
3200 .rows(rows)
3201 .channels(channels)
3202 .inplace(true)
3203 .iterations(1)
3204 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4);
3205 }
3206 }
3207 }
3208#endif // XNN_ARCH_WASMSIMD
3209
3210
3211#if XNN_ARCH_WASMSIMD
3212 TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, channels_eq_8) {
3213 PReLUMicrokernelTester()
3214 .rows(1)
3215 .channels(8)
3216 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
3217 }
3218
3219 TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, channels_div_8) {
3220 for (size_t channels = 16; channels < 80; channels += 8) {
3221 PReLUMicrokernelTester()
3222 .rows(1)
3223 .channels(channels)
3224 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
3225 }
3226 }
3227
3228 TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, channels_lt_8) {
3229 for (size_t channels = 1; channels < 8; channels++) {
3230 PReLUMicrokernelTester()
3231 .rows(1)
3232 .channels(channels)
3233 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
3234 }
3235 }
3236
3237 TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, channels_gt_8) {
3238 for (size_t channels = 9; channels < 16; channels++) {
3239 PReLUMicrokernelTester()
3240 .rows(1)
3241 .channels(channels)
3242 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
3243 }
3244 }
3245
3246 TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, rows_gt_1) {
3247 for (size_t rows = 2; rows < 2; rows++) {
3248 for (size_t channels = 1; channels <= 40; channels += 7) {
3249 PReLUMicrokernelTester()
3250 .rows(rows)
3251 .channels(channels)
3252 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
3253 }
3254 }
3255 }
3256
3257 TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, input_stride) {
3258 for (size_t rows = 1; rows <= 3; rows += 1) {
3259 for (size_t channels = 1; channels <= 40; channels += 7) {
3260 PReLUMicrokernelTester()
3261 .rows(rows)
3262 .channels(channels)
3263 .input_stride(43)
3264 .iterations(1)
3265 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
3266 }
3267 }
3268 }
3269
3270 TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, output_stride) {
3271 for (size_t rows = 1; rows <= 3; rows += 1) {
3272 for (size_t channels = 1; channels <= 40; channels += 7) {
3273 PReLUMicrokernelTester()
3274 .rows(rows)
3275 .channels(channels)
3276 .output_stride(43)
3277 .iterations(1)
3278 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
3279 }
3280 }
3281 }
3282
3283 TEST(F32_PRELU__WASMSIMD_MINMAX_1X8, inplace) {
3284 for (size_t rows = 1; rows <= 3; rows += 1) {
3285 for (size_t channels = 1; channels <= 40; channels += 7) {
3286 PReLUMicrokernelTester()
3287 .rows(rows)
3288 .channels(channels)
3289 .inplace(true)
3290 .iterations(1)
3291 .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8);
3292 }
3293 }
3294 }
3295#endif // XNN_ARCH_WASMSIMD
3296
3297
#if XNN_ARCH_WASMSIMD
  // Tests for xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16, driven through PReLUMicrokernelTester.

  // 1 row, 16 channels.
  TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, channels_eq_16) {
    PReLUMicrokernelTester().rows(1).channels(16).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
  }

  // 1 row; channel counts 32, 48, ..., 144.
  TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, channels_div_16) {
    for (size_t c = 32; c < 160; c += 16) {
      PReLUMicrokernelTester().rows(1).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
    }
  }

  // 1 row; channel counts 1 through 15.
  TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, channels_lt_16) {
    for (size_t c = 1; c < 16; ++c) {
      PReLUMicrokernelTester().rows(1).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
    }
  }

  // 1 row; channel counts 17 through 31.
  TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, channels_gt_16) {
    for (size_t c = 17; c < 32; ++c) {
      PReLUMicrokernelTester().rows(1).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
    }
  }

  // Rows 2 and 3, crossed with channel counts 1, 16, 31, 46, 61, 76.
  //
  // The previous bounds (`rows = 2; rows < 2`) made the loop condition false on
  // entry, so the body never ran and the test passed without invoking the
  // micro-kernel. The upper bound is widened so multi-row inputs are exercised.
  // NOTE(review): the file header says this file is generated by
  // tools/generate-prelu-test.py; the generator template presumably needs the
  // same fix, otherwise regeneration will reintroduce the empty loop.
  TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, rows_gt_1) {
    for (size_t r = 2; r < 4; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
      }
    }
  }

  // Rows 1 through 3, channel counts 1, 16, ..., 76, with input_stride(83) and iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, input_stride) {
    for (size_t r = 1; r <= 3; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).input_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
      }
    }
  }

  // Rows 1 through 3, channel counts 1, 16, ..., 76, with output_stride(83) and iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, output_stride) {
    for (size_t r = 1; r <= 3; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).output_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
      }
    }
  }

  // Rows 1 through 3, channel counts 1, 16, ..., 76, with inplace(true) and iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_1X16, inplace) {
    for (size_t r = 1; r <= 3; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
3383
3384
#if XNN_ARCH_WASMSIMD
  // Tests for xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4, driven through PReLUMicrokernelTester.

  // 2 rows, 4 channels.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, channels_eq_4) {
    PReLUMicrokernelTester().rows(2).channels(4).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
  }

  // 2 rows; channel counts 8, 12, ..., 36.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, channels_div_4) {
    for (size_t c = 8; c < 40; c += 4) {
      PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
    }
  }

  // 2 rows; channel counts 1 through 3.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, channels_lt_4) {
    for (size_t c = 1; c < 4; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
    }
  }

  // 2 rows; channel counts 5 through 7.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, channels_gt_4) {
    for (size_t c = 5; c < 8; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
    }
  }

  // 1 row; channel counts 1, 4, ..., 19.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, rows_lt_2) {
    for (size_t r = 1; r < 2; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
      }
    }
  }

  // Rows 4, 6, 8; channel counts 1, 4, ..., 19.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, rows_div_2) {
    for (size_t r = 4; r <= 8; r += 2) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
      }
    }
  }

  // 3 rows; channel counts 1, 4, ..., 19.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, rows_gt_2) {
    for (size_t r = 3; r < 4; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
      }
    }
  }

  // Rows 1 through 6, channel counts 1, 4, ..., 19, with input_stride(23) and iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, input_stride) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).input_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
      }
    }
  }

  // Rows 1 through 6, channel counts 1, 4, ..., 19, with output_stride(23) and iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, output_stride) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).output_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
      }
    }
  }

  // Rows 1 through 6, channel counts 1, 4, ..., 19, with inplace(true) and iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X4, inplace) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
3492
3493
#if XNN_ARCH_WASMSIMD
  // Tests for xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8, driven through PReLUMicrokernelTester.

  // 2 rows, 8 channels.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, channels_eq_8) {
    PReLUMicrokernelTester().rows(2).channels(8).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
  }

  // 2 rows; channel counts 16, 24, ..., 72.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, channels_div_8) {
    for (size_t c = 16; c < 80; c += 8) {
      PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
    }
  }

  // 2 rows; channel counts 1 through 7.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, channels_lt_8) {
    for (size_t c = 1; c < 8; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
    }
  }

  // 2 rows; channel counts 9 through 15.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, channels_gt_8) {
    for (size_t c = 9; c < 16; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
    }
  }

  // 1 row; channel counts 1, 8, ..., 36.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, rows_lt_2) {
    for (size_t r = 1; r < 2; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
      }
    }
  }

  // Rows 4, 6, 8; channel counts 1, 8, ..., 36.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, rows_div_2) {
    for (size_t r = 4; r <= 8; r += 2) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
      }
    }
  }

  // 3 rows; channel counts 1, 8, ..., 36.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, rows_gt_2) {
    for (size_t r = 3; r < 4; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
      }
    }
  }

  // Rows 1 through 6, channel counts 1, 8, ..., 36, with input_stride(43) and iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, input_stride) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c).input_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
      }
    }
  }

  // Rows 1 through 6, channel counts 1, 8, ..., 36, with output_stride(43) and iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, output_stride) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c).output_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
      }
    }
  }

  // Rows 1 through 6, channel counts 1, 8, ..., 36, with inplace(true) and iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X8, inplace) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
3601
3602
#if XNN_ARCH_WASMSIMD
  // Tests for xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16, driven through PReLUMicrokernelTester.

  // 2 rows, 16 channels.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, channels_eq_16) {
    PReLUMicrokernelTester().rows(2).channels(16).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
  }

  // 2 rows; channel counts 32, 48, ..., 144.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, channels_div_16) {
    for (size_t c = 32; c < 160; c += 16) {
      PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
    }
  }

  // 2 rows; channel counts 1 through 15.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, channels_lt_16) {
    for (size_t c = 1; c < 16; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
    }
  }

  // 2 rows; channel counts 17 through 31.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, channels_gt_16) {
    for (size_t c = 17; c < 32; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
    }
  }

  // 1 row; channel counts 1, 16, ..., 76.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, rows_lt_2) {
    for (size_t r = 1; r < 2; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
      }
    }
  }

  // Rows 4, 6, 8; channel counts 1, 16, ..., 76.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, rows_div_2) {
    for (size_t r = 4; r <= 8; r += 2) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
      }
    }
  }

  // 3 rows; channel counts 1, 16, ..., 76.
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, rows_gt_2) {
    for (size_t r = 3; r < 4; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
      }
    }
  }

  // Rows 1 through 6, channel counts 1, 16, ..., 76, with input_stride(83) and iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, input_stride) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).input_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
      }
    }
  }

  // Rows 1 through 6, channel counts 1, 16, ..., 76, with output_stride(83) and iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, output_stride) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).output_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
      }
    }
  }

  // Rows 1 through 6, channel counts 1, 16, ..., 76, with inplace(true) and iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_2X16, inplace) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
3710
3711
#if XNN_ARCH_WASMSIMD
  // Tests for xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4, driven through PReLUMicrokernelTester.

  // 4 rows, 4 channels.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, channels_eq_4) {
    PReLUMicrokernelTester().rows(4).channels(4).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
  }

  // 4 rows; channel counts 8, 12, ..., 36.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, channels_div_4) {
    for (size_t c = 8; c < 40; c += 4) {
      PReLUMicrokernelTester().rows(4).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
    }
  }

  // 4 rows; channel counts 1 through 3.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, channels_lt_4) {
    for (size_t c = 1; c < 4; ++c) {
      PReLUMicrokernelTester().rows(4).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
    }
  }

  // 4 rows; channel counts 5 through 7.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, channels_gt_4) {
    for (size_t c = 5; c < 8; ++c) {
      PReLUMicrokernelTester().rows(4).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
    }
  }

  // Rows 1 through 3; channel counts 1, 4, ..., 19.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, rows_lt_4) {
    for (size_t r = 1; r < 4; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
      }
    }
  }

  // Rows 8, 12, 16; channel counts 1, 4, ..., 19.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, rows_div_4) {
    for (size_t r = 8; r <= 16; r += 4) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
      }
    }
  }

  // Rows 5 through 7; channel counts 1, 4, ..., 19.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, rows_gt_4) {
    for (size_t r = 5; r < 8; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
      }
    }
  }

  // Rows 1, 4, 7, 10; channel counts 1, 4, ..., 19; input_stride(23), iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, input_stride) {
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).input_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
      }
    }
  }

  // Rows 1, 4, 7, 10; channel counts 1, 4, ..., 19; output_stride(23), iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, output_stride) {
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).output_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
      }
    }
  }

  // Rows 1, 4, 7, 10; channel counts 1, 4, ..., 19; inplace(true), iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X4, inplace) {
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
3819
3820
#if XNN_ARCH_WASMSIMD
  // Tests for xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8, driven through PReLUMicrokernelTester.

  // 4 rows, 8 channels.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, channels_eq_8) {
    PReLUMicrokernelTester().rows(4).channels(8).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
  }

  // 4 rows; channel counts 16, 24, ..., 72.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, channels_div_8) {
    for (size_t c = 16; c < 80; c += 8) {
      PReLUMicrokernelTester().rows(4).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
    }
  }

  // 4 rows; channel counts 1 through 7.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, channels_lt_8) {
    for (size_t c = 1; c < 8; ++c) {
      PReLUMicrokernelTester().rows(4).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
    }
  }

  // 4 rows; channel counts 9 through 15.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, channels_gt_8) {
    for (size_t c = 9; c < 16; ++c) {
      PReLUMicrokernelTester().rows(4).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
    }
  }

  // Rows 1 through 3; channel counts 1, 8, ..., 36.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, rows_lt_4) {
    for (size_t r = 1; r < 4; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
      }
    }
  }

  // Rows 8, 12, 16; channel counts 1, 8, ..., 36.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, rows_div_4) {
    for (size_t r = 8; r <= 16; r += 4) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
      }
    }
  }

  // Rows 5 through 7; channel counts 1, 8, ..., 36.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, rows_gt_4) {
    for (size_t r = 5; r < 8; ++r) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
      }
    }
  }

  // Rows 1, 4, 7, 10; channel counts 1, 8, ..., 36; input_stride(43), iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, input_stride) {
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c).input_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
      }
    }
  }

  // Rows 1, 4, 7, 10; channel counts 1, 8, ..., 36; output_stride(43), iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, output_stride) {
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c).output_stride(43).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
      }
    }
  }

  // Rows 1, 4, 7, 10; channel counts 1, 8, ..., 36; inplace(true), iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X8, inplace) {
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 40; c += 7) {
        PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
3928
3929
#if XNN_ARCH_WASMSIMD
  // Tests for xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16, driven through PReLUMicrokernelTester.

  // 4 rows, 16 channels.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, channels_eq_16) {
    PReLUMicrokernelTester().rows(4).channels(16).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
  }

  // 4 rows; channel counts 32, 48, ..., 144.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, channels_div_16) {
    for (size_t c = 32; c < 160; c += 16) {
      PReLUMicrokernelTester().rows(4).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
    }
  }

  // 4 rows; channel counts 1 through 15.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, channels_lt_16) {
    for (size_t c = 1; c < 16; ++c) {
      PReLUMicrokernelTester().rows(4).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
    }
  }

  // 4 rows; channel counts 17 through 31.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, channels_gt_16) {
    for (size_t c = 17; c < 32; ++c) {
      PReLUMicrokernelTester().rows(4).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
    }
  }

  // Rows 1 through 3; channel counts 1, 16, ..., 76.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, rows_lt_4) {
    for (size_t r = 1; r < 4; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
      }
    }
  }

  // Rows 8, 12, 16; channel counts 1, 16, ..., 76.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, rows_div_4) {
    for (size_t r = 8; r <= 16; r += 4) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
      }
    }
  }

  // Rows 5 through 7; channel counts 1, 16, ..., 76.
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, rows_gt_4) {
    for (size_t r = 5; r < 8; ++r) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
      }
    }
  }

  // Rows 1, 4, 7, 10; channel counts 1, 16, ..., 76; input_stride(83), iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, input_stride) {
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).input_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
      }
    }
  }

  // Rows 1, 4, 7, 10; channel counts 1, 16, ..., 76; output_stride(83), iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, output_stride) {
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).output_stride(83).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
      }
    }
  }

  // Rows 1, 4, 7, 10; channel counts 1, 16, ..., 76; inplace(true), iterations(1).
  TEST(F32_PRELU__WASMSIMD_MINMAX_4X16, inplace) {
    for (size_t r = 1; r <= 12; r += 3) {
      for (size_t c = 1; c <= 80; c += 15) {
        PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD
4037
4038
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  // Tests for xnn_f32_prelu_ukernel__wasm_2x1, driven through PReLUMicrokernelTester.

  // 2 rows, 1 channel.
  TEST(F32_PRELU__WASM_2X1, channels_eq_1) {
    PReLUMicrokernelTester().rows(2).channels(1).Test(xnn_f32_prelu_ukernel__wasm_2x1);
  }

  // 2 rows; channel counts 2 through 9.
  TEST(F32_PRELU__WASM_2X1, channels_gt_1) {
    for (size_t c = 2; c < 10; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__wasm_2x1);
    }
  }

  // 1 row; channel counts 1 through 5.
  TEST(F32_PRELU__WASM_2X1, rows_lt_2) {
    for (size_t r = 1; r < 2; ++r) {
      for (size_t c = 1; c <= 5; ++c) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasm_2x1);
      }
    }
  }

  // Rows 4, 6, 8; channel counts 1 through 5.
  TEST(F32_PRELU__WASM_2X1, rows_div_2) {
    for (size_t r = 4; r <= 8; r += 2) {
      for (size_t c = 1; c <= 5; ++c) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasm_2x1);
      }
    }
  }

  // 3 rows; channel counts 1 through 5.
  TEST(F32_PRELU__WASM_2X1, rows_gt_2) {
    for (size_t r = 3; r < 4; ++r) {
      for (size_t c = 1; c <= 5; ++c) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasm_2x1);
      }
    }
  }

  // Rows 1 through 6, channel counts 1 through 5, with input_stride(7) and iterations(1).
  TEST(F32_PRELU__WASM_2X1, input_stride) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 5; ++c) {
        PReLUMicrokernelTester().rows(r).channels(c).input_stride(7).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasm_2x1);
      }
    }
  }

  // Rows 1 through 6, channel counts 1 through 5, with output_stride(7) and iterations(1).
  TEST(F32_PRELU__WASM_2X1, output_stride) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 5; ++c) {
        PReLUMicrokernelTester().rows(r).channels(c).output_stride(7).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasm_2x1);
      }
    }
  }

  // Rows 1 through 6, channel counts 1 through 5, with inplace(true) and iterations(1).
  TEST(F32_PRELU__WASM_2X1, inplace) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 5; ++c) {
        PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasm_2x1);
      }
    }
  }
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
Marat Dukhan7c1f8082020-06-25 13:26:20 -07004128
4129
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  // Tests for xnn_f32_prelu_ukernel__wasm_2x4, driven through PReLUMicrokernelTester.

  // 2 rows, 4 channels.
  TEST(F32_PRELU__WASM_2X4, channels_eq_4) {
    PReLUMicrokernelTester().rows(2).channels(4).Test(xnn_f32_prelu_ukernel__wasm_2x4);
  }

  // 2 rows; channel counts 8, 12, ..., 36.
  TEST(F32_PRELU__WASM_2X4, channels_div_4) {
    for (size_t c = 8; c < 40; c += 4) {
      PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__wasm_2x4);
    }
  }

  // 2 rows; channel counts 1 through 3.
  TEST(F32_PRELU__WASM_2X4, channels_lt_4) {
    for (size_t c = 1; c < 4; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__wasm_2x4);
    }
  }

  // 2 rows; channel counts 5 through 7.
  TEST(F32_PRELU__WASM_2X4, channels_gt_4) {
    for (size_t c = 5; c < 8; ++c) {
      PReLUMicrokernelTester().rows(2).channels(c).Test(xnn_f32_prelu_ukernel__wasm_2x4);
    }
  }

  // 1 row; channel counts 1, 4, ..., 19.
  TEST(F32_PRELU__WASM_2X4, rows_lt_2) {
    for (size_t r = 1; r < 2; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasm_2x4);
      }
    }
  }

  // Rows 4, 6, 8; channel counts 1, 4, ..., 19.
  TEST(F32_PRELU__WASM_2X4, rows_div_2) {
    for (size_t r = 4; r <= 8; r += 2) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasm_2x4);
      }
    }
  }

  // 3 rows; channel counts 1, 4, ..., 19.
  TEST(F32_PRELU__WASM_2X4, rows_gt_2) {
    for (size_t r = 3; r < 4; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).Test(xnn_f32_prelu_ukernel__wasm_2x4);
      }
    }
  }

  // Rows 1 through 6, channel counts 1, 4, ..., 19, with input_stride(23) and iterations(1).
  TEST(F32_PRELU__WASM_2X4, input_stride) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).input_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasm_2x4);
      }
    }
  }

  // Rows 1 through 6, channel counts 1, 4, ..., 19, with output_stride(23) and iterations(1).
  TEST(F32_PRELU__WASM_2X4, output_stride) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).output_stride(23).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasm_2x4);
      }
    }
  }

  // Rows 1 through 6, channel counts 1, 4, ..., 19, with inplace(true) and iterations(1).
  TEST(F32_PRELU__WASM_2X4, inplace) {
    for (size_t r = 1; r <= 6; ++r) {
      for (size_t c = 1; c <= 20; c += 3) {
        PReLUMicrokernelTester().rows(r).channels(c).inplace(true).iterations(1)
          .Test(xnn_f32_prelu_ukernel__wasm_2x4);
      }
    }
  }
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
Marat Dukhan7c1f8082020-06-25 13:26:20 -07004237
4238
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004239TEST(F32_PRELU__SCALAR_2X1, channels_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004240 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004241 .rows(2)
4242 .channels(1)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004243 .Test(xnn_f32_prelu_ukernel__scalar_2x1);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004244}
4245
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004246TEST(F32_PRELU__SCALAR_2X1, channels_gt_1) {
Marat Dukhan0f06b5c2019-11-07 19:55:54 -08004247 for (size_t channels = 2; channels < 10; channels++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004248 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004249 .rows(2)
4250 .channels(channels)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004251 .Test(xnn_f32_prelu_ukernel__scalar_2x1);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004252 }
4253}
4254
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004255TEST(F32_PRELU__SCALAR_2X1, rows_lt_2) {
4256 for (size_t rows = 1; rows < 2; rows++) {
4257 for (size_t channels = 1; channels <= 5; channels += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004258 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004259 .rows(rows)
4260 .channels(channels)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004261 .Test(xnn_f32_prelu_ukernel__scalar_2x1);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004262 }
4263 }
4264}
4265
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004266TEST(F32_PRELU__SCALAR_2X1, rows_div_2) {
4267 for (size_t rows = 4; rows <= 8; rows += 2) {
4268 for (size_t channels = 1; channels <= 5; channels += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004269 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004270 .rows(rows)
4271 .channels(channels)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004272 .Test(xnn_f32_prelu_ukernel__scalar_2x1);
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004273 }
4274 }
4275}
4276
4277TEST(F32_PRELU__SCALAR_2X1, rows_gt_2) {
4278 for (size_t rows = 3; rows < 4; rows++) {
4279 for (size_t channels = 1; channels <= 5; channels += 1) {
4280 PReLUMicrokernelTester()
4281 .rows(rows)
4282 .channels(channels)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004283 .Test(xnn_f32_prelu_ukernel__scalar_2x1);
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004284 }
4285 }
4286}
4287
4288TEST(F32_PRELU__SCALAR_2X1, input_stride) {
4289 for (size_t rows = 1; rows <= 6; rows += 1) {
4290 for (size_t channels = 1; channels <= 5; channels += 1) {
4291 PReLUMicrokernelTester()
4292 .rows(rows)
4293 .channels(channels)
4294 .input_stride(7)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004295 .iterations(1)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004296 .Test(xnn_f32_prelu_ukernel__scalar_2x1);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004297 }
4298 }
4299}
4300
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004301TEST(F32_PRELU__SCALAR_2X1, output_stride) {
4302 for (size_t rows = 1; rows <= 6; rows += 1) {
4303 for (size_t channels = 1; channels <= 5; channels += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004304 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004305 .rows(rows)
4306 .channels(channels)
4307 .output_stride(7)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004308 .iterations(1)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004309 .Test(xnn_f32_prelu_ukernel__scalar_2x1);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004310 }
4311 }
4312}
4313
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004314TEST(F32_PRELU__SCALAR_2X1, inplace) {
4315 for (size_t rows = 1; rows <= 6; rows += 1) {
4316 for (size_t channels = 1; channels <= 5; channels += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07004317 PReLUMicrokernelTester()
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004318 .rows(rows)
4319 .channels(channels)
XNNPACK Teamb455b122019-09-27 18:10:33 -07004320 .inplace(true)
4321 .iterations(1)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004322 .Test(xnn_f32_prelu_ukernel__scalar_2x1);
XNNPACK Teamb455b122019-09-27 18:10:33 -07004323 }
4324 }
4325}
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004326
4327TEST(F32_PRELU__SCALAR_2X4, channels_eq_4) {
4328 PReLUMicrokernelTester()
4329 .rows(2)
4330 .channels(4)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004331 .Test(xnn_f32_prelu_ukernel__scalar_2x4);
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004332}
4333
4334TEST(F32_PRELU__SCALAR_2X4, channels_div_4) {
4335 for (size_t channels = 8; channels < 40; channels += 4) {
4336 PReLUMicrokernelTester()
4337 .rows(2)
4338 .channels(channels)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004339 .Test(xnn_f32_prelu_ukernel__scalar_2x4);
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004340 }
4341}
4342
4343TEST(F32_PRELU__SCALAR_2X4, channels_lt_4) {
4344 for (size_t channels = 1; channels < 4; channels++) {
4345 PReLUMicrokernelTester()
4346 .rows(2)
4347 .channels(channels)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004348 .Test(xnn_f32_prelu_ukernel__scalar_2x4);
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004349 }
4350}
4351
4352TEST(F32_PRELU__SCALAR_2X4, channels_gt_4) {
4353 for (size_t channels = 5; channels < 8; channels++) {
4354 PReLUMicrokernelTester()
4355 .rows(2)
4356 .channels(channels)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004357 .Test(xnn_f32_prelu_ukernel__scalar_2x4);
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004358 }
4359}
4360
4361TEST(F32_PRELU__SCALAR_2X4, rows_lt_2) {
4362 for (size_t rows = 1; rows < 2; rows++) {
4363 for (size_t channels = 1; channels <= 20; channels += 3) {
4364 PReLUMicrokernelTester()
4365 .rows(rows)
4366 .channels(channels)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004367 .Test(xnn_f32_prelu_ukernel__scalar_2x4);
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004368 }
4369 }
4370}
4371
4372TEST(F32_PRELU__SCALAR_2X4, rows_div_2) {
4373 for (size_t rows = 4; rows <= 8; rows += 2) {
4374 for (size_t channels = 1; channels <= 20; channels += 3) {
4375 PReLUMicrokernelTester()
4376 .rows(rows)
4377 .channels(channels)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004378 .Test(xnn_f32_prelu_ukernel__scalar_2x4);
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004379 }
4380 }
4381}
4382
4383TEST(F32_PRELU__SCALAR_2X4, rows_gt_2) {
4384 for (size_t rows = 3; rows < 4; rows++) {
4385 for (size_t channels = 1; channels <= 20; channels += 3) {
4386 PReLUMicrokernelTester()
4387 .rows(rows)
4388 .channels(channels)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004389 .Test(xnn_f32_prelu_ukernel__scalar_2x4);
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004390 }
4391 }
4392}
4393
4394TEST(F32_PRELU__SCALAR_2X4, input_stride) {
4395 for (size_t rows = 1; rows <= 6; rows += 1) {
4396 for (size_t channels = 1; channels <= 20; channels += 3) {
4397 PReLUMicrokernelTester()
4398 .rows(rows)
4399 .channels(channels)
4400 .input_stride(23)
4401 .iterations(1)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004402 .Test(xnn_f32_prelu_ukernel__scalar_2x4);
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004403 }
4404 }
4405}
4406
4407TEST(F32_PRELU__SCALAR_2X4, output_stride) {
4408 for (size_t rows = 1; rows <= 6; rows += 1) {
4409 for (size_t channels = 1; channels <= 20; channels += 3) {
4410 PReLUMicrokernelTester()
4411 .rows(rows)
4412 .channels(channels)
4413 .output_stride(23)
4414 .iterations(1)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004415 .Test(xnn_f32_prelu_ukernel__scalar_2x4);
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004416 }
4417 }
4418}
4419
4420TEST(F32_PRELU__SCALAR_2X4, inplace) {
4421 for (size_t rows = 1; rows <= 6; rows += 1) {
4422 for (size_t channels = 1; channels <= 20; channels += 3) {
4423 PReLUMicrokernelTester()
4424 .rows(rows)
4425 .channels(channels)
4426 .inplace(true)
4427 .iterations(1)
Marat Dukhanc8230a42020-02-24 00:00:35 -08004428 .Test(xnn_f32_prelu_ukernel__scalar_2x4);
Marat Dukhan69c3f2c2019-11-06 12:30:01 -08004429 }
4430 }
4431}