blob: 858cc953f41532882747268b651e612c528a00c5 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-hswish.yaml
// Generator: tools/generate-hswish-test.py
9
XNNPACK Teamb455b122019-09-27 18:10:33 -070010
XNNPACK Teamb455b122019-09-27 18:10:33 -070011#include <gtest/gtest.h>
12
Marat Dukhan1dadbf72019-10-01 10:46:20 -070013#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <xnnpack/isa-checks.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070015
Marat Dukhan1dadbf72019-10-01 10:46:20 -070016#include <xnnpack/hswish.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070017#include "hswish-microkernel-tester.h"
18
19
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_hswish_ukernel__neon_x4; compiled only for ARM/ARM64 targets.
  // Every test starts with the TEST_REQUIRES_ARM_NEON check.

  // Batch of exactly 4 elements.
  TEST(F32_HSWISH__NEON_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON;
    HSwishMicrokernelTester().batch_size(4).Test(xnn_f32_hswish_ukernel__neon_x4);
  }

  // Batch sizes that are multiples of 4: 8, 12, ..., 36.
  TEST(F32_HSWISH__NEON_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 8; n < 40; n += 4) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__neon_x4);
    }
  }

  // Batch sizes below 4: 1, 2, 3.
  TEST(F32_HSWISH__NEON_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 4; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__neon_x4);
    }
  }

  // Batch sizes strictly between 4 and 8: 5, 6, 7.
  TEST(F32_HSWISH__NEON_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 5; n < 8; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__neon_x4);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 4, 7, ..., 19.
  TEST(F32_HSWISH__NEON_X4, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 20; n += 3) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__neon_x4);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
65
66
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_hswish_ukernel__neon_x8; compiled only for ARM/ARM64 targets.
  // Every test starts with the TEST_REQUIRES_ARM_NEON check.

  // Batch of exactly 8 elements.
  TEST(F32_HSWISH__NEON_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    HSwishMicrokernelTester().batch_size(8).Test(xnn_f32_hswish_ukernel__neon_x8);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_HSWISH__NEON_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 16; n < 80; n += 8) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__neon_x8);
    }
  }

  // Batch sizes below 8: 1..7.
  TEST(F32_HSWISH__NEON_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 8; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__neon_x8);
    }
  }

  // Batch sizes strictly between 8 and 16: 9..15.
  TEST(F32_HSWISH__NEON_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 9; n < 16; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__neon_x8);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 8, 15, ..., 36.
  TEST(F32_HSWISH__NEON_X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 40; n += 7) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__neon_x8);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
112
113
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_f32_hswish_ukernel__neon_x16; compiled only for ARM/ARM64 targets.
  // Every test starts with the TEST_REQUIRES_ARM_NEON check.

  // Batch of exactly 16 elements.
  TEST(F32_HSWISH__NEON_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    HSwishMicrokernelTester().batch_size(16).Test(xnn_f32_hswish_ukernel__neon_x16);
  }

  // Batch sizes that are multiples of 16: 32, 48, ..., 144.
  TEST(F32_HSWISH__NEON_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 32; n < 160; n += 16) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__neon_x16);
    }
  }

  // Batch sizes below 16: 1..15.
  TEST(F32_HSWISH__NEON_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 16; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__neon_x16);
    }
  }

  // Batch sizes strictly between 16 and 32: 17..31.
  TEST(F32_HSWISH__NEON_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 17; n < 32; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__neon_x16);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 16, 31, ..., 76.
  TEST(F32_HSWISH__NEON_X16, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n <= 80; n += 15) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__neon_x16);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
159
160
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_f32_hswish_ukernel__sse_x4; compiled only for x86/x86-64 targets.
  // Every test starts with the TEST_REQUIRES_X86_SSE check.

  // Batch of exactly 4 elements.
  TEST(F32_HSWISH__SSE_X4, batch_eq_4) {
    TEST_REQUIRES_X86_SSE;
    HSwishMicrokernelTester().batch_size(4).Test(xnn_f32_hswish_ukernel__sse_x4);
  }

  // Batch sizes that are multiples of 4: 8, 12, ..., 36.
  TEST(F32_HSWISH__SSE_X4, batch_div_4) {
    TEST_REQUIRES_X86_SSE;
    for (size_t n = 8; n < 40; n += 4) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__sse_x4);
    }
  }

  // Batch sizes below 4: 1, 2, 3.
  TEST(F32_HSWISH__SSE_X4, batch_lt_4) {
    TEST_REQUIRES_X86_SSE;
    for (size_t n = 1; n < 4; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__sse_x4);
    }
  }

  // Batch sizes strictly between 4 and 8: 5, 6, 7.
  TEST(F32_HSWISH__SSE_X4, batch_gt_4) {
    TEST_REQUIRES_X86_SSE;
    for (size_t n = 5; n < 8; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__sse_x4);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 4, 7, ..., 19.
  TEST(F32_HSWISH__SSE_X4, inplace) {
    TEST_REQUIRES_X86_SSE;
    for (size_t n = 1; n <= 20; n += 3) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__sse_x4);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700206
207
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_f32_hswish_ukernel__sse_x8; compiled only for x86/x86-64 targets.
  // Every test starts with the TEST_REQUIRES_X86_SSE check.

  // Batch of exactly 8 elements.
  TEST(F32_HSWISH__SSE_X8, batch_eq_8) {
    TEST_REQUIRES_X86_SSE;
    HSwishMicrokernelTester().batch_size(8).Test(xnn_f32_hswish_ukernel__sse_x8);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_HSWISH__SSE_X8, batch_div_8) {
    TEST_REQUIRES_X86_SSE;
    for (size_t n = 16; n < 80; n += 8) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__sse_x8);
    }
  }

  // Batch sizes below 8: 1..7.
  TEST(F32_HSWISH__SSE_X8, batch_lt_8) {
    TEST_REQUIRES_X86_SSE;
    for (size_t n = 1; n < 8; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__sse_x8);
    }
  }

  // Batch sizes strictly between 8 and 16: 9..15.
  TEST(F32_HSWISH__SSE_X8, batch_gt_8) {
    TEST_REQUIRES_X86_SSE;
    for (size_t n = 9; n < 16; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__sse_x8);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 8, 15, ..., 36.
  TEST(F32_HSWISH__SSE_X8, inplace) {
    TEST_REQUIRES_X86_SSE;
    for (size_t n = 1; n <= 40; n += 7) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__sse_x8);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
253
254
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_f32_hswish_ukernel__avx_x8; compiled only for x86/x86-64 targets.
  // Every test starts with the TEST_REQUIRES_X86_AVX check.

  // Batch of exactly 8 elements.
  TEST(F32_HSWISH__AVX_X8, batch_eq_8) {
    TEST_REQUIRES_X86_AVX;
    HSwishMicrokernelTester().batch_size(8).Test(xnn_f32_hswish_ukernel__avx_x8);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_HSWISH__AVX_X8, batch_div_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 16; n < 80; n += 8) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__avx_x8);
    }
  }

  // Batch sizes below 8: 1..7.
  TEST(F32_HSWISH__AVX_X8, batch_lt_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n < 8; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__avx_x8);
    }
  }

  // Batch sizes strictly between 8 and 16: 9..15.
  TEST(F32_HSWISH__AVX_X8, batch_gt_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 9; n < 16; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__avx_x8);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 8, 15, ..., 36.
  TEST(F32_HSWISH__AVX_X8, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 40; n += 7) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__avx_x8);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
300
301
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_f32_hswish_ukernel__avx_x16; compiled only for x86/x86-64 targets.
  // Every test starts with the TEST_REQUIRES_X86_AVX check.

  // Batch of exactly 16 elements.
  TEST(F32_HSWISH__AVX_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX;
    HSwishMicrokernelTester().batch_size(16).Test(xnn_f32_hswish_ukernel__avx_x16);
  }

  // Batch sizes that are multiples of 16: 32, 48, ..., 144.
  TEST(F32_HSWISH__AVX_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 32; n < 160; n += 16) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__avx_x16);
    }
  }

  // Batch sizes below 16: 1..15.
  TEST(F32_HSWISH__AVX_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n < 16; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__avx_x16);
    }
  }

  // Batch sizes strictly between 16 and 32: 17..31.
  TEST(F32_HSWISH__AVX_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 17; n < 32; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__avx_x16);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 16, 31, ..., 76.
  TEST(F32_HSWISH__AVX_X16, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 80; n += 15) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__avx_x16);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
347
348
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_f32_hswish_ukernel__fma3_x8; compiled only for x86/x86-64 targets.
  // Every test starts with the TEST_REQUIRES_X86_FMA3 check.

  // Batch of exactly 8 elements.
  TEST(F32_HSWISH__FMA3_X8, batch_eq_8) {
    TEST_REQUIRES_X86_FMA3;
    HSwishMicrokernelTester().batch_size(8).Test(xnn_f32_hswish_ukernel__fma3_x8);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_HSWISH__FMA3_X8, batch_div_8) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t n = 16; n < 80; n += 8) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__fma3_x8);
    }
  }

  // Batch sizes below 8: 1..7.
  TEST(F32_HSWISH__FMA3_X8, batch_lt_8) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t n = 1; n < 8; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__fma3_x8);
    }
  }

  // Batch sizes strictly between 8 and 16: 9..15.
  TEST(F32_HSWISH__FMA3_X8, batch_gt_8) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t n = 9; n < 16; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__fma3_x8);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 8, 15, ..., 36.
  TEST(F32_HSWISH__FMA3_X8, inplace) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t n = 1; n <= 40; n += 7) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__fma3_x8);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
394
395
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_f32_hswish_ukernel__fma3_x16; compiled only for x86/x86-64 targets.
  // Every test starts with the TEST_REQUIRES_X86_FMA3 check.

  // Batch of exactly 16 elements.
  TEST(F32_HSWISH__FMA3_X16, batch_eq_16) {
    TEST_REQUIRES_X86_FMA3;
    HSwishMicrokernelTester().batch_size(16).Test(xnn_f32_hswish_ukernel__fma3_x16);
  }

  // Batch sizes that are multiples of 16: 32, 48, ..., 144.
  TEST(F32_HSWISH__FMA3_X16, batch_div_16) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t n = 32; n < 160; n += 16) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__fma3_x16);
    }
  }

  // Batch sizes below 16: 1..15.
  TEST(F32_HSWISH__FMA3_X16, batch_lt_16) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t n = 1; n < 16; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__fma3_x16);
    }
  }

  // Batch sizes strictly between 16 and 32: 17..31.
  TEST(F32_HSWISH__FMA3_X16, batch_gt_16) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t n = 17; n < 32; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__fma3_x16);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 16, 31, ..., 76.
  TEST(F32_HSWISH__FMA3_X16, inplace) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t n = 1; n <= 80; n += 15) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__fma3_x16);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
441
442
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_f32_hswish_ukernel__avx512f_x16; compiled only for x86/x86-64 targets.
  // Every test starts with the TEST_REQUIRES_X86_AVX512F check.

  // Batch of exactly 16 elements.
  TEST(F32_HSWISH__AVX512F_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX512F;
    HSwishMicrokernelTester().batch_size(16).Test(xnn_f32_hswish_ukernel__avx512f_x16);
  }

  // Batch sizes that are multiples of 16: 32, 48, ..., 144.
  TEST(F32_HSWISH__AVX512F_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t n = 32; n < 160; n += 16) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__avx512f_x16);
    }
  }

  // Batch sizes below 16: 1..15.
  TEST(F32_HSWISH__AVX512F_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t n = 1; n < 16; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__avx512f_x16);
    }
  }

  // Batch sizes strictly between 16 and 32: 17..31.
  TEST(F32_HSWISH__AVX512F_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t n = 17; n < 32; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__avx512f_x16);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 16, 31, ..., 76.
  TEST(F32_HSWISH__AVX512F_X16, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t n = 1; n <= 80; n += 15) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__avx512f_x16);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
488
489
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_f32_hswish_ukernel__avx512f_x32; compiled only for x86/x86-64 targets.
  // Every test starts with the TEST_REQUIRES_X86_AVX512F check.

  // Batch of exactly 32 elements.
  TEST(F32_HSWISH__AVX512F_X32, batch_eq_32) {
    TEST_REQUIRES_X86_AVX512F;
    HSwishMicrokernelTester().batch_size(32).Test(xnn_f32_hswish_ukernel__avx512f_x32);
  }

  // Batch sizes that are multiples of 32: 64, 96, ..., 288.
  TEST(F32_HSWISH__AVX512F_X32, batch_div_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t n = 64; n < 320; n += 32) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__avx512f_x32);
    }
  }

  // Batch sizes below 32: 1..31.
  TEST(F32_HSWISH__AVX512F_X32, batch_lt_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t n = 1; n < 32; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__avx512f_x32);
    }
  }

  // Batch sizes strictly between 32 and 64: 33..63.
  TEST(F32_HSWISH__AVX512F_X32, batch_gt_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t n = 33; n < 64; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__avx512f_x32);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 32, 63, ..., 156.
  TEST(F32_HSWISH__AVX512F_X32, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t n = 1; n <= 160; n += 31) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__avx512f_x32);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
535
536
#if XNN_ARCH_WASMSIMD
  // Test suite for xnn_f32_hswish_ukernel__wasmsimd_x4; compiled only when XNN_ARCH_WASMSIMD is set.

  // Batch of exactly 4 elements.
  TEST(F32_HSWISH__WASMSIMD_X4, batch_eq_4) {
    HSwishMicrokernelTester().batch_size(4).Test(xnn_f32_hswish_ukernel__wasmsimd_x4);
  }

  // Batch sizes that are multiples of 4: 8, 12, ..., 36.
  TEST(F32_HSWISH__WASMSIMD_X4, batch_div_4) {
    for (size_t n = 8; n < 40; n += 4) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasmsimd_x4);
    }
  }

  // Batch sizes below 4: 1, 2, 3.
  TEST(F32_HSWISH__WASMSIMD_X4, batch_lt_4) {
    for (size_t n = 1; n < 4; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasmsimd_x4);
    }
  }

  // Batch sizes strictly between 4 and 8: 5, 6, 7.
  TEST(F32_HSWISH__WASMSIMD_X4, batch_gt_4) {
    for (size_t n = 5; n < 8; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasmsimd_x4);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 4, 7, ..., 19.
  TEST(F32_HSWISH__WASMSIMD_X4, inplace) {
    for (size_t n = 1; n <= 20; n += 3) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__wasmsimd_x4);
    }
  }
#endif  // XNN_ARCH_WASMSIMD
577
578
#if XNN_ARCH_WASMSIMD
  // Test suite for xnn_f32_hswish_ukernel__wasmsimd_x8; compiled only when XNN_ARCH_WASMSIMD is set.

  // Batch of exactly 8 elements.
  TEST(F32_HSWISH__WASMSIMD_X8, batch_eq_8) {
    HSwishMicrokernelTester().batch_size(8).Test(xnn_f32_hswish_ukernel__wasmsimd_x8);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_HSWISH__WASMSIMD_X8, batch_div_8) {
    for (size_t n = 16; n < 80; n += 8) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasmsimd_x8);
    }
  }

  // Batch sizes below 8: 1..7.
  TEST(F32_HSWISH__WASMSIMD_X8, batch_lt_8) {
    for (size_t n = 1; n < 8; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasmsimd_x8);
    }
  }

  // Batch sizes strictly between 8 and 16: 9..15.
  TEST(F32_HSWISH__WASMSIMD_X8, batch_gt_8) {
    for (size_t n = 9; n < 16; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasmsimd_x8);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 8, 15, ..., 36.
  TEST(F32_HSWISH__WASMSIMD_X8, inplace) {
    for (size_t n = 1; n <= 40; n += 7) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__wasmsimd_x8);
    }
  }
#endif  // XNN_ARCH_WASMSIMD
619
620
#if XNN_ARCH_WASMSIMD
  // Test suite for xnn_f32_hswish_ukernel__wasmsimd_x16; compiled only when XNN_ARCH_WASMSIMD is set.

  // Batch of exactly 16 elements.
  TEST(F32_HSWISH__WASMSIMD_X16, batch_eq_16) {
    HSwishMicrokernelTester().batch_size(16).Test(xnn_f32_hswish_ukernel__wasmsimd_x16);
  }

  // Batch sizes that are multiples of 16: 32, 48, ..., 144.
  TEST(F32_HSWISH__WASMSIMD_X16, batch_div_16) {
    for (size_t n = 32; n < 160; n += 16) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasmsimd_x16);
    }
  }

  // Batch sizes below 16: 1..15.
  TEST(F32_HSWISH__WASMSIMD_X16, batch_lt_16) {
    for (size_t n = 1; n < 16; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasmsimd_x16);
    }
  }

  // Batch sizes strictly between 16 and 32: 17..31.
  TEST(F32_HSWISH__WASMSIMD_X16, batch_gt_16) {
    for (size_t n = 17; n < 32; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasmsimd_x16);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 16, 31, ..., 76.
  TEST(F32_HSWISH__WASMSIMD_X16, inplace) {
    for (size_t n = 1; n <= 80; n += 15) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__wasmsimd_x16);
    }
  }
#endif  // XNN_ARCH_WASMSIMD
661
662
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  // Test suite for xnn_f32_hswish_ukernel__wasm_x1; compiled for WASM and WASM SIMD targets.

  // Batch of exactly 1 element.
  TEST(F32_HSWISH__WASM_X1, batch_eq_1) {
    HSwishMicrokernelTester().batch_size(1).Test(xnn_f32_hswish_ukernel__wasm_x1);
  }

  // Batch sizes above 1: 2..9.
  TEST(F32_HSWISH__WASM_X1, batch_gt_1) {
    for (size_t n = 2; n < 10; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasm_x1);
    }
  }

  // Tester's inplace option enabled, batch sizes 1..5.
  TEST(F32_HSWISH__WASM_X1, inplace) {
    for (size_t n = 1; n <= 5; ++n) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__wasm_x1);
    }
  }
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
Marat Dukhan436ebe62019-12-04 15:10:12 -0800687
688
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  // Test suite for xnn_f32_hswish_ukernel__wasm_x2; compiled for WASM and WASM SIMD targets.

  // Batch of exactly 2 elements.
  TEST(F32_HSWISH__WASM_X2, batch_eq_2) {
    HSwishMicrokernelTester().batch_size(2).Test(xnn_f32_hswish_ukernel__wasm_x2);
  }

  // Batch sizes that are multiples of 2: 4, 6, ..., 18.
  TEST(F32_HSWISH__WASM_X2, batch_div_2) {
    for (size_t n = 4; n < 20; n += 2) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasm_x2);
    }
  }

  // Batch sizes below 2; the loop covers the single value 1.
  TEST(F32_HSWISH__WASM_X2, batch_lt_2) {
    for (size_t n = 1; n < 2; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasm_x2);
    }
  }

  // Batch sizes strictly between 2 and 4; the loop covers the single value 3.
  TEST(F32_HSWISH__WASM_X2, batch_gt_2) {
    for (size_t n = 3; n < 4; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasm_x2);
    }
  }

  // Tester's inplace option enabled, batch sizes 1..10.
  TEST(F32_HSWISH__WASM_X2, inplace) {
    for (size_t n = 1; n <= 10; ++n) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__wasm_x2);
    }
  }
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
Marat Dukhan662faa02019-12-09 22:48:16 -0800729
730
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  // Test suite for xnn_f32_hswish_ukernel__wasm_x4; compiled for WASM and WASM SIMD targets.

  // Batch of exactly 4 elements.
  TEST(F32_HSWISH__WASM_X4, batch_eq_4) {
    HSwishMicrokernelTester().batch_size(4).Test(xnn_f32_hswish_ukernel__wasm_x4);
  }

  // Batch sizes that are multiples of 4: 8, 12, ..., 36.
  TEST(F32_HSWISH__WASM_X4, batch_div_4) {
    for (size_t n = 8; n < 40; n += 4) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasm_x4);
    }
  }

  // Batch sizes below 4: 1, 2, 3.
  TEST(F32_HSWISH__WASM_X4, batch_lt_4) {
    for (size_t n = 1; n < 4; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasm_x4);
    }
  }

  // Batch sizes strictly between 4 and 8: 5, 6, 7.
  TEST(F32_HSWISH__WASM_X4, batch_gt_4) {
    for (size_t n = 5; n < 8; ++n) {
      HSwishMicrokernelTester().batch_size(n).Test(xnn_f32_hswish_ukernel__wasm_x4);
    }
  }

  // Tester's inplace option enabled, batch sizes 1, 4, 7, ..., 19.
  TEST(F32_HSWISH__WASM_X4, inplace) {
    for (size_t n = 1; n <= 20; n += 3) {
      HSwishMicrokernelTester().batch_size(n).inplace(true)
        .Test(xnn_f32_hswish_ukernel__wasm_x4);
    }
  }
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
Marat Dukhan662faa02019-12-09 22:48:16 -0800771
772
773TEST(F32_HSWISH__SCALAR_X1, batch_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700774 HSwishMicrokernelTester()
Marat Dukhan662faa02019-12-09 22:48:16 -0800775 .batch_size(1)
776 .Test(xnn_f32_hswish_ukernel__scalar_x1, HSwishMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700777}
778
Marat Dukhan662faa02019-12-09 22:48:16 -0800779TEST(F32_HSWISH__SCALAR_X1, batch_gt_1) {
780 for (size_t batch_size = 2; batch_size < 10; batch_size++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700781 HSwishMicrokernelTester()
Marat Dukhan662faa02019-12-09 22:48:16 -0800782 .batch_size(batch_size)
783 .Test(xnn_f32_hswish_ukernel__scalar_x1, HSwishMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700784 }
785}
786
Marat Dukhan662faa02019-12-09 22:48:16 -0800787TEST(F32_HSWISH__SCALAR_X1, inplace) {
788 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700789 HSwishMicrokernelTester()
Marat Dukhan662faa02019-12-09 22:48:16 -0800790 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700791 .inplace(true)
Marat Dukhan662faa02019-12-09 22:48:16 -0800792 .Test(xnn_f32_hswish_ukernel__scalar_x1, HSwishMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700793 }
794}
Marat Dukhan662faa02019-12-09 22:48:16 -0800795
796TEST(F32_HSWISH__SCALAR_X2, batch_eq_2) {
797 HSwishMicrokernelTester()
798 .batch_size(2)
799 .Test(xnn_f32_hswish_ukernel__scalar_x2, HSwishMicrokernelTester::Variant::Scalar);
800}
801
802TEST(F32_HSWISH__SCALAR_X2, batch_div_2) {
803 for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
804 HSwishMicrokernelTester()
805 .batch_size(batch_size)
806 .Test(xnn_f32_hswish_ukernel__scalar_x2, HSwishMicrokernelTester::Variant::Scalar);
807 }
808}
809
810TEST(F32_HSWISH__SCALAR_X2, batch_lt_2) {
811 for (size_t batch_size = 1; batch_size < 2; batch_size++) {
812 HSwishMicrokernelTester()
813 .batch_size(batch_size)
814 .Test(xnn_f32_hswish_ukernel__scalar_x2, HSwishMicrokernelTester::Variant::Scalar);
815 }
816}
817
818TEST(F32_HSWISH__SCALAR_X2, batch_gt_2) {
819 for (size_t batch_size = 3; batch_size < 4; batch_size++) {
820 HSwishMicrokernelTester()
821 .batch_size(batch_size)
822 .Test(xnn_f32_hswish_ukernel__scalar_x2, HSwishMicrokernelTester::Variant::Scalar);
823 }
824}
825
826TEST(F32_HSWISH__SCALAR_X2, inplace) {
827 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
828 HSwishMicrokernelTester()
829 .batch_size(batch_size)
830 .inplace(true)
831 .Test(xnn_f32_hswish_ukernel__scalar_x2, HSwishMicrokernelTester::Variant::Scalar);
832 }
833}
834
835TEST(F32_HSWISH__SCALAR_X4, batch_eq_4) {
836 HSwishMicrokernelTester()
837 .batch_size(4)
838 .Test(xnn_f32_hswish_ukernel__scalar_x4, HSwishMicrokernelTester::Variant::Scalar);
839}
840
841TEST(F32_HSWISH__SCALAR_X4, batch_div_4) {
842 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
843 HSwishMicrokernelTester()
844 .batch_size(batch_size)
845 .Test(xnn_f32_hswish_ukernel__scalar_x4, HSwishMicrokernelTester::Variant::Scalar);
846 }
847}
848
849TEST(F32_HSWISH__SCALAR_X4, batch_lt_4) {
850 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
851 HSwishMicrokernelTester()
852 .batch_size(batch_size)
853 .Test(xnn_f32_hswish_ukernel__scalar_x4, HSwishMicrokernelTester::Variant::Scalar);
854 }
855}
856
857TEST(F32_HSWISH__SCALAR_X4, batch_gt_4) {
858 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
859 HSwishMicrokernelTester()
860 .batch_size(batch_size)
861 .Test(xnn_f32_hswish_ukernel__scalar_x4, HSwishMicrokernelTester::Variant::Scalar);
862 }
863}
864
865TEST(F32_HSWISH__SCALAR_X4, inplace) {
866 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
867 HSwishMicrokernelTester()
868 .batch_size(batch_size)
869 .inplace(true)
870 .Test(xnn_f32_hswish_ukernel__scalar_x4, HSwishMicrokernelTester::Variant::Scalar);
871 }
872}