blob: 70a6aa5fde63f73b4e818a40c1dc1b1d54fb4474 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-hswish.yaml
// Generator: tools/generate-hswish-test.py
9
XNNPACK Teamb455b122019-09-27 18:10:33 -070010
XNNPACK Teamb455b122019-09-27 18:10:33 -070011#include <gtest/gtest.h>
12
Marat Dukhan1dadbf72019-10-01 10:46:20 -070013#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <xnnpack/isa-checks.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070015
Marat Dukhan1dadbf72019-10-01 10:46:20 -070016#include <xnnpack/hswish.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070017#include "hswish-microkernel-tester.h"
18
19
// Tests for xnn_f32_hswish_ukernel__neon_x4; built only when XNN_ARCH_ARM or
// XNN_ARCH_ARM64 is set. TEST_REQUIRES_ARM_NEON is defined outside this file
// (presumably an ISA guard from xnnpack/isa-checks.h -- confirm there).
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 4 elements.
  TEST(F32_HSWISH__NEON_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON;
    HSwishMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_hswish_ukernel__neon_x4);
  }

  // Batch sizes that are multiples of 4: 8, 12, ..., 36.
  TEST(F32_HSWISH__NEON_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__neon_x4);
    }
  }

  // Batch sizes below 4: 1, 2, 3.
  TEST(F32_HSWISH__NEON_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__neon_x4);
    }
  }

  // Batch sizes strictly between 4 and 8: 5, 6, 7.
  TEST(F32_HSWISH__NEON_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__neon_x4);
    }
  }

  // Batch sizes 1, 4, 7, ..., 19 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__NEON_X4, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__neon_x4);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
65
66
// Tests for xnn_f32_hswish_ukernel__neon_x8; built only when XNN_ARCH_ARM or
// XNN_ARCH_ARM64 is set. TEST_REQUIRES_ARM_NEON is defined outside this file
// (presumably an ISA guard from xnnpack/isa-checks.h -- confirm there).
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 8 elements.
  TEST(F32_HSWISH__NEON_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    HSwishMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_hswish_ukernel__neon_x8);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_HSWISH__NEON_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__neon_x8);
    }
  }

  // Batch sizes below 8: 1 through 7.
  TEST(F32_HSWISH__NEON_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__neon_x8);
    }
  }

  // Batch sizes strictly between 8 and 16: 9 through 15.
  TEST(F32_HSWISH__NEON_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__neon_x8);
    }
  }

  // Batch sizes 1, 8, 15, ..., 36 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__NEON_X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__neon_x8);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
112
113
// Tests for xnn_f32_hswish_ukernel__neonfma_x4; built only when XNN_ARCH_ARM or
// XNN_ARCH_ARM64 is set. TEST_REQUIRES_ARM_NEON_FMA is defined outside this file
// (presumably an ISA guard from xnnpack/isa-checks.h -- confirm there).
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 4 elements.
  TEST(F32_HSWISH__NEONFMA_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    HSwishMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_hswish_ukernel__neonfma_x4);
  }

  // Batch sizes that are multiples of 4: 8, 12, ..., 36.
  TEST(F32_HSWISH__NEONFMA_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__neonfma_x4);
    }
  }

  // Batch sizes below 4: 1, 2, 3.
  TEST(F32_HSWISH__NEONFMA_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__neonfma_x4);
    }
  }

  // Batch sizes strictly between 4 and 8: 5, 6, 7.
  TEST(F32_HSWISH__NEONFMA_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__neonfma_x4);
    }
  }

  // Batch sizes 1, 4, 7, ..., 19 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__NEONFMA_X4, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__neonfma_x4);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
159
160
// Tests for xnn_f32_hswish_ukernel__neonfma_x8; built only when XNN_ARCH_ARM or
// XNN_ARCH_ARM64 is set. TEST_REQUIRES_ARM_NEON_FMA is defined outside this file
// (presumably an ISA guard from xnnpack/isa-checks.h -- confirm there).
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Batch of exactly 8 elements.
  TEST(F32_HSWISH__NEONFMA_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    HSwishMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_hswish_ukernel__neonfma_x8);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_HSWISH__NEONFMA_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__neonfma_x8);
    }
  }

  // Batch sizes below 8: 1 through 7.
  TEST(F32_HSWISH__NEONFMA_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__neonfma_x8);
    }
  }

  // Batch sizes strictly between 8 and 16: 9 through 15.
  TEST(F32_HSWISH__NEONFMA_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__neonfma_x8);
    }
  }

  // Batch sizes 1, 8, 15, ..., 36 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__NEONFMA_X8, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__neonfma_x8);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700206
207
// Tests for xnn_f32_hswish_ukernel__sse_x4; built only when XNN_ARCH_X86 or
// XNN_ARCH_X86_64 is set. TEST_REQUIRES_X86_SSE is defined outside this file
// (presumably an ISA guard from xnnpack/isa-checks.h -- confirm there).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 4 elements.
  TEST(F32_HSWISH__SSE_X4, batch_eq_4) {
    TEST_REQUIRES_X86_SSE;
    HSwishMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_hswish_ukernel__sse_x4);
  }

  // Batch sizes that are multiples of 4: 8, 12, ..., 36.
  TEST(F32_HSWISH__SSE_X4, batch_div_4) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__sse_x4);
    }
  }

  // Batch sizes below 4: 1, 2, 3.
  TEST(F32_HSWISH__SSE_X4, batch_lt_4) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__sse_x4);
    }
  }

  // Batch sizes strictly between 4 and 8: 5, 6, 7.
  TEST(F32_HSWISH__SSE_X4, batch_gt_4) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__sse_x4);
    }
  }

  // Batch sizes 1, 4, 7, ..., 19 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__SSE_X4, inplace) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__sse_x4);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700253
254
// Tests for xnn_f32_hswish_ukernel__sse_x8; built only when XNN_ARCH_X86 or
// XNN_ARCH_X86_64 is set. TEST_REQUIRES_X86_SSE is defined outside this file
// (presumably an ISA guard from xnnpack/isa-checks.h -- confirm there).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 8 elements.
  TEST(F32_HSWISH__SSE_X8, batch_eq_8) {
    TEST_REQUIRES_X86_SSE;
    HSwishMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_hswish_ukernel__sse_x8);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_HSWISH__SSE_X8, batch_div_8) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__sse_x8);
    }
  }

  // Batch sizes below 8: 1 through 7.
  TEST(F32_HSWISH__SSE_X8, batch_lt_8) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__sse_x8);
    }
  }

  // Batch sizes strictly between 8 and 16: 9 through 15.
  TEST(F32_HSWISH__SSE_X8, batch_gt_8) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__sse_x8);
    }
  }

  // Batch sizes 1, 8, 15, ..., 36 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__SSE_X8, inplace) {
    TEST_REQUIRES_X86_SSE;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__sse_x8);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
300
301
// Tests for xnn_f32_hswish_ukernel__avx_x8; built only when XNN_ARCH_X86 or
// XNN_ARCH_X86_64 is set. TEST_REQUIRES_X86_AVX is defined outside this file
// (presumably an ISA guard from xnnpack/isa-checks.h -- confirm there).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 8 elements.
  TEST(F32_HSWISH__AVX_X8, batch_eq_8) {
    TEST_REQUIRES_X86_AVX;
    HSwishMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_hswish_ukernel__avx_x8);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_HSWISH__AVX_X8, batch_div_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__avx_x8);
    }
  }

  // Batch sizes below 8: 1 through 7.
  TEST(F32_HSWISH__AVX_X8, batch_lt_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__avx_x8);
    }
  }

  // Batch sizes strictly between 8 and 16: 9 through 15.
  TEST(F32_HSWISH__AVX_X8, batch_gt_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__avx_x8);
    }
  }

  // Batch sizes 1, 8, 15, ..., 36 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__AVX_X8, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__avx_x8);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
347
348
// Tests for xnn_f32_hswish_ukernel__avx_x16; built only when XNN_ARCH_X86 or
// XNN_ARCH_X86_64 is set. TEST_REQUIRES_X86_AVX is defined outside this file
// (presumably an ISA guard from xnnpack/isa-checks.h -- confirm there).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 16 elements.
  TEST(F32_HSWISH__AVX_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX;
    HSwishMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_hswish_ukernel__avx_x16);
  }

  // Batch sizes that are multiples of 16: 32, 48, ..., 144.
  TEST(F32_HSWISH__AVX_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__avx_x16);
    }
  }

  // Batch sizes below 16: 1 through 15.
  TEST(F32_HSWISH__AVX_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__avx_x16);
    }
  }

  // Batch sizes strictly between 16 and 32: 17 through 31.
  TEST(F32_HSWISH__AVX_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__avx_x16);
    }
  }

  // Batch sizes 1, 16, 31, ..., 76 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__AVX_X16, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__avx_x16);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
394
395
// Tests for xnn_f32_hswish_ukernel__fma3_x8; built only when XNN_ARCH_X86 or
// XNN_ARCH_X86_64 is set. TEST_REQUIRES_X86_FMA3 is defined outside this file
// (presumably an ISA guard from xnnpack/isa-checks.h -- confirm there).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 8 elements.
  TEST(F32_HSWISH__FMA3_X8, batch_eq_8) {
    TEST_REQUIRES_X86_FMA3;
    HSwishMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_hswish_ukernel__fma3_x8);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(F32_HSWISH__FMA3_X8, batch_div_8) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__fma3_x8);
    }
  }

  // Batch sizes below 8: 1 through 7.
  TEST(F32_HSWISH__FMA3_X8, batch_lt_8) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__fma3_x8);
    }
  }

  // Batch sizes strictly between 8 and 16: 9 through 15.
  TEST(F32_HSWISH__FMA3_X8, batch_gt_8) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__fma3_x8);
    }
  }

  // Batch sizes 1, 8, 15, ..., 36 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__FMA3_X8, inplace) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__fma3_x8);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
441
442
// Tests for xnn_f32_hswish_ukernel__fma3_x16; built only when XNN_ARCH_X86 or
// XNN_ARCH_X86_64 is set. TEST_REQUIRES_X86_FMA3 is defined outside this file
// (presumably an ISA guard from xnnpack/isa-checks.h -- confirm there).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 16 elements.
  TEST(F32_HSWISH__FMA3_X16, batch_eq_16) {
    TEST_REQUIRES_X86_FMA3;
    HSwishMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_hswish_ukernel__fma3_x16);
  }

  // Batch sizes that are multiples of 16: 32, 48, ..., 144.
  TEST(F32_HSWISH__FMA3_X16, batch_div_16) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__fma3_x16);
    }
  }

  // Batch sizes below 16: 1 through 15.
  TEST(F32_HSWISH__FMA3_X16, batch_lt_16) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__fma3_x16);
    }
  }

  // Batch sizes strictly between 16 and 32: 17 through 31.
  TEST(F32_HSWISH__FMA3_X16, batch_gt_16) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__fma3_x16);
    }
  }

  // Batch sizes 1, 16, 31, ..., 76 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__FMA3_X16, inplace) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__fma3_x16);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
488
489
// Tests for xnn_f32_hswish_ukernel__avx512f_x16; built only when XNN_ARCH_X86 or
// XNN_ARCH_X86_64 is set. TEST_REQUIRES_X86_AVX512F is defined outside this file
// (presumably an ISA guard from xnnpack/isa-checks.h -- confirm there).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 16 elements.
  TEST(F32_HSWISH__AVX512F_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX512F;
    HSwishMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_hswish_ukernel__avx512f_x16);
  }

  // Batch sizes that are multiples of 16: 32, 48, ..., 144.
  TEST(F32_HSWISH__AVX512F_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__avx512f_x16);
    }
  }

  // Batch sizes below 16: 1 through 15.
  TEST(F32_HSWISH__AVX512F_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__avx512f_x16);
    }
  }

  // Batch sizes strictly between 16 and 32: 17 through 31.
  TEST(F32_HSWISH__AVX512F_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__avx512f_x16);
    }
  }

  // Batch sizes 1, 16, 31, ..., 76 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__AVX512F_X16, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__avx512f_x16);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
535
536
// Tests for xnn_f32_hswish_ukernel__avx512f_x32; built only when XNN_ARCH_X86 or
// XNN_ARCH_X86_64 is set. TEST_REQUIRES_X86_AVX512F is defined outside this file
// (presumably an ISA guard from xnnpack/isa-checks.h -- confirm there).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly 32 elements.
  TEST(F32_HSWISH__AVX512F_X32, batch_eq_32) {
    TEST_REQUIRES_X86_AVX512F;
    HSwishMicrokernelTester()
      .batch_size(32)
      .Test(xnn_f32_hswish_ukernel__avx512f_x32);
  }

  // Batch sizes that are multiples of 32: 64, 96, ..., 288.
  TEST(F32_HSWISH__AVX512F_X32, batch_div_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__avx512f_x32);
    }
  }

  // Batch sizes below 32: 1 through 31.
  TEST(F32_HSWISH__AVX512F_X32, batch_lt_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__avx512f_x32);
    }
  }

  // Batch sizes strictly between 32 and 64: 33 through 63.
  TEST(F32_HSWISH__AVX512F_X32, batch_gt_32) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__avx512f_x32);
    }
  }

  // Batch sizes 1, 32, 63, ..., 156 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__AVX512F_X32, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__avx512f_x32);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
582
583
584#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
585 TEST(F32_HSWISH__PSIMD_X4, batch_eq_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700586 TEST_REQUIRES_PSIMD;
587 HSwishMicrokernelTester()
Marat Dukhan662faa02019-12-09 22:48:16 -0800588 .batch_size(4)
589 .Test(xnn_f32_hswish_ukernel__psimd_x4, HSwishMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700590 }
591
Marat Dukhan662faa02019-12-09 22:48:16 -0800592 TEST(F32_HSWISH__PSIMD_X4, batch_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700593 TEST_REQUIRES_PSIMD;
Marat Dukhan662faa02019-12-09 22:48:16 -0800594 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700595 HSwishMicrokernelTester()
Marat Dukhan662faa02019-12-09 22:48:16 -0800596 .batch_size(batch_size)
597 .Test(xnn_f32_hswish_ukernel__psimd_x4, HSwishMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700598 }
599 }
600
Marat Dukhan662faa02019-12-09 22:48:16 -0800601 TEST(F32_HSWISH__PSIMD_X4, batch_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700602 TEST_REQUIRES_PSIMD;
Marat Dukhan662faa02019-12-09 22:48:16 -0800603 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700604 HSwishMicrokernelTester()
Marat Dukhan662faa02019-12-09 22:48:16 -0800605 .batch_size(batch_size)
606 .Test(xnn_f32_hswish_ukernel__psimd_x4, HSwishMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700607 }
608 }
609
Marat Dukhan662faa02019-12-09 22:48:16 -0800610 TEST(F32_HSWISH__PSIMD_X4, batch_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700611 TEST_REQUIRES_PSIMD;
Marat Dukhan662faa02019-12-09 22:48:16 -0800612 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700613 HSwishMicrokernelTester()
Marat Dukhan662faa02019-12-09 22:48:16 -0800614 .batch_size(batch_size)
615 .Test(xnn_f32_hswish_ukernel__psimd_x4, HSwishMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700616 }
617 }
618
Marat Dukhan662faa02019-12-09 22:48:16 -0800619 TEST(F32_HSWISH__PSIMD_X4, inplace) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700620 TEST_REQUIRES_PSIMD;
Marat Dukhan662faa02019-12-09 22:48:16 -0800621 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700622 HSwishMicrokernelTester()
Marat Dukhan662faa02019-12-09 22:48:16 -0800623 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700624 .inplace(true)
Marat Dukhan662faa02019-12-09 22:48:16 -0800625 .Test(xnn_f32_hswish_ukernel__psimd_x4, HSwishMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700626 }
627 }
Marat Dukhan662faa02019-12-09 22:48:16 -0800628#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
629
630
631#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
632 TEST(F32_HSWISH__PSIMD_X8, batch_eq_8) {
633 TEST_REQUIRES_PSIMD;
634 HSwishMicrokernelTester()
635 .batch_size(8)
636 .Test(xnn_f32_hswish_ukernel__psimd_x8, HSwishMicrokernelTester::Variant::Scalar);
637 }
638
639 TEST(F32_HSWISH__PSIMD_X8, batch_div_8) {
640 TEST_REQUIRES_PSIMD;
641 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
642 HSwishMicrokernelTester()
643 .batch_size(batch_size)
644 .Test(xnn_f32_hswish_ukernel__psimd_x8, HSwishMicrokernelTester::Variant::Scalar);
645 }
646 }
647
648 TEST(F32_HSWISH__PSIMD_X8, batch_lt_8) {
649 TEST_REQUIRES_PSIMD;
650 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
651 HSwishMicrokernelTester()
652 .batch_size(batch_size)
653 .Test(xnn_f32_hswish_ukernel__psimd_x8, HSwishMicrokernelTester::Variant::Scalar);
654 }
655 }
656
657 TEST(F32_HSWISH__PSIMD_X8, batch_gt_8) {
658 TEST_REQUIRES_PSIMD;
659 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
660 HSwishMicrokernelTester()
661 .batch_size(batch_size)
662 .Test(xnn_f32_hswish_ukernel__psimd_x8, HSwishMicrokernelTester::Variant::Scalar);
663 }
664 }
665
666 TEST(F32_HSWISH__PSIMD_X8, inplace) {
667 TEST_REQUIRES_PSIMD;
668 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
669 HSwishMicrokernelTester()
670 .batch_size(batch_size)
671 .inplace(true)
672 .Test(xnn_f32_hswish_ukernel__psimd_x8, HSwishMicrokernelTester::Variant::Scalar);
673 }
674 }
675#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
XNNPACK Teamb455b122019-09-27 18:10:33 -0700676
677
// Tests for xnn_f32_hswish_ukernel__wasm_x1; built only when XNN_ARCH_WASM is set.
// Every call passes HSwishMicrokernelTester::Variant::Scalar as the second
// Test() argument.
#if XNN_ARCH_WASM
  // Batch of exactly 1 element.
  TEST(F32_HSWISH__WASM_X1, batch_eq_1) {
    HSwishMicrokernelTester()
      .batch_size(1)
      .Test(xnn_f32_hswish_ukernel__wasm_x1, HSwishMicrokernelTester::Variant::Scalar);
  }

  // Batch sizes above 1: 2 through 9.
  TEST(F32_HSWISH__WASM_X1, batch_gt_1) {
    for (size_t batch_size = 2; batch_size < 10; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__wasm_x1, HSwishMicrokernelTester::Variant::Scalar);
    }
  }

  // Batch sizes 1 through 5 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__WASM_X1, inplace) {
    for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__wasm_x1, HSwishMicrokernelTester::Variant::Scalar);
    }
  }
#endif  // XNN_ARCH_WASM
702
703
// Tests for xnn_f32_hswish_ukernel__wasm_x2; built only when XNN_ARCH_WASM is set.
// Every call passes HSwishMicrokernelTester::Variant::Scalar as the second
// Test() argument.
#if XNN_ARCH_WASM
  // Batch of exactly 2 elements.
  TEST(F32_HSWISH__WASM_X2, batch_eq_2) {
    HSwishMicrokernelTester()
      .batch_size(2)
      .Test(xnn_f32_hswish_ukernel__wasm_x2, HSwishMicrokernelTester::Variant::Scalar);
  }

  // Batch sizes that are multiples of 2: 4, 6, ..., 18.
  TEST(F32_HSWISH__WASM_X2, batch_div_2) {
    for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__wasm_x2, HSwishMicrokernelTester::Variant::Scalar);
    }
  }

  // Batch sizes below 2; the loop runs once, with batch_size == 1.
  TEST(F32_HSWISH__WASM_X2, batch_lt_2) {
    for (size_t batch_size = 1; batch_size < 2; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__wasm_x2, HSwishMicrokernelTester::Variant::Scalar);
    }
  }

  // Batch sizes strictly between 2 and 4; the loop runs once, with batch_size == 3.
  TEST(F32_HSWISH__WASM_X2, batch_gt_2) {
    for (size_t batch_size = 3; batch_size < 4; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__wasm_x2, HSwishMicrokernelTester::Variant::Scalar);
    }
  }

  // Batch sizes 1 through 10 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__WASM_X2, inplace) {
    for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__wasm_x2, HSwishMicrokernelTester::Variant::Scalar);
    }
  }
#endif  // XNN_ARCH_WASM
744
745
// Tests for xnn_f32_hswish_ukernel__wasm_x4; built only when XNN_ARCH_WASM is set.
// Every call passes HSwishMicrokernelTester::Variant::Scalar as the second
// Test() argument.
#if XNN_ARCH_WASM
  // Batch of exactly 4 elements.
  TEST(F32_HSWISH__WASM_X4, batch_eq_4) {
    HSwishMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_hswish_ukernel__wasm_x4, HSwishMicrokernelTester::Variant::Scalar);
  }

  // Batch sizes that are multiples of 4: 8, 12, ..., 36.
  TEST(F32_HSWISH__WASM_X4, batch_div_4) {
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__wasm_x4, HSwishMicrokernelTester::Variant::Scalar);
    }
  }

  // Batch sizes below 4: 1, 2, 3.
  TEST(F32_HSWISH__WASM_X4, batch_lt_4) {
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__wasm_x4, HSwishMicrokernelTester::Variant::Scalar);
    }
  }

  // Batch sizes strictly between 4 and 8: 5, 6, 7.
  TEST(F32_HSWISH__WASM_X4, batch_gt_4) {
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_hswish_ukernel__wasm_x4, HSwishMicrokernelTester::Variant::Scalar);
    }
  }

  // Batch sizes 1, 4, 7, ..., 19 with the tester's inplace(true) option set.
  TEST(F32_HSWISH__WASM_X4, inplace) {
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      HSwishMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_hswish_ukernel__wasm_x4, HSwishMicrokernelTester::Variant::Scalar);
    }
  }
#endif  // XNN_ARCH_WASM
786
787
788TEST(F32_HSWISH__SCALAR_X1, batch_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700789 HSwishMicrokernelTester()
Marat Dukhan662faa02019-12-09 22:48:16 -0800790 .batch_size(1)
791 .Test(xnn_f32_hswish_ukernel__scalar_x1, HSwishMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700792}
793
Marat Dukhan662faa02019-12-09 22:48:16 -0800794TEST(F32_HSWISH__SCALAR_X1, batch_gt_1) {
795 for (size_t batch_size = 2; batch_size < 10; batch_size++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700796 HSwishMicrokernelTester()
Marat Dukhan662faa02019-12-09 22:48:16 -0800797 .batch_size(batch_size)
798 .Test(xnn_f32_hswish_ukernel__scalar_x1, HSwishMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700799 }
800}
801
Marat Dukhan662faa02019-12-09 22:48:16 -0800802TEST(F32_HSWISH__SCALAR_X1, inplace) {
803 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700804 HSwishMicrokernelTester()
Marat Dukhan662faa02019-12-09 22:48:16 -0800805 .batch_size(batch_size)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700806 .inplace(true)
Marat Dukhan662faa02019-12-09 22:48:16 -0800807 .Test(xnn_f32_hswish_ukernel__scalar_x1, HSwishMicrokernelTester::Variant::Scalar);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700808 }
809}
Marat Dukhan662faa02019-12-09 22:48:16 -0800810
811TEST(F32_HSWISH__SCALAR_X2, batch_eq_2) {
812 HSwishMicrokernelTester()
813 .batch_size(2)
814 .Test(xnn_f32_hswish_ukernel__scalar_x2, HSwishMicrokernelTester::Variant::Scalar);
815}
816
817TEST(F32_HSWISH__SCALAR_X2, batch_div_2) {
818 for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
819 HSwishMicrokernelTester()
820 .batch_size(batch_size)
821 .Test(xnn_f32_hswish_ukernel__scalar_x2, HSwishMicrokernelTester::Variant::Scalar);
822 }
823}
824
825TEST(F32_HSWISH__SCALAR_X2, batch_lt_2) {
826 for (size_t batch_size = 1; batch_size < 2; batch_size++) {
827 HSwishMicrokernelTester()
828 .batch_size(batch_size)
829 .Test(xnn_f32_hswish_ukernel__scalar_x2, HSwishMicrokernelTester::Variant::Scalar);
830 }
831}
832
833TEST(F32_HSWISH__SCALAR_X2, batch_gt_2) {
834 for (size_t batch_size = 3; batch_size < 4; batch_size++) {
835 HSwishMicrokernelTester()
836 .batch_size(batch_size)
837 .Test(xnn_f32_hswish_ukernel__scalar_x2, HSwishMicrokernelTester::Variant::Scalar);
838 }
839}
840
841TEST(F32_HSWISH__SCALAR_X2, inplace) {
842 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
843 HSwishMicrokernelTester()
844 .batch_size(batch_size)
845 .inplace(true)
846 .Test(xnn_f32_hswish_ukernel__scalar_x2, HSwishMicrokernelTester::Variant::Scalar);
847 }
848}
849
850TEST(F32_HSWISH__SCALAR_X4, batch_eq_4) {
851 HSwishMicrokernelTester()
852 .batch_size(4)
853 .Test(xnn_f32_hswish_ukernel__scalar_x4, HSwishMicrokernelTester::Variant::Scalar);
854}
855
856TEST(F32_HSWISH__SCALAR_X4, batch_div_4) {
857 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
858 HSwishMicrokernelTester()
859 .batch_size(batch_size)
860 .Test(xnn_f32_hswish_ukernel__scalar_x4, HSwishMicrokernelTester::Variant::Scalar);
861 }
862}
863
864TEST(F32_HSWISH__SCALAR_X4, batch_lt_4) {
865 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
866 HSwishMicrokernelTester()
867 .batch_size(batch_size)
868 .Test(xnn_f32_hswish_ukernel__scalar_x4, HSwishMicrokernelTester::Variant::Scalar);
869 }
870}
871
872TEST(F32_HSWISH__SCALAR_X4, batch_gt_4) {
873 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
874 HSwishMicrokernelTester()
875 .batch_size(batch_size)
876 .Test(xnn_f32_hswish_ukernel__scalar_x4, HSwishMicrokernelTester::Variant::Scalar);
877 }
878}
879
880TEST(F32_HSWISH__SCALAR_X4, inplace) {
881 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
882 HSwishMicrokernelTester()
883 .batch_size(batch_size)
884 .inplace(true)
885 .Test(xnn_f32_hswish_ukernel__scalar_x4, HSwishMicrokernelTester::Variant::Scalar);
886 }
887}