blob: a5e099ab85aa5b93466ace5fe5650532b2dfb4d8 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
XNNPACK Teamb455b122019-09-27 18:10:33 -07006#include <gtest/gtest.h>
7
Marat Dukhan1dadbf72019-10-01 10:46:20 -07008#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -07009#include <xnnpack/isa-checks.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070010
Marat Dukhan1dadbf72019-10-01 10:46:20 -070011#include <xnnpack/clamp.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070012#include "clamp-microkernel-tester.h"
13
14
Marat Dukhan1dadbf72019-10-01 10:46:20 -070015#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070016 TEST(F32_CLAMP__NEON, n_eq_4) {
17 TEST_REQUIRES_ARM_NEON;
18 ClampMicrokernelTester()
19 .n(4)
20 .Test(xnn_f32_clamp_ukernel__neon);
21 }
22
23 TEST(F32_CLAMP__NEON, n_div_4) {
24 TEST_REQUIRES_ARM_NEON;
25 for (size_t n = 4; n < 256; n += 4) {
26 ClampMicrokernelTester()
27 .n(n)
28 .Test(xnn_f32_clamp_ukernel__neon);
29 }
30 }
31
32 TEST(F32_CLAMP__NEON, n_gt_4) {
33 TEST_REQUIRES_ARM_NEON;
34 for (size_t n = 5; n < 8; n++) {
35 ClampMicrokernelTester()
36 .n(n)
37 .Test(xnn_f32_clamp_ukernel__neon);
38 }
39 }
40
41 TEST(F32_CLAMP__NEON, n_lt_4) {
42 TEST_REQUIRES_ARM_NEON;
43 for (size_t n = 1; n < 4; n++) {
44 ClampMicrokernelTester()
45 .n(n)
46 .Test(xnn_f32_clamp_ukernel__neon);
47 }
48 }
49
50 TEST(F32_CLAMP__NEON, inplace) {
51 TEST_REQUIRES_ARM_NEON;
52 for (size_t n = 1; n < 64; n += 3) {
53 ClampMicrokernelTester()
54 .iterations(1)
55 .n(n)
56 .inplace(true)
57 .Test(xnn_f32_clamp_ukernel__neon);
58 }
59 }
60
61 TEST(F32_CLAMP__NEON, qmin) {
62 TEST_REQUIRES_ARM_NEON;
63 for (size_t n = 1; n < 64; n += 5) {
64 for (uint8_t qmin = 1; qmin < 255; qmin++) {
65 ClampMicrokernelTester()
66 .iterations(1)
67 .n(n)
68 .qmin(qmin)
69 .qmax(255)
70 .Test(xnn_f32_clamp_ukernel__neon);
71 }
72 }
73 }
74
75 TEST(F32_CLAMP__NEON, qmax) {
76 TEST_REQUIRES_ARM_NEON;
77 for (size_t n = 1; n < 64; n += 5) {
78 for (uint8_t qmax = 1; qmax < 255; qmax++) {
79 ClampMicrokernelTester()
80 .iterations(1)
81 .n(n)
82 .qmin(0)
83 .qmax(qmax)
84 .Test(xnn_f32_clamp_ukernel__neon);
85 }
86 }
87 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -070088#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070089
Marat Dukhan1dadbf72019-10-01 10:46:20 -070090#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -070091 TEST(F32_CLAMP__SSE, n_eq_4) {
92 TEST_REQUIRES_X86_SSE2;
93 ClampMicrokernelTester()
94 .n(4)
95 .Test(xnn_f32_clamp_ukernel__sse);
96 }
97
98 TEST(F32_CLAMP__SSE, n_div_4) {
99 TEST_REQUIRES_X86_SSE2;
100 for (size_t n = 4; n < 256; n += 4) {
101 ClampMicrokernelTester()
102 .n(n)
103 .Test(xnn_f32_clamp_ukernel__sse);
104 }
105 }
106
107 TEST(F32_CLAMP__SSE, n_gt_4) {
108 TEST_REQUIRES_X86_SSE2;
109 for (size_t n = 5; n < 8; n++) {
110 ClampMicrokernelTester()
111 .n(n)
112 .Test(xnn_f32_clamp_ukernel__sse);
113 }
114 }
115
116 TEST(F32_CLAMP__SSE, n_lt_4) {
117 TEST_REQUIRES_X86_SSE2;
118 for (size_t n = 1; n < 4; n++) {
119 ClampMicrokernelTester()
120 .n(n)
121 .Test(xnn_f32_clamp_ukernel__sse);
122 }
123 }
124
125 TEST(F32_CLAMP__SSE, inplace) {
126 TEST_REQUIRES_X86_SSE2;
127 for (size_t n = 1; n < 64; n += 3) {
128 ClampMicrokernelTester()
129 .iterations(1)
130 .n(n)
131 .inplace(true)
132 .Test(xnn_f32_clamp_ukernel__sse);
133 }
134 }
135
136 TEST(F32_CLAMP__SSE, qmin) {
137 TEST_REQUIRES_X86_SSE2;
138 for (size_t n = 1; n < 64; n += 5) {
139 for (uint8_t qmin = 1; qmin < 255; qmin++) {
140 ClampMicrokernelTester()
141 .iterations(1)
142 .n(n)
143 .qmin(qmin)
144 .qmax(255)
145 .Test(xnn_f32_clamp_ukernel__sse);
146 }
147 }
148 }
149
150 TEST(F32_CLAMP__SSE, qmax) {
151 TEST_REQUIRES_X86_SSE2;
152 for (size_t n = 1; n < 64; n += 5) {
153 for (uint8_t qmax = 1; qmax < 255; qmax++) {
154 ClampMicrokernelTester()
155 .iterations(1)
156 .n(n)
157 .qmin(0)
158 .qmax(qmax)
159 .Test(xnn_f32_clamp_ukernel__sse);
160 }
161 }
162 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700163#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhane2c3f292019-11-27 15:40:54 -0800164
165#if XNN_ARCH_X86 || XNN_ARCH_X86_64
166 TEST(F32_CLAMP__AVX, n_eq_8) {
167 TEST_REQUIRES_X86_AVX;
168 ClampMicrokernelTester()
169 .n(8)
170 .Test(xnn_f32_clamp_ukernel__avx);
171 }
172
173 TEST(F32_CLAMP__AVX, n_div_8) {
174 TEST_REQUIRES_X86_AVX;
175 for (size_t n = 8; n < 512; n += 8) {
176 ClampMicrokernelTester()
177 .n(n)
178 .Test(xnn_f32_clamp_ukernel__avx);
179 }
180 }
181
182 TEST(F32_CLAMP__AVX, n_gt_8) {
183 TEST_REQUIRES_X86_AVX;
184 for (size_t n = 9; n < 16; n++) {
185 ClampMicrokernelTester()
186 .n(n)
187 .Test(xnn_f32_clamp_ukernel__avx);
188 }
189 }
190
191 TEST(F32_CLAMP__AVX, n_lt_8) {
192 TEST_REQUIRES_X86_AVX;
193 for (size_t n = 1; n < 8; n++) {
194 ClampMicrokernelTester()
195 .n(n)
196 .Test(xnn_f32_clamp_ukernel__avx);
197 }
198 }
199
200 TEST(F32_CLAMP__AVX, inplace) {
201 TEST_REQUIRES_X86_AVX;
202 for (size_t n = 1; n < 128; n += 7) {
203 ClampMicrokernelTester()
204 .iterations(1)
205 .n(n)
206 .inplace(true)
207 .Test(xnn_f32_clamp_ukernel__avx);
208 }
209 }
210
211 TEST(F32_CLAMP__AVX, qmin) {
212 TEST_REQUIRES_X86_AVX;
213 for (size_t n = 1; n < 128; n += 7) {
214 for (uint8_t qmin = 1; qmin < 255; qmin++) {
215 ClampMicrokernelTester()
216 .iterations(1)
217 .n(n)
218 .qmin(qmin)
219 .qmax(255)
220 .Test(xnn_f32_clamp_ukernel__avx);
221 }
222 }
223 }
224
225 TEST(F32_CLAMP__AVX, qmax) {
226 TEST_REQUIRES_X86_AVX;
227 for (size_t n = 1; n < 128; n += 7) {
228 for (uint8_t qmax = 1; qmax < 255; qmax++) {
229 ClampMicrokernelTester()
230 .iterations(1)
231 .n(n)
232 .qmin(0)
233 .qmax(qmax)
234 .Test(xnn_f32_clamp_ukernel__avx);
235 }
236 }
237 }
238#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
239
240#if XNN_ARCH_X86 || XNN_ARCH_X86_64
241 TEST(F32_CLAMP__AVX512F, n_eq_16) {
242 TEST_REQUIRES_X86_AVX512F;
243 ClampMicrokernelTester()
244 .n(16)
245 .Test(xnn_f32_clamp_ukernel__avx512f);
246 }
247
248 TEST(F32_CLAMP__AVX512F, n_div_16) {
249 TEST_REQUIRES_X86_AVX512F;
250 for (size_t n = 16; n < 1024; n += 16) {
251 ClampMicrokernelTester()
252 .n(n)
253 .Test(xnn_f32_clamp_ukernel__avx512f);
254 }
255 }
256
257 TEST(F32_CLAMP__AVX512F, n_gt_16) {
258 TEST_REQUIRES_X86_AVX512F;
259 for (size_t n = 17; n < 32; n++) {
260 ClampMicrokernelTester()
261 .n(n)
262 .Test(xnn_f32_clamp_ukernel__avx512f);
263 }
264 }
265
266 TEST(F32_CLAMP__AVX512F, n_lt_16) {
267 TEST_REQUIRES_X86_AVX512F;
268 for (size_t n = 1; n < 16; n++) {
269 ClampMicrokernelTester()
270 .n(n)
271 .Test(xnn_f32_clamp_ukernel__avx512f);
272 }
273 }
274
275 TEST(F32_CLAMP__AVX512F, inplace) {
276 TEST_REQUIRES_X86_AVX512F;
277 for (size_t n = 1; n < 256; n += 15) {
278 ClampMicrokernelTester()
279 .iterations(1)
280 .n(n)
281 .inplace(true)
282 .Test(xnn_f32_clamp_ukernel__avx512f);
283 }
284 }
285
286 TEST(F32_CLAMP__AVX512F, qmin) {
287 TEST_REQUIRES_X86_AVX512F;
288 for (size_t n = 1; n < 256; n += 15) {
289 for (uint8_t qmin = 1; qmin < 255; qmin++) {
290 ClampMicrokernelTester()
291 .iterations(1)
292 .n(n)
293 .qmin(qmin)
294 .qmax(255)
295 .Test(xnn_f32_clamp_ukernel__avx512f);
296 }
297 }
298 }
299
300 TEST(F32_CLAMP__AVX512F, qmax) {
301 TEST_REQUIRES_X86_AVX512F;
302 for (size_t n = 1; n < 256; n += 15) {
303 for (uint8_t qmax = 1; qmax < 255; qmax++) {
304 ClampMicrokernelTester()
305 .iterations(1)
306 .n(n)
307 .qmin(0)
308 .qmax(qmax)
309 .Test(xnn_f32_clamp_ukernel__avx512f);
310 }
311 }
312 }
313#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
314
315#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
316 TEST(F32_CLAMP__PSIMD, n_eq_4) {
317 TEST_REQUIRES_PSIMD;
318 ClampMicrokernelTester()
319 .n(4)
320 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
321 }
322
323 TEST(F32_CLAMP__PSIMD, n_div_4) {
324 TEST_REQUIRES_PSIMD;
325 for (size_t n = 4; n < 256; n += 4) {
326 ClampMicrokernelTester()
327 .n(n)
328 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
329 }
330 }
331
332 TEST(F32_CLAMP__PSIMD, n_gt_4) {
333 TEST_REQUIRES_PSIMD;
334 for (size_t n = 5; n < 8; n++) {
335 ClampMicrokernelTester()
336 .n(n)
337 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
338 }
339 }
340
341 TEST(F32_CLAMP__PSIMD, n_lt_4) {
342 TEST_REQUIRES_PSIMD;
343 for (size_t n = 1; n < 4; n++) {
344 ClampMicrokernelTester()
345 .n(n)
346 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
347 }
348 }
349
350 TEST(F32_CLAMP__PSIMD, inplace) {
351 TEST_REQUIRES_PSIMD;
352 for (size_t n = 1; n < 64; n += 3) {
353 ClampMicrokernelTester()
354 .iterations(1)
355 .n(n)
356 .inplace(true)
357 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
358 }
359 }
360
361 TEST(F32_CLAMP__PSIMD, qmin) {
362 TEST_REQUIRES_PSIMD;
363 for (size_t n = 1; n < 64; n += 5) {
364 for (uint8_t qmin = 1; qmin < 255; qmin++) {
365 ClampMicrokernelTester()
366 .iterations(1)
367 .n(n)
368 .qmin(qmin)
369 .qmax(255)
370 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
371 }
372 }
373 }
374
375 TEST(F32_CLAMP__PSIMD, qmax) {
376 TEST_REQUIRES_PSIMD;
377 for (size_t n = 1; n < 64; n += 5) {
378 for (uint8_t qmax = 1; qmax < 255; qmax++) {
379 ClampMicrokernelTester()
380 .iterations(1)
381 .n(n)
382 .qmin(0)
383 .qmax(qmax)
384 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
385 }
386 }
387 }
388#endif // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
389
390
Marat Dukhan436ebe62019-12-04 15:10:12 -0800391#if XNN_ARCH_WASM
392 TEST(F32_CLAMP__WASM, n_eq_2) {
393 ClampMicrokernelTester()
394 .n(2)
395 .Test(xnn_f32_clamp_ukernel__wasm, ClampMicrokernelTester::Variant::Scalar);
396 }
397
398 TEST(F32_CLAMP__WASM, n_div_2) {
399 for (size_t n = 4; n < 128; n += 2) {
400 ClampMicrokernelTester()
401 .n(n)
402 .Test(xnn_f32_clamp_ukernel__wasm, ClampMicrokernelTester::Variant::Scalar);
403 }
404 }
405
406 TEST(F32_CLAMP__WASM, n_gt_2) {
407 for (size_t n = 3; n < 4; n++) {
408 ClampMicrokernelTester()
409 .n(n)
410 .Test(xnn_f32_clamp_ukernel__wasm, ClampMicrokernelTester::Variant::Scalar);
411 }
412 }
413
414 TEST(F32_CLAMP__WASM, n_lt_2) {
415 for (size_t n = 1; n < 2; n++) {
416 ClampMicrokernelTester()
417 .n(n)
418 .Test(xnn_f32_clamp_ukernel__wasm, ClampMicrokernelTester::Variant::Scalar);
419 }
420 }
421
422 TEST(F32_CLAMP__WASM, inplace) {
423 for (size_t n = 1; n < 32; n += 3) {
424 ClampMicrokernelTester()
425 .iterations(1)
426 .n(n)
427 .inplace(true)
428 .Test(xnn_f32_clamp_ukernel__wasm, ClampMicrokernelTester::Variant::Scalar);
429 }
430 }
431
432 TEST(F32_CLAMP__WASM, qmin) {
433 for (size_t n = 1; n < 32; n += 3) {
434 for (uint8_t qmin = 1; qmin < 255; qmin++) {
435 ClampMicrokernelTester()
436 .iterations(1)
437 .n(n)
438 .qmin(qmin)
439 .qmax(255)
440 .Test(xnn_f32_clamp_ukernel__wasm, ClampMicrokernelTester::Variant::Scalar);
441 }
442 }
443 }
444
445 TEST(F32_CLAMP__WASM, qmax) {
446 for (size_t n = 1; n < 32; n += 3) {
447 for (uint8_t qmax = 1; qmax < 255; qmax++) {
448 ClampMicrokernelTester()
449 .iterations(1)
450 .n(n)
451 .qmin(0)
452 .qmax(qmax)
453 .Test(xnn_f32_clamp_ukernel__wasm, ClampMicrokernelTester::Variant::Scalar);
454 }
455 }
456 }
457#endif // XNN_ARCH_WASM
458
459
Marat Dukhane2c3f292019-11-27 15:40:54 -0800460TEST(F32_CLAMP__SCALAR, n_eq_2) {
461 ClampMicrokernelTester()
462 .n(2)
463 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
464}
465
466TEST(F32_CLAMP__SCALAR, n_div_2) {
467 for (size_t n = 4; n < 128; n += 2) {
468 ClampMicrokernelTester()
469 .n(n)
470 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
471 }
472}
473
474TEST(F32_CLAMP__SCALAR, n_gt_2) {
475 for (size_t n = 3; n < 4; n++) {
476 ClampMicrokernelTester()
477 .n(n)
478 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
479 }
480}
481
482TEST(F32_CLAMP__SCALAR, n_lt_2) {
483 for (size_t n = 1; n < 2; n++) {
484 ClampMicrokernelTester()
485 .n(n)
486 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
487 }
488}
489
490TEST(F32_CLAMP__SCALAR, inplace) {
491 for (size_t n = 1; n < 32; n += 3) {
492 ClampMicrokernelTester()
493 .iterations(1)
494 .n(n)
495 .inplace(true)
496 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
497 }
498}
499
500TEST(F32_CLAMP__SCALAR, qmin) {
501 for (size_t n = 1; n < 32; n += 3) {
502 for (uint8_t qmin = 1; qmin < 255; qmin++) {
503 ClampMicrokernelTester()
504 .iterations(1)
505 .n(n)
506 .qmin(qmin)
507 .qmax(255)
508 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
509 }
510 }
511}
512
513TEST(F32_CLAMP__SCALAR, qmax) {
514 for (size_t n = 1; n < 32; n += 3) {
515 for (uint8_t qmax = 1; qmax < 255; qmax++) {
516 ClampMicrokernelTester()
517 .iterations(1)
518 .n(n)
519 .qmin(0)
520 .qmax(qmax)
521 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
522 }
523 }
524}