blob: 017667be77311749e7367f4e13563bac66d2f913 [file] [log] [blame]
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/x8-lut.yaml
//   Generator: tools/generate-lut-test.py
9
XNNPACK Teamb455b122019-09-27 18:10:33 -070010
11#include <gtest/gtest.h>
12
Marat Dukhand67539d2021-09-08 23:06:03 -070013#include <xnnpack/common.h>
14#include <xnnpack/isa-checks.h>
15
XNNPACK Teamb455b122019-09-27 18:10:33 -070016#include <xnnpack/lut.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070017#include "lut-microkernel-tester.h"
18
19
Marat Dukhand67539d2021-09-08 23:06:03 -070020TEST(X8_LUT__SCALAR_X1, batch_eq_1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070021 LUTMicrokernelTester()
Marat Dukhand67539d2021-09-08 23:06:03 -070022 .batch_size(1)
23 .Test(xnn_x8_lut_ukernel__scalar_x1);
XNNPACK Teamb455b122019-09-27 18:10:33 -070024}
25
Marat Dukhand67539d2021-09-08 23:06:03 -070026TEST(X8_LUT__SCALAR_X1, batch_gt_1) {
27 for (size_t batch_size = 2; batch_size < 10; batch_size++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070028 LUTMicrokernelTester()
Marat Dukhand67539d2021-09-08 23:06:03 -070029 .batch_size(batch_size)
30 .Test(xnn_x8_lut_ukernel__scalar_x1);
XNNPACK Teamb455b122019-09-27 18:10:33 -070031 }
32}
33
Marat Dukhand67539d2021-09-08 23:06:03 -070034TEST(X8_LUT__SCALAR_X1, inplace) {
35 for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070036 LUTMicrokernelTester()
Marat Dukhand67539d2021-09-08 23:06:03 -070037 .batch_size(batch_size)
38 .inplace(true)
39 .Test(xnn_x8_lut_ukernel__scalar_x1);
XNNPACK Teamb455b122019-09-27 18:10:33 -070040 }
41}
42
Marat Dukhand67539d2021-09-08 23:06:03 -070043TEST(X8_LUT__SCALAR_X2, batch_eq_2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070044 LUTMicrokernelTester()
Marat Dukhand67539d2021-09-08 23:06:03 -070045 .batch_size(2)
46 .Test(xnn_x8_lut_ukernel__scalar_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -070047}
48
Marat Dukhand67539d2021-09-08 23:06:03 -070049TEST(X8_LUT__SCALAR_X2, batch_div_2) {
50 for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070051 LUTMicrokernelTester()
Marat Dukhand67539d2021-09-08 23:06:03 -070052 .batch_size(batch_size)
53 .Test(xnn_x8_lut_ukernel__scalar_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -070054 }
55}
56
Marat Dukhand67539d2021-09-08 23:06:03 -070057TEST(X8_LUT__SCALAR_X2, batch_lt_2) {
58 for (size_t batch_size = 1; batch_size < 2; batch_size++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070059 LUTMicrokernelTester()
Marat Dukhand67539d2021-09-08 23:06:03 -070060 .batch_size(batch_size)
61 .Test(xnn_x8_lut_ukernel__scalar_x2);
XNNPACK Teamb455b122019-09-27 18:10:33 -070062 }
63}
Marat Dukhand67539d2021-09-08 23:06:03 -070064
65TEST(X8_LUT__SCALAR_X2, batch_gt_2) {
66 for (size_t batch_size = 3; batch_size < 4; batch_size++) {
67 LUTMicrokernelTester()
68 .batch_size(batch_size)
69 .Test(xnn_x8_lut_ukernel__scalar_x2);
70 }
71}
72
73TEST(X8_LUT__SCALAR_X2, inplace) {
74 for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
75 LUTMicrokernelTester()
76 .batch_size(batch_size)
77 .inplace(true)
78 .Test(xnn_x8_lut_ukernel__scalar_x2);
79 }
80}
81
82TEST(X8_LUT__SCALAR_X4, batch_eq_4) {
83 LUTMicrokernelTester()
84 .batch_size(4)
85 .Test(xnn_x8_lut_ukernel__scalar_x4);
86}
87
88TEST(X8_LUT__SCALAR_X4, batch_div_4) {
89 for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
90 LUTMicrokernelTester()
91 .batch_size(batch_size)
92 .Test(xnn_x8_lut_ukernel__scalar_x4);
93 }
94}
95
96TEST(X8_LUT__SCALAR_X4, batch_lt_4) {
97 for (size_t batch_size = 1; batch_size < 4; batch_size++) {
98 LUTMicrokernelTester()
99 .batch_size(batch_size)
100 .Test(xnn_x8_lut_ukernel__scalar_x4);
101 }
102}
103
104TEST(X8_LUT__SCALAR_X4, batch_gt_4) {
105 for (size_t batch_size = 5; batch_size < 8; batch_size++) {
106 LUTMicrokernelTester()
107 .batch_size(batch_size)
108 .Test(xnn_x8_lut_ukernel__scalar_x4);
109 }
110}
111
112TEST(X8_LUT__SCALAR_X4, inplace) {
113 for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
114 LUTMicrokernelTester()
115 .batch_size(batch_size)
116 .inplace(true)
117 .Test(xnn_x8_lut_ukernel__scalar_x4);
118 }
119}
120
121TEST(X8_LUT__SCALAR_X8, batch_eq_8) {
122 LUTMicrokernelTester()
123 .batch_size(8)
124 .Test(xnn_x8_lut_ukernel__scalar_x8);
125}
126
127TEST(X8_LUT__SCALAR_X8, batch_div_8) {
128 for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
129 LUTMicrokernelTester()
130 .batch_size(batch_size)
131 .Test(xnn_x8_lut_ukernel__scalar_x8);
132 }
133}
134
135TEST(X8_LUT__SCALAR_X8, batch_lt_8) {
136 for (size_t batch_size = 1; batch_size < 8; batch_size++) {
137 LUTMicrokernelTester()
138 .batch_size(batch_size)
139 .Test(xnn_x8_lut_ukernel__scalar_x8);
140 }
141}
142
143TEST(X8_LUT__SCALAR_X8, batch_gt_8) {
144 for (size_t batch_size = 9; batch_size < 16; batch_size++) {
145 LUTMicrokernelTester()
146 .batch_size(batch_size)
147 .Test(xnn_x8_lut_ukernel__scalar_x8);
148 }
149}
150
151TEST(X8_LUT__SCALAR_X8, inplace) {
152 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
153 LUTMicrokernelTester()
154 .batch_size(batch_size)
155 .inplace(true)
156 .Test(xnn_x8_lut_ukernel__scalar_x8);
157 }
158}
159
160TEST(X8_LUT__SCALAR_X16, batch_eq_16) {
161 LUTMicrokernelTester()
162 .batch_size(16)
163 .Test(xnn_x8_lut_ukernel__scalar_x16);
164}
165
166TEST(X8_LUT__SCALAR_X16, batch_div_16) {
167 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
168 LUTMicrokernelTester()
169 .batch_size(batch_size)
170 .Test(xnn_x8_lut_ukernel__scalar_x16);
171 }
172}
173
174TEST(X8_LUT__SCALAR_X16, batch_lt_16) {
175 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
176 LUTMicrokernelTester()
177 .batch_size(batch_size)
178 .Test(xnn_x8_lut_ukernel__scalar_x16);
179 }
180}
181
182TEST(X8_LUT__SCALAR_X16, batch_gt_16) {
183 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
184 LUTMicrokernelTester()
185 .batch_size(batch_size)
186 .Test(xnn_x8_lut_ukernel__scalar_x16);
187 }
188}
189
190TEST(X8_LUT__SCALAR_X16, inplace) {
191 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
192 LUTMicrokernelTester()
193 .batch_size(batch_size)
194 .inplace(true)
195 .Test(xnn_x8_lut_ukernel__scalar_x16);
196 }
Marat Dukhanf7182322021-09-09 18:53:46 -0700197}
198
199#if XNN_ARCH_ARM64
200 TEST(X8_LUT__NEON_TBX128X4_X16, batch_eq_16) {
201 TEST_REQUIRES_ARM_NEON;
202 LUTMicrokernelTester()
203 .batch_size(16)
204 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
205 }
206
207 TEST(X8_LUT__NEON_TBX128X4_X16, batch_div_16) {
208 TEST_REQUIRES_ARM_NEON;
209 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
210 LUTMicrokernelTester()
211 .batch_size(batch_size)
212 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
213 }
214 }
215
216 TEST(X8_LUT__NEON_TBX128X4_X16, batch_lt_16) {
217 TEST_REQUIRES_ARM_NEON;
218 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
219 LUTMicrokernelTester()
220 .batch_size(batch_size)
221 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
222 }
223 }
224
225 TEST(X8_LUT__NEON_TBX128X4_X16, batch_gt_16) {
226 TEST_REQUIRES_ARM_NEON;
227 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
228 LUTMicrokernelTester()
229 .batch_size(batch_size)
230 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
231 }
232 }
233
234 TEST(X8_LUT__NEON_TBX128X4_X16, inplace) {
235 TEST_REQUIRES_ARM_NEON;
236 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
237 LUTMicrokernelTester()
238 .batch_size(batch_size)
239 .inplace(true)
240 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
241 }
242 }
243#endif // XNN_ARCH_ARM64
244
245
246#if XNN_ARCH_ARM64
247 TEST(X8_LUT__NEON_TBX128X4_X32, batch_eq_32) {
248 TEST_REQUIRES_ARM_NEON;
249 LUTMicrokernelTester()
250 .batch_size(32)
251 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
252 }
253
254 TEST(X8_LUT__NEON_TBX128X4_X32, batch_div_32) {
255 TEST_REQUIRES_ARM_NEON;
256 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
257 LUTMicrokernelTester()
258 .batch_size(batch_size)
259 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
260 }
261 }
262
263 TEST(X8_LUT__NEON_TBX128X4_X32, batch_lt_32) {
264 TEST_REQUIRES_ARM_NEON;
265 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
266 LUTMicrokernelTester()
267 .batch_size(batch_size)
268 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
269 }
270 }
271
272 TEST(X8_LUT__NEON_TBX128X4_X32, batch_gt_32) {
273 TEST_REQUIRES_ARM_NEON;
274 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
275 LUTMicrokernelTester()
276 .batch_size(batch_size)
277 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
278 }
279 }
280
281 TEST(X8_LUT__NEON_TBX128X4_X32, inplace) {
282 TEST_REQUIRES_ARM_NEON;
283 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
284 LUTMicrokernelTester()
285 .batch_size(batch_size)
286 .inplace(true)
287 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
288 }
289 }
290#endif // XNN_ARCH_ARM64
291
292
293#if XNN_ARCH_ARM64
294 TEST(X8_LUT__NEON_TBX128X4_X48, batch_eq_48) {
295 TEST_REQUIRES_ARM_NEON;
296 LUTMicrokernelTester()
297 .batch_size(48)
298 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
299 }
300
301 TEST(X8_LUT__NEON_TBX128X4_X48, batch_div_48) {
302 TEST_REQUIRES_ARM_NEON;
303 for (size_t batch_size = 96; batch_size < 480; batch_size += 48) {
304 LUTMicrokernelTester()
305 .batch_size(batch_size)
306 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
307 }
308 }
309
310 TEST(X8_LUT__NEON_TBX128X4_X48, batch_lt_48) {
311 TEST_REQUIRES_ARM_NEON;
312 for (size_t batch_size = 1; batch_size < 48; batch_size++) {
313 LUTMicrokernelTester()
314 .batch_size(batch_size)
315 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
316 }
317 }
318
319 TEST(X8_LUT__NEON_TBX128X4_X48, batch_gt_48) {
320 TEST_REQUIRES_ARM_NEON;
321 for (size_t batch_size = 49; batch_size < 96; batch_size++) {
322 LUTMicrokernelTester()
323 .batch_size(batch_size)
324 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
325 }
326 }
327
328 TEST(X8_LUT__NEON_TBX128X4_X48, inplace) {
329 TEST_REQUIRES_ARM_NEON;
330 for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
331 LUTMicrokernelTester()
332 .batch_size(batch_size)
333 .inplace(true)
334 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
335 }
336 }
337#endif // XNN_ARCH_ARM64
338
339
340#if XNN_ARCH_ARM64
341 TEST(X8_LUT__NEON_TBX128X4_X64, batch_eq_64) {
342 TEST_REQUIRES_ARM_NEON;
343 LUTMicrokernelTester()
344 .batch_size(64)
345 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
346 }
347
348 TEST(X8_LUT__NEON_TBX128X4_X64, batch_div_64) {
349 TEST_REQUIRES_ARM_NEON;
350 for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
351 LUTMicrokernelTester()
352 .batch_size(batch_size)
353 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
354 }
355 }
356
357 TEST(X8_LUT__NEON_TBX128X4_X64, batch_lt_64) {
358 TEST_REQUIRES_ARM_NEON;
359 for (size_t batch_size = 1; batch_size < 64; batch_size++) {
360 LUTMicrokernelTester()
361 .batch_size(batch_size)
362 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
363 }
364 }
365
366 TEST(X8_LUT__NEON_TBX128X4_X64, batch_gt_64) {
367 TEST_REQUIRES_ARM_NEON;
368 for (size_t batch_size = 65; batch_size < 128; batch_size++) {
369 LUTMicrokernelTester()
370 .batch_size(batch_size)
371 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
372 }
373 }
374
375 TEST(X8_LUT__NEON_TBX128X4_X64, inplace) {
376 TEST_REQUIRES_ARM_NEON;
377 for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
378 LUTMicrokernelTester()
379 .batch_size(batch_size)
380 .inplace(true)
381 .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
382 }
383 }
384#endif // XNN_ARCH_ARM64
Marat Dukhan7c478e32021-09-10 09:48:13 -0700385
386
387#if XNN_ARCH_X86 || XNN_ARCH_X86_64
388 TEST(X8_LUT__SSSE3_X16, batch_eq_16) {
389 TEST_REQUIRES_X86_SSSE3;
390 LUTMicrokernelTester()
391 .batch_size(16)
392 .Test(xnn_x8_lut_ukernel__ssse3_x16);
393 }
394
395 TEST(X8_LUT__SSSE3_X16, batch_div_16) {
396 TEST_REQUIRES_X86_SSSE3;
397 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
398 LUTMicrokernelTester()
399 .batch_size(batch_size)
400 .Test(xnn_x8_lut_ukernel__ssse3_x16);
401 }
402 }
403
404 TEST(X8_LUT__SSSE3_X16, batch_lt_16) {
405 TEST_REQUIRES_X86_SSSE3;
406 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
407 LUTMicrokernelTester()
408 .batch_size(batch_size)
409 .Test(xnn_x8_lut_ukernel__ssse3_x16);
410 }
411 }
412
413 TEST(X8_LUT__SSSE3_X16, batch_gt_16) {
414 TEST_REQUIRES_X86_SSSE3;
415 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
416 LUTMicrokernelTester()
417 .batch_size(batch_size)
418 .Test(xnn_x8_lut_ukernel__ssse3_x16);
419 }
420 }
421
422 TEST(X8_LUT__SSSE3_X16, inplace) {
423 TEST_REQUIRES_X86_SSSE3;
424 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
425 LUTMicrokernelTester()
426 .batch_size(batch_size)
427 .inplace(true)
428 .Test(xnn_x8_lut_ukernel__ssse3_x16);
429 }
430 }
431#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
432
433
434#if XNN_ARCH_X86 || XNN_ARCH_X86_64
435 TEST(X8_LUT__SSSE3_X32, batch_eq_32) {
436 TEST_REQUIRES_X86_SSSE3;
437 LUTMicrokernelTester()
438 .batch_size(32)
439 .Test(xnn_x8_lut_ukernel__ssse3_x32);
440 }
441
442 TEST(X8_LUT__SSSE3_X32, batch_div_32) {
443 TEST_REQUIRES_X86_SSSE3;
444 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
445 LUTMicrokernelTester()
446 .batch_size(batch_size)
447 .Test(xnn_x8_lut_ukernel__ssse3_x32);
448 }
449 }
450
451 TEST(X8_LUT__SSSE3_X32, batch_lt_32) {
452 TEST_REQUIRES_X86_SSSE3;
453 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
454 LUTMicrokernelTester()
455 .batch_size(batch_size)
456 .Test(xnn_x8_lut_ukernel__ssse3_x32);
457 }
458 }
459
460 TEST(X8_LUT__SSSE3_X32, batch_gt_32) {
461 TEST_REQUIRES_X86_SSSE3;
462 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
463 LUTMicrokernelTester()
464 .batch_size(batch_size)
465 .Test(xnn_x8_lut_ukernel__ssse3_x32);
466 }
467 }
468
469 TEST(X8_LUT__SSSE3_X32, inplace) {
470 TEST_REQUIRES_X86_SSSE3;
471 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
472 LUTMicrokernelTester()
473 .batch_size(batch_size)
474 .inplace(true)
475 .Test(xnn_x8_lut_ukernel__ssse3_x32);
476 }
477 }
478#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
479
480
481#if XNN_ARCH_X86 || XNN_ARCH_X86_64
482 TEST(X8_LUT__AVX_X16, batch_eq_16) {
483 TEST_REQUIRES_X86_AVX;
484 LUTMicrokernelTester()
485 .batch_size(16)
486 .Test(xnn_x8_lut_ukernel__avx_x16);
487 }
488
489 TEST(X8_LUT__AVX_X16, batch_div_16) {
490 TEST_REQUIRES_X86_AVX;
491 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
492 LUTMicrokernelTester()
493 .batch_size(batch_size)
494 .Test(xnn_x8_lut_ukernel__avx_x16);
495 }
496 }
497
498 TEST(X8_LUT__AVX_X16, batch_lt_16) {
499 TEST_REQUIRES_X86_AVX;
500 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
501 LUTMicrokernelTester()
502 .batch_size(batch_size)
503 .Test(xnn_x8_lut_ukernel__avx_x16);
504 }
505 }
506
507 TEST(X8_LUT__AVX_X16, batch_gt_16) {
508 TEST_REQUIRES_X86_AVX;
509 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
510 LUTMicrokernelTester()
511 .batch_size(batch_size)
512 .Test(xnn_x8_lut_ukernel__avx_x16);
513 }
514 }
515
516 TEST(X8_LUT__AVX_X16, inplace) {
517 TEST_REQUIRES_X86_AVX;
518 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
519 LUTMicrokernelTester()
520 .batch_size(batch_size)
521 .inplace(true)
522 .Test(xnn_x8_lut_ukernel__avx_x16);
523 }
524 }
525#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
526
527
528#if XNN_ARCH_X86 || XNN_ARCH_X86_64
529 TEST(X8_LUT__AVX_X32, batch_eq_32) {
530 TEST_REQUIRES_X86_AVX;
531 LUTMicrokernelTester()
532 .batch_size(32)
533 .Test(xnn_x8_lut_ukernel__avx_x32);
534 }
535
536 TEST(X8_LUT__AVX_X32, batch_div_32) {
537 TEST_REQUIRES_X86_AVX;
538 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
539 LUTMicrokernelTester()
540 .batch_size(batch_size)
541 .Test(xnn_x8_lut_ukernel__avx_x32);
542 }
543 }
544
545 TEST(X8_LUT__AVX_X32, batch_lt_32) {
546 TEST_REQUIRES_X86_AVX;
547 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
548 LUTMicrokernelTester()
549 .batch_size(batch_size)
550 .Test(xnn_x8_lut_ukernel__avx_x32);
551 }
552 }
553
554 TEST(X8_LUT__AVX_X32, batch_gt_32) {
555 TEST_REQUIRES_X86_AVX;
556 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
557 LUTMicrokernelTester()
558 .batch_size(batch_size)
559 .Test(xnn_x8_lut_ukernel__avx_x32);
560 }
561 }
562
563 TEST(X8_LUT__AVX_X32, inplace) {
564 TEST_REQUIRES_X86_AVX;
565 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
566 LUTMicrokernelTester()
567 .batch_size(batch_size)
568 .inplace(true)
569 .Test(xnn_x8_lut_ukernel__avx_x32);
570 }
571 }
572#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
573
574
575#if XNN_ARCH_X86 || XNN_ARCH_X86_64
576 TEST(X8_LUT__AVX_X48, batch_eq_48) {
577 TEST_REQUIRES_X86_AVX;
578 LUTMicrokernelTester()
579 .batch_size(48)
580 .Test(xnn_x8_lut_ukernel__avx_x48);
581 }
582
583 TEST(X8_LUT__AVX_X48, batch_div_48) {
584 TEST_REQUIRES_X86_AVX;
585 for (size_t batch_size = 96; batch_size < 480; batch_size += 48) {
586 LUTMicrokernelTester()
587 .batch_size(batch_size)
588 .Test(xnn_x8_lut_ukernel__avx_x48);
589 }
590 }
591
592 TEST(X8_LUT__AVX_X48, batch_lt_48) {
593 TEST_REQUIRES_X86_AVX;
594 for (size_t batch_size = 1; batch_size < 48; batch_size++) {
595 LUTMicrokernelTester()
596 .batch_size(batch_size)
597 .Test(xnn_x8_lut_ukernel__avx_x48);
598 }
599 }
600
601 TEST(X8_LUT__AVX_X48, batch_gt_48) {
602 TEST_REQUIRES_X86_AVX;
603 for (size_t batch_size = 49; batch_size < 96; batch_size++) {
604 LUTMicrokernelTester()
605 .batch_size(batch_size)
606 .Test(xnn_x8_lut_ukernel__avx_x48);
607 }
608 }
609
610 TEST(X8_LUT__AVX_X48, inplace) {
611 TEST_REQUIRES_X86_AVX;
612 for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
613 LUTMicrokernelTester()
614 .batch_size(batch_size)
615 .inplace(true)
616 .Test(xnn_x8_lut_ukernel__avx_x48);
617 }
618 }
619#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
620
621
622#if XNN_ARCH_X86 || XNN_ARCH_X86_64
623 TEST(X8_LUT__AVX_X64, batch_eq_64) {
624 TEST_REQUIRES_X86_AVX;
625 LUTMicrokernelTester()
626 .batch_size(64)
627 .Test(xnn_x8_lut_ukernel__avx_x64);
628 }
629
630 TEST(X8_LUT__AVX_X64, batch_div_64) {
631 TEST_REQUIRES_X86_AVX;
632 for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
633 LUTMicrokernelTester()
634 .batch_size(batch_size)
635 .Test(xnn_x8_lut_ukernel__avx_x64);
636 }
637 }
638
639 TEST(X8_LUT__AVX_X64, batch_lt_64) {
640 TEST_REQUIRES_X86_AVX;
641 for (size_t batch_size = 1; batch_size < 64; batch_size++) {
642 LUTMicrokernelTester()
643 .batch_size(batch_size)
644 .Test(xnn_x8_lut_ukernel__avx_x64);
645 }
646 }
647
648 TEST(X8_LUT__AVX_X64, batch_gt_64) {
649 TEST_REQUIRES_X86_AVX;
650 for (size_t batch_size = 65; batch_size < 128; batch_size++) {
651 LUTMicrokernelTester()
652 .batch_size(batch_size)
653 .Test(xnn_x8_lut_ukernel__avx_x64);
654 }
655 }
656
657 TEST(X8_LUT__AVX_X64, inplace) {
658 TEST_REQUIRES_X86_AVX;
659 for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
660 LUTMicrokernelTester()
661 .batch_size(batch_size)
662 .inplace(true)
663 .Test(xnn_x8_lut_ukernel__avx_x64);
664 }
665 }
666#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
667
668
669#if XNN_ARCH_X86 || XNN_ARCH_X86_64
670 TEST(X8_LUT__AVX2_X32, batch_eq_32) {
671 TEST_REQUIRES_X86_AVX2;
672 LUTMicrokernelTester()
673 .batch_size(32)
674 .Test(xnn_x8_lut_ukernel__avx2_x32);
675 }
676
677 TEST(X8_LUT__AVX2_X32, batch_div_32) {
678 TEST_REQUIRES_X86_AVX2;
679 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
680 LUTMicrokernelTester()
681 .batch_size(batch_size)
682 .Test(xnn_x8_lut_ukernel__avx2_x32);
683 }
684 }
685
686 TEST(X8_LUT__AVX2_X32, batch_lt_32) {
687 TEST_REQUIRES_X86_AVX2;
688 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
689 LUTMicrokernelTester()
690 .batch_size(batch_size)
691 .Test(xnn_x8_lut_ukernel__avx2_x32);
692 }
693 }
694
695 TEST(X8_LUT__AVX2_X32, batch_gt_32) {
696 TEST_REQUIRES_X86_AVX2;
697 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
698 LUTMicrokernelTester()
699 .batch_size(batch_size)
700 .Test(xnn_x8_lut_ukernel__avx2_x32);
701 }
702 }
703
704 TEST(X8_LUT__AVX2_X32, inplace) {
705 TEST_REQUIRES_X86_AVX2;
706 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
707 LUTMicrokernelTester()
708 .batch_size(batch_size)
709 .inplace(true)
710 .Test(xnn_x8_lut_ukernel__avx2_x32);
711 }
712 }
713#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
714
715
716#if XNN_ARCH_X86 || XNN_ARCH_X86_64
717 TEST(X8_LUT__AVX2_X64, batch_eq_64) {
718 TEST_REQUIRES_X86_AVX2;
719 LUTMicrokernelTester()
720 .batch_size(64)
721 .Test(xnn_x8_lut_ukernel__avx2_x64);
722 }
723
724 TEST(X8_LUT__AVX2_X64, batch_div_64) {
725 TEST_REQUIRES_X86_AVX2;
726 for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
727 LUTMicrokernelTester()
728 .batch_size(batch_size)
729 .Test(xnn_x8_lut_ukernel__avx2_x64);
730 }
731 }
732
733 TEST(X8_LUT__AVX2_X64, batch_lt_64) {
734 TEST_REQUIRES_X86_AVX2;
735 for (size_t batch_size = 1; batch_size < 64; batch_size++) {
736 LUTMicrokernelTester()
737 .batch_size(batch_size)
738 .Test(xnn_x8_lut_ukernel__avx2_x64);
739 }
740 }
741
742 TEST(X8_LUT__AVX2_X64, batch_gt_64) {
743 TEST_REQUIRES_X86_AVX2;
744 for (size_t batch_size = 65; batch_size < 128; batch_size++) {
745 LUTMicrokernelTester()
746 .batch_size(batch_size)
747 .Test(xnn_x8_lut_ukernel__avx2_x64);
748 }
749 }
750
751 TEST(X8_LUT__AVX2_X64, inplace) {
752 TEST_REQUIRES_X86_AVX2;
753 for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
754 LUTMicrokernelTester()
755 .batch_size(batch_size)
756 .inplace(true)
757 .Test(xnn_x8_lut_ukernel__avx2_x64);
758 }
759 }
760#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
761
762
763#if XNN_ARCH_X86 || XNN_ARCH_X86_64
764 TEST(X8_LUT__AVX2_X96, batch_eq_96) {
765 TEST_REQUIRES_X86_AVX2;
766 LUTMicrokernelTester()
767 .batch_size(96)
768 .Test(xnn_x8_lut_ukernel__avx2_x96);
769 }
770
771 TEST(X8_LUT__AVX2_X96, batch_div_96) {
772 TEST_REQUIRES_X86_AVX2;
773 for (size_t batch_size = 192; batch_size < 960; batch_size += 96) {
774 LUTMicrokernelTester()
775 .batch_size(batch_size)
776 .Test(xnn_x8_lut_ukernel__avx2_x96);
777 }
778 }
779
780 TEST(X8_LUT__AVX2_X96, batch_lt_96) {
781 TEST_REQUIRES_X86_AVX2;
782 for (size_t batch_size = 1; batch_size < 96; batch_size++) {
783 LUTMicrokernelTester()
784 .batch_size(batch_size)
785 .Test(xnn_x8_lut_ukernel__avx2_x96);
786 }
787 }
788
789 TEST(X8_LUT__AVX2_X96, batch_gt_96) {
790 TEST_REQUIRES_X86_AVX2;
791 for (size_t batch_size = 97; batch_size < 192; batch_size++) {
792 LUTMicrokernelTester()
793 .batch_size(batch_size)
794 .Test(xnn_x8_lut_ukernel__avx2_x96);
795 }
796 }
797
798 TEST(X8_LUT__AVX2_X96, inplace) {
799 TEST_REQUIRES_X86_AVX2;
800 for (size_t batch_size = 1; batch_size <= 480; batch_size += 95) {
801 LUTMicrokernelTester()
802 .batch_size(batch_size)
803 .inplace(true)
804 .Test(xnn_x8_lut_ukernel__avx2_x96);
805 }
806 }
807#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
808
809
810#if XNN_ARCH_X86 || XNN_ARCH_X86_64
811 TEST(X8_LUT__AVX2_X128, batch_eq_128) {
812 TEST_REQUIRES_X86_AVX2;
813 LUTMicrokernelTester()
814 .batch_size(128)
815 .Test(xnn_x8_lut_ukernel__avx2_x128);
816 }
817
818 TEST(X8_LUT__AVX2_X128, batch_div_128) {
819 TEST_REQUIRES_X86_AVX2;
820 for (size_t batch_size = 256; batch_size < 1280; batch_size += 128) {
821 LUTMicrokernelTester()
822 .batch_size(batch_size)
823 .Test(xnn_x8_lut_ukernel__avx2_x128);
824 }
825 }
826
827 TEST(X8_LUT__AVX2_X128, batch_lt_128) {
828 TEST_REQUIRES_X86_AVX2;
829 for (size_t batch_size = 1; batch_size < 128; batch_size++) {
830 LUTMicrokernelTester()
831 .batch_size(batch_size)
832 .Test(xnn_x8_lut_ukernel__avx2_x128);
833 }
834 }
835
836 TEST(X8_LUT__AVX2_X128, batch_gt_128) {
837 TEST_REQUIRES_X86_AVX2;
838 for (size_t batch_size = 129; batch_size < 256; batch_size++) {
839 LUTMicrokernelTester()
840 .batch_size(batch_size)
841 .Test(xnn_x8_lut_ukernel__avx2_x128);
842 }
843 }
844
845 TEST(X8_LUT__AVX2_X128, inplace) {
846 TEST_REQUIRES_X86_AVX2;
847 for (size_t batch_size = 1; batch_size <= 640; batch_size += 127) {
848 LUTMicrokernelTester()
849 .batch_size(batch_size)
850 .inplace(true)
851 .Test(xnn_x8_lut_ukernel__avx2_x128);
852 }
853 }
854#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan2b3c4102021-09-10 19:05:37 -0700855
856
857#if XNN_ARCH_X86 || XNN_ARCH_X86_64
858 TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, batch_eq_64) {
859 TEST_REQUIRES_X86_AVX512SKX;
860 LUTMicrokernelTester()
861 .batch_size(64)
862 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
863 }
864
865 TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, batch_div_64) {
866 TEST_REQUIRES_X86_AVX512SKX;
867 for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
868 LUTMicrokernelTester()
869 .batch_size(batch_size)
870 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
871 }
872 }
873
874 TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, batch_lt_64) {
875 TEST_REQUIRES_X86_AVX512SKX;
876 for (size_t batch_size = 1; batch_size < 64; batch_size++) {
877 LUTMicrokernelTester()
878 .batch_size(batch_size)
879 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
880 }
881 }
882
883 TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, batch_gt_64) {
884 TEST_REQUIRES_X86_AVX512SKX;
885 for (size_t batch_size = 65; batch_size < 128; batch_size++) {
886 LUTMicrokernelTester()
887 .batch_size(batch_size)
888 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
889 }
890 }
891
892 TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, inplace) {
893 TEST_REQUIRES_X86_AVX512SKX;
894 for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
895 LUTMicrokernelTester()
896 .batch_size(batch_size)
897 .inplace(true)
898 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
899 }
900 }
901#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
902
903
904#if XNN_ARCH_X86 || XNN_ARCH_X86_64
905 TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, batch_eq_128) {
906 TEST_REQUIRES_X86_AVX512SKX;
907 LUTMicrokernelTester()
908 .batch_size(128)
909 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
910 }
911
912 TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, batch_div_128) {
913 TEST_REQUIRES_X86_AVX512SKX;
914 for (size_t batch_size = 256; batch_size < 1280; batch_size += 128) {
915 LUTMicrokernelTester()
916 .batch_size(batch_size)
917 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
918 }
919 }
920
921 TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, batch_lt_128) {
922 TEST_REQUIRES_X86_AVX512SKX;
923 for (size_t batch_size = 1; batch_size < 128; batch_size++) {
924 LUTMicrokernelTester()
925 .batch_size(batch_size)
926 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
927 }
928 }
929
930 TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, batch_gt_128) {
931 TEST_REQUIRES_X86_AVX512SKX;
932 for (size_t batch_size = 129; batch_size < 256; batch_size++) {
933 LUTMicrokernelTester()
934 .batch_size(batch_size)
935 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
936 }
937 }
938
939 TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, inplace) {
940 TEST_REQUIRES_X86_AVX512SKX;
941 for (size_t batch_size = 1; batch_size <= 640; batch_size += 127) {
942 LUTMicrokernelTester()
943 .batch_size(batch_size)
944 .inplace(true)
945 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
946 }
947 }
948#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
949
950
951#if XNN_ARCH_X86 || XNN_ARCH_X86_64
952 TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, batch_eq_192) {
953 TEST_REQUIRES_X86_AVX512SKX;
954 LUTMicrokernelTester()
955 .batch_size(192)
956 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
957 }
958
959 TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, batch_div_192) {
960 TEST_REQUIRES_X86_AVX512SKX;
961 for (size_t batch_size = 384; batch_size < 1920; batch_size += 192) {
962 LUTMicrokernelTester()
963 .batch_size(batch_size)
964 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
965 }
966 }
967
968 TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, batch_lt_192) {
969 TEST_REQUIRES_X86_AVX512SKX;
970 for (size_t batch_size = 1; batch_size < 192; batch_size++) {
971 LUTMicrokernelTester()
972 .batch_size(batch_size)
973 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
974 }
975 }
976
977 TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, batch_gt_192) {
978 TEST_REQUIRES_X86_AVX512SKX;
979 for (size_t batch_size = 193; batch_size < 384; batch_size++) {
980 LUTMicrokernelTester()
981 .batch_size(batch_size)
982 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
983 }
984 }
985
986 TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, inplace) {
987 TEST_REQUIRES_X86_AVX512SKX;
988 for (size_t batch_size = 1; batch_size <= 960; batch_size += 191) {
989 LUTMicrokernelTester()
990 .batch_size(batch_size)
991 .inplace(true)
992 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
993 }
994 }
995#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
996
997
998#if XNN_ARCH_X86 || XNN_ARCH_X86_64
999 TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, batch_eq_256) {
1000 TEST_REQUIRES_X86_AVX512SKX;
1001 LUTMicrokernelTester()
1002 .batch_size(256)
1003 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
1004 }
1005
1006 TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, batch_div_256) {
1007 TEST_REQUIRES_X86_AVX512SKX;
1008 for (size_t batch_size = 512; batch_size < 2560; batch_size += 256) {
1009 LUTMicrokernelTester()
1010 .batch_size(batch_size)
1011 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
1012 }
1013 }
1014
1015 TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, batch_lt_256) {
1016 TEST_REQUIRES_X86_AVX512SKX;
1017 for (size_t batch_size = 1; batch_size < 256; batch_size++) {
1018 LUTMicrokernelTester()
1019 .batch_size(batch_size)
1020 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
1021 }
1022 }
1023
1024 TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, batch_gt_256) {
1025 TEST_REQUIRES_X86_AVX512SKX;
1026 for (size_t batch_size = 257; batch_size < 512; batch_size++) {
1027 LUTMicrokernelTester()
1028 .batch_size(batch_size)
1029 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
1030 }
1031 }
1032
1033 TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, inplace) {
1034 TEST_REQUIRES_X86_AVX512SKX;
1035 for (size_t batch_size = 1; batch_size <= 1280; batch_size += 255) {
1036 LUTMicrokernelTester()
1037 .batch_size(batch_size)
1038 .inplace(true)
1039 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
1040 }
1041 }
1042#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhana4ad9882021-09-18 08:06:04 -07001043
1044
Marat Dukhan4c617792021-12-21 15:47:58 -08001045#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhana4ad9882021-09-18 08:06:04 -07001046 TEST(X8_LUT__WASMSIMD_X16, batch_eq_16) {
1047 LUTMicrokernelTester()
1048 .batch_size(16)
1049 .Test(xnn_x8_lut_ukernel__wasmsimd_x16);
1050 }
1051
1052 TEST(X8_LUT__WASMSIMD_X16, batch_div_16) {
1053 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1054 LUTMicrokernelTester()
1055 .batch_size(batch_size)
1056 .Test(xnn_x8_lut_ukernel__wasmsimd_x16);
1057 }
1058 }
1059
1060 TEST(X8_LUT__WASMSIMD_X16, batch_lt_16) {
1061 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1062 LUTMicrokernelTester()
1063 .batch_size(batch_size)
1064 .Test(xnn_x8_lut_ukernel__wasmsimd_x16);
1065 }
1066 }
1067
1068 TEST(X8_LUT__WASMSIMD_X16, batch_gt_16) {
1069 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1070 LUTMicrokernelTester()
1071 .batch_size(batch_size)
1072 .Test(xnn_x8_lut_ukernel__wasmsimd_x16);
1073 }
1074 }
1075
1076 TEST(X8_LUT__WASMSIMD_X16, inplace) {
1077 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1078 LUTMicrokernelTester()
1079 .batch_size(batch_size)
1080 .inplace(true)
1081 .Test(xnn_x8_lut_ukernel__wasmsimd_x16);
1082 }
1083 }
Marat Dukhan4c617792021-12-21 15:47:58 -08001084#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhana4ad9882021-09-18 08:06:04 -07001085
1086
Marat Dukhan4c617792021-12-21 15:47:58 -08001087#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhana4ad9882021-09-18 08:06:04 -07001088 TEST(X8_LUT__WASMSIMD_X32, batch_eq_32) {
1089 LUTMicrokernelTester()
1090 .batch_size(32)
1091 .Test(xnn_x8_lut_ukernel__wasmsimd_x32);
1092 }
1093
1094 TEST(X8_LUT__WASMSIMD_X32, batch_div_32) {
1095 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
1096 LUTMicrokernelTester()
1097 .batch_size(batch_size)
1098 .Test(xnn_x8_lut_ukernel__wasmsimd_x32);
1099 }
1100 }
1101
1102 TEST(X8_LUT__WASMSIMD_X32, batch_lt_32) {
1103 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
1104 LUTMicrokernelTester()
1105 .batch_size(batch_size)
1106 .Test(xnn_x8_lut_ukernel__wasmsimd_x32);
1107 }
1108 }
1109
1110 TEST(X8_LUT__WASMSIMD_X32, batch_gt_32) {
1111 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
1112 LUTMicrokernelTester()
1113 .batch_size(batch_size)
1114 .Test(xnn_x8_lut_ukernel__wasmsimd_x32);
1115 }
1116 }
1117
1118 TEST(X8_LUT__WASMSIMD_X32, inplace) {
1119 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1120 LUTMicrokernelTester()
1121 .batch_size(batch_size)
1122 .inplace(true)
1123 .Test(xnn_x8_lut_ukernel__wasmsimd_x32);
1124 }
1125 }
Marat Dukhan4c617792021-12-21 15:47:58 -08001126#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhana4ad9882021-09-18 08:06:04 -07001127
1128
Marat Dukhan4c617792021-12-21 15:47:58 -08001129#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhana4ad9882021-09-18 08:06:04 -07001130 TEST(X8_LUT__WASMSIMD_X48, batch_eq_48) {
1131 LUTMicrokernelTester()
1132 .batch_size(48)
1133 .Test(xnn_x8_lut_ukernel__wasmsimd_x48);
1134 }
1135
1136 TEST(X8_LUT__WASMSIMD_X48, batch_div_48) {
1137 for (size_t batch_size = 96; batch_size < 480; batch_size += 48) {
1138 LUTMicrokernelTester()
1139 .batch_size(batch_size)
1140 .Test(xnn_x8_lut_ukernel__wasmsimd_x48);
1141 }
1142 }
1143
1144 TEST(X8_LUT__WASMSIMD_X48, batch_lt_48) {
1145 for (size_t batch_size = 1; batch_size < 48; batch_size++) {
1146 LUTMicrokernelTester()
1147 .batch_size(batch_size)
1148 .Test(xnn_x8_lut_ukernel__wasmsimd_x48);
1149 }
1150 }
1151
1152 TEST(X8_LUT__WASMSIMD_X48, batch_gt_48) {
1153 for (size_t batch_size = 49; batch_size < 96; batch_size++) {
1154 LUTMicrokernelTester()
1155 .batch_size(batch_size)
1156 .Test(xnn_x8_lut_ukernel__wasmsimd_x48);
1157 }
1158 }
1159
1160 TEST(X8_LUT__WASMSIMD_X48, inplace) {
1161 for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
1162 LUTMicrokernelTester()
1163 .batch_size(batch_size)
1164 .inplace(true)
1165 .Test(xnn_x8_lut_ukernel__wasmsimd_x48);
1166 }
1167 }
Marat Dukhan4c617792021-12-21 15:47:58 -08001168#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhana4ad9882021-09-18 08:06:04 -07001169
1170
Marat Dukhan4c617792021-12-21 15:47:58 -08001171#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhana4ad9882021-09-18 08:06:04 -07001172 TEST(X8_LUT__WASMSIMD_X64, batch_eq_64) {
1173 LUTMicrokernelTester()
1174 .batch_size(64)
1175 .Test(xnn_x8_lut_ukernel__wasmsimd_x64);
1176 }
1177
1178 TEST(X8_LUT__WASMSIMD_X64, batch_div_64) {
1179 for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
1180 LUTMicrokernelTester()
1181 .batch_size(batch_size)
1182 .Test(xnn_x8_lut_ukernel__wasmsimd_x64);
1183 }
1184 }
1185
1186 TEST(X8_LUT__WASMSIMD_X64, batch_lt_64) {
1187 for (size_t batch_size = 1; batch_size < 64; batch_size++) {
1188 LUTMicrokernelTester()
1189 .batch_size(batch_size)
1190 .Test(xnn_x8_lut_ukernel__wasmsimd_x64);
1191 }
1192 }
1193
1194 TEST(X8_LUT__WASMSIMD_X64, batch_gt_64) {
1195 for (size_t batch_size = 65; batch_size < 128; batch_size++) {
1196 LUTMicrokernelTester()
1197 .batch_size(batch_size)
1198 .Test(xnn_x8_lut_ukernel__wasmsimd_x64);
1199 }
1200 }
1201
1202 TEST(X8_LUT__WASMSIMD_X64, inplace) {
1203 for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
1204 LUTMicrokernelTester()
1205 .batch_size(batch_size)
1206 .inplace(true)
1207 .Test(xnn_x8_lut_ukernel__wasmsimd_x64);
1208 }
1209 }
Marat Dukhan4c617792021-12-21 15:47:58 -08001210#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD