blob: c6e7c95afe3f4113eae5d4142c183860d9c60ac7 [file] [log] [blame]
Marat Dukhan2e23d2b2020-07-29 16:01:37 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2020 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>

#include <gtest/gtest.h>

#include <xnnpack/common.h>

#include <xnnpack/requantization-stubs.h>
#include "requantization-tester.h"
19
20
/*
 * Precise scalar implementation using unsigned 32-bit arithmetic.
 */
24
25TEST(QS8_PRECISE__SCALAR_UNSIGNED32, exact_divide_by_po2) {
26 for (uint32_t s = 1; s < 32; s++) {
27 RequantizationTester()
28 .qmin(std::numeric_limits<int8_t>::min())
29 .qmax(std::numeric_limits<int8_t>::max())
30 .s(s)
31 .TestExactDivideByPO2(xnn_qs8_requantize_precise__scalar_unsigned32);
32 }
33}
34
35TEST(QS8_PRECISE__SCALAR_UNSIGNED32, exact_divide_by_po2_with_zero_point) {
36 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
37 zero_point <= std::numeric_limits<int8_t>::max();
38 zero_point++)
39 {
40 for (uint32_t s = 1; s < 32; s++) {
41 RequantizationTester()
42 .zero_point(zero_point)
43 .qmin(std::numeric_limits<int8_t>::min())
44 .qmax(std::numeric_limits<int8_t>::max())
45 .s(s)
46 .TestExactDivideByPO2(xnn_qs8_requantize_precise__scalar_unsigned32);
47 }
48 }
49}
50
51TEST(QS8_PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_up) {
52 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
53 zero_point <= std::numeric_limits<int8_t>::max();
54 zero_point++)
55 {
56 for (uint32_t s = 1; s < 32; s++) {
57 RequantizationTester()
58 .zero_point(zero_point)
59 .qmin(std::numeric_limits<int8_t>::min())
60 .qmax(std::numeric_limits<int8_t>::max())
61 .s(s)
62 .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__scalar_unsigned32);
63 }
64 }
65}
66
67TEST(QS8_PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_down) {
68 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
69 zero_point <= std::numeric_limits<int8_t>::max();
70 zero_point++)
71 {
72 for (uint32_t s = 1; s < 32; s++) {
73 RequantizationTester()
74 .zero_point(zero_point)
75 .qmin(std::numeric_limits<int8_t>::min())
76 .qmax(std::numeric_limits<int8_t>::max())
77 .s(s)
78 .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__scalar_unsigned32);
79 }
80 }
81}
82
83TEST(QS8_PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_away) {
84 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
85 zero_point <= std::numeric_limits<int8_t>::max();
86 zero_point++)
87 {
88 for (uint32_t s = 1; s < 32; s++) {
89 RequantizationTester()
90 .zero_point(zero_point)
91 .qmin(std::numeric_limits<int8_t>::min())
92 .qmax(std::numeric_limits<int8_t>::max())
93 .s(s)
94 .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__scalar_unsigned32);
95 }
96 }
97}
98
99TEST(QS8_PRECISE__SCALAR_UNSIGNED32, special_cases) {
100 RequantizationTester()
101 .qmin(std::numeric_limits<int8_t>::min())
102 .qmax(std::numeric_limits<int8_t>::max())
103 .TestSpecialCases(xnn_qs8_requantize_precise__scalar_unsigned32);
104}
105
106TEST(QS8_PRECISE__SCALAR_UNSIGNED32, random_cases) {
107 RequantizationTester()
108 .qmin(std::numeric_limits<int8_t>::min())
109 .qmax(std::numeric_limits<int8_t>::max())
110 .iterations(100)
111 .TestRandomCasesPrecise(xnn_qs8_requantize_precise__scalar_unsigned32);
112}
113
114
/*
 * Precise scalar implementation using unsigned 64-bit arithmetic.
 */
118
119TEST(QS8_PRECISE__SCALAR_UNSIGNED64, exact_divide_by_po2) {
120 for (uint32_t s = 1; s < 32; s++) {
121 RequantizationTester()
122 .qmin(std::numeric_limits<int8_t>::min())
123 .qmax(std::numeric_limits<int8_t>::max())
124 .s(s)
125 .TestExactDivideByPO2(xnn_qs8_requantize_precise__scalar_unsigned64);
126 }
127}
128
129TEST(QS8_PRECISE__SCALAR_UNSIGNED64, exact_divide_by_po2_with_zero_point) {
130 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
131 zero_point <= std::numeric_limits<int8_t>::max();
132 zero_point++)
133 {
134 for (uint32_t s = 1; s < 32; s++) {
135 RequantizationTester()
136 .zero_point(zero_point)
137 .qmin(std::numeric_limits<int8_t>::min())
138 .qmax(std::numeric_limits<int8_t>::max())
139 .s(s)
140 .TestExactDivideByPO2(xnn_qs8_requantize_precise__scalar_unsigned64);
141 }
142 }
143}
144
145TEST(QS8_PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_up) {
146 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
147 zero_point <= std::numeric_limits<int8_t>::max();
148 zero_point++)
149 {
150 for (uint32_t s = 1; s < 32; s++) {
151 RequantizationTester()
152 .zero_point(zero_point)
153 .qmin(std::numeric_limits<int8_t>::min())
154 .qmax(std::numeric_limits<int8_t>::max())
155 .s(s)
156 .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__scalar_unsigned64);
157 }
158 }
159}
160
161TEST(QS8_PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_down) {
162 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
163 zero_point <= std::numeric_limits<int8_t>::max();
164 zero_point++)
165 {
166 for (uint32_t s = 1; s < 32; s++) {
167 RequantizationTester()
168 .zero_point(zero_point)
169 .qmin(std::numeric_limits<int8_t>::min())
170 .qmax(std::numeric_limits<int8_t>::max())
171 .s(s)
172 .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__scalar_unsigned64);
173 }
174 }
175}
176
177TEST(QS8_PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_away) {
178 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
179 zero_point <= std::numeric_limits<int8_t>::max();
180 zero_point++)
181 {
182 for (uint32_t s = 1; s < 32; s++) {
183 RequantizationTester()
184 .zero_point(zero_point)
185 .qmin(std::numeric_limits<int8_t>::min())
186 .qmax(std::numeric_limits<int8_t>::max())
187 .s(s)
188 .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__scalar_unsigned64);
189 }
190 }
191}
192
193TEST(QS8_PRECISE__SCALAR_UNSIGNED64, special_cases) {
194 RequantizationTester()
195 .qmin(std::numeric_limits<int8_t>::min())
196 .qmax(std::numeric_limits<int8_t>::max())
197 .TestSpecialCases(xnn_qs8_requantize_precise__scalar_unsigned64);
198}
199
200TEST(QS8_PRECISE__SCALAR_UNSIGNED64, random_cases) {
201 RequantizationTester()
202 .qmin(std::numeric_limits<int8_t>::min())
203 .qmax(std::numeric_limits<int8_t>::max())
204 .iterations(100)
205 .TestRandomCasesPrecise(xnn_qs8_requantize_precise__scalar_unsigned64);
206}
207
208
/*
 * Precise scalar implementation using signed 64-bit arithmetic.
 */
212
213TEST(QS8_PRECISE__SCALAR_SIGNED64, exact_divide_by_po2) {
214 for (uint32_t s = 1; s < 32; s++) {
215 RequantizationTester()
216 .qmin(std::numeric_limits<int8_t>::min())
217 .qmax(std::numeric_limits<int8_t>::max())
218 .s(s)
219 .TestExactDivideByPO2(xnn_qs8_requantize_precise__scalar_signed64);
220 }
221}
222
223TEST(QS8_PRECISE__SCALAR_SIGNED64, exact_divide_by_po2_with_zero_point) {
224 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
225 zero_point <= std::numeric_limits<int8_t>::max();
226 zero_point++)
227 {
228 for (uint32_t s = 1; s < 32; s++) {
229 RequantizationTester()
230 .zero_point(zero_point)
231 .qmin(std::numeric_limits<int8_t>::min())
232 .qmax(std::numeric_limits<int8_t>::max())
233 .s(s)
234 .TestExactDivideByPO2(xnn_qs8_requantize_precise__scalar_signed64);
235 }
236 }
237}
238
239TEST(QS8_PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_up) {
240 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
241 zero_point <= std::numeric_limits<int8_t>::max();
242 zero_point++)
243 {
244 for (uint32_t s = 1; s < 32; s++) {
245 RequantizationTester()
246 .zero_point(zero_point)
247 .qmin(std::numeric_limits<int8_t>::min())
248 .qmax(std::numeric_limits<int8_t>::max())
249 .s(s)
250 .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__scalar_signed64);
251 }
252 }
253}
254
255TEST(QS8_PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_down) {
256 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
257 zero_point <= std::numeric_limits<int8_t>::max();
258 zero_point++)
259 {
260 for (uint32_t s = 1; s < 32; s++) {
261 RequantizationTester()
262 .zero_point(zero_point)
263 .qmin(std::numeric_limits<int8_t>::min())
264 .qmax(std::numeric_limits<int8_t>::max())
265 .s(s)
266 .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__scalar_signed64);
267 }
268 }
269}
270
271TEST(QS8_PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_away) {
272 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
273 zero_point <= std::numeric_limits<int8_t>::max();
274 zero_point++)
275 {
276 for (uint32_t s = 1; s < 32; s++) {
277 RequantizationTester()
278 .zero_point(zero_point)
279 .qmin(std::numeric_limits<int8_t>::min())
280 .qmax(std::numeric_limits<int8_t>::max())
281 .s(s)
282 .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__scalar_signed64);
283 }
284 }
285}
286
287TEST(QS8_PRECISE__SCALAR_SIGNED64, special_cases) {
288 RequantizationTester()
289 .qmin(std::numeric_limits<int8_t>::min())
290 .qmax(std::numeric_limits<int8_t>::max())
291 .TestSpecialCases(xnn_qs8_requantize_precise__scalar_signed64);
292}
293
294TEST(QS8_PRECISE__SCALAR_SIGNED64, random_cases) {
295 RequantizationTester()
296 .qmin(std::numeric_limits<int8_t>::min())
297 .qmax(std::numeric_limits<int8_t>::max())
298 .iterations(100)
299 .TestRandomCasesPrecise(xnn_qs8_requantize_precise__scalar_signed64);
300}
301
302
303/*
304 * FP32-based scalar implementation using lrintf function.
305 */
306
307TEST(QS8_FP32__SCALAR_LRINTF, random_cases) {
308 RequantizationTester()
309 .qmin(std::numeric_limits<int8_t>::min())
310 .qmax(std::numeric_limits<int8_t>::max())
311 .iterations(1000)
312 .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__scalar_lrintf);
313}
314
315
316/*
317 * FP32-based scalar implementation using magic trick for FP32->INT32 conversion.
318 */
319
320TEST(QS8_FP32__SCALAR_MAGIC, random_cases) {
321 RequantizationTester()
322 .qmin(std::numeric_limits<int8_t>::min())
323 .qmax(std::numeric_limits<int8_t>::max())
324 .iterations(1000)
325 .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__scalar_magic);
326}
327
328
329/*
330 * Q31-based scalar implementation.
331 */
332
333TEST(QS8_Q31__SCALAR, exact_divide_by_po2) {
334 for (uint32_t s = 1; s < 32; s++) {
335 RequantizationTester()
336 .qmin(std::numeric_limits<int8_t>::min())
337 .qmax(std::numeric_limits<int8_t>::max())
338 .s(s)
339 .TestExactDivideByPO2(xnn_qs8_requantize_q31__scalar);
340 }
341}
342
343TEST(QS8_Q31__SCALAR, exact_divide_by_po2_with_zero_point) {
344 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
345 zero_point <= std::numeric_limits<int8_t>::max();
346 zero_point++)
347 {
348 for (uint32_t s = 1; s < 32; s++) {
349 RequantizationTester()
350 .zero_point(zero_point)
351 .qmin(std::numeric_limits<int8_t>::min())
352 .qmax(std::numeric_limits<int8_t>::max())
353 .s(s)
354 .TestExactDivideByPO2(xnn_qs8_requantize_q31__scalar);
355 }
356 }
357}
358
359TEST(QS8_Q31__SCALAR, divide_by_po2_with_rounding_up) {
360 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
361 zero_point <= std::numeric_limits<int8_t>::max();
362 zero_point++)
363 {
364 for (uint32_t s = 1; s < 32; s++) {
365 RequantizationTester()
366 .zero_point(zero_point)
367 .qmin(std::numeric_limits<int8_t>::min())
368 .qmax(std::numeric_limits<int8_t>::max())
369 .s(s)
370 .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_q31__scalar);
371 }
372 }
373}
374
375/* No rounding down test - it fails because of upward bias in multiplication */
376/* No rounding away test - it fails because of upward bias in multiplication */
377
378TEST(QS8_Q31__SCALAR, special_cases) {
379 RequantizationTester()
380 .qmin(std::numeric_limits<int8_t>::min())
381 .qmax(std::numeric_limits<int8_t>::max())
382 .TestSpecialCases(xnn_qs8_requantize_q31__scalar);
383}
384
385TEST(QS8_Q31__SCALAR, random_cases) {
386 RequantizationTester()
387 .qmin(std::numeric_limits<int8_t>::min())
388 .qmax(std::numeric_limits<int8_t>::max())
389 .iterations(100)
390 .TestRandomCasesApproximate(xnn_qs8_requantize_q31__scalar);
391}
392
393
Marat Dukhan2e23d2b2020-07-29 16:01:37 -0700394#if XNN_ARCH_X86 || XNN_ARCH_X86_64
395 /*
396 * Precise SSE2 implementation using floating-point shuffle.
397 */
398
399 TEST(QS8_PRECISE__SSE2, exact_divide_by_po2) {
400 for (uint32_t s = 1; s < 32; s++) {
401 RequantizationTester()
402 .qmin(std::numeric_limits<int8_t>::min())
403 .qmax(std::numeric_limits<int8_t>::max())
404 .s(s)
405 .TestExactDivideByPO2(xnn_qs8_requantize_precise__sse2);
406 }
407 }
408
409 TEST(QS8_PRECISE__SSE2, exact_divide_by_po2_with_zero_point) {
410 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
411 zero_point <= std::numeric_limits<int8_t>::max();
412 zero_point++)
413 {
414 for (uint32_t s = 1; s < 32; s++) {
415 RequantizationTester()
416 .zero_point(zero_point)
417 .qmin(std::numeric_limits<int8_t>::min())
418 .qmax(std::numeric_limits<int8_t>::max())
419 .s(s)
420 .TestExactDivideByPO2(xnn_qs8_requantize_precise__sse2);
421 }
422 }
423 }
424
425 TEST(QS8_PRECISE__SSE2, divide_by_po2_with_rounding_up) {
426 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
427 zero_point <= std::numeric_limits<int8_t>::max();
428 zero_point++)
429 {
430 for (uint32_t s = 1; s < 32; s++) {
431 RequantizationTester()
432 .zero_point(zero_point)
433 .qmin(std::numeric_limits<int8_t>::min())
434 .qmax(std::numeric_limits<int8_t>::max())
435 .s(s)
436 .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__sse2);
437 }
438 }
439 }
440
441 TEST(QS8_PRECISE__SSE2, divide_by_po2_with_rounding_down) {
442 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
443 zero_point <= std::numeric_limits<int8_t>::max();
444 zero_point++)
445 {
446 for (uint32_t s = 1; s < 32; s++) {
447 RequantizationTester()
448 .zero_point(zero_point)
449 .qmin(std::numeric_limits<int8_t>::min())
450 .qmax(std::numeric_limits<int8_t>::max())
451 .s(s)
452 .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__sse2);
453 }
454 }
455 }
456
457 TEST(QS8_PRECISE__SSE2, divide_by_po2_with_rounding_away) {
458 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
459 zero_point <= std::numeric_limits<int8_t>::max();
460 zero_point++)
461 {
462 for (uint32_t s = 1; s < 32; s++) {
463 RequantizationTester()
464 .zero_point(zero_point)
465 .qmin(std::numeric_limits<int8_t>::min())
466 .qmax(std::numeric_limits<int8_t>::max())
467 .s(s)
468 .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__sse2);
469 }
470 }
471 }
472
473 TEST(QS8_PRECISE__SSE2, special_cases) {
474 RequantizationTester()
475 .qmin(std::numeric_limits<int8_t>::min())
476 .qmax(std::numeric_limits<int8_t>::max())
477 .TestSpecialCases(xnn_qs8_requantize_precise__sse2);
478 }
479
480 TEST(QS8_PRECISE__SSE2, random_cases) {
481 RequantizationTester()
482 .qmin(std::numeric_limits<int8_t>::min())
483 .qmax(std::numeric_limits<int8_t>::max())
484 .iterations(100)
485 .TestRandomCasesPrecise(xnn_qs8_requantize_precise__sse2);
486 }
487
488
489 /*
490 * Precise SSSE3 implementation using floating-point shuffle.
491 */
492
493 TEST(QS8_PRECISE__SSSE3, exact_divide_by_po2) {
494 for (uint32_t s = 1; s < 32; s++) {
495 RequantizationTester()
496 .qmin(std::numeric_limits<int8_t>::min())
497 .qmax(std::numeric_limits<int8_t>::max())
498 .s(s)
499 .TestExactDivideByPO2(xnn_qs8_requantize_precise__ssse3);
500 }
501 }
502
503 TEST(QS8_PRECISE__SSSE3, exact_divide_by_po2_with_zero_point) {
504 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
505 zero_point <= std::numeric_limits<int8_t>::max();
506 zero_point++)
507 {
508 for (uint32_t s = 1; s < 32; s++) {
509 RequantizationTester()
510 .zero_point(zero_point)
511 .qmin(std::numeric_limits<int8_t>::min())
512 .qmax(std::numeric_limits<int8_t>::max())
513 .s(s)
514 .TestExactDivideByPO2(xnn_qs8_requantize_precise__ssse3);
515 }
516 }
517 }
518
519 TEST(QS8_PRECISE__SSSE3, divide_by_po2_with_rounding_up) {
520 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
521 zero_point <= std::numeric_limits<int8_t>::max();
522 zero_point++)
523 {
524 for (uint32_t s = 1; s < 32; s++) {
525 RequantizationTester()
526 .zero_point(zero_point)
527 .qmin(std::numeric_limits<int8_t>::min())
528 .qmax(std::numeric_limits<int8_t>::max())
529 .s(s)
530 .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__ssse3);
531 }
532 }
533 }
534
535 TEST(QS8_PRECISE__SSSE3, divide_by_po2_with_rounding_down) {
536 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
537 zero_point <= std::numeric_limits<int8_t>::max();
538 zero_point++)
539 {
540 for (uint32_t s = 1; s < 32; s++) {
541 RequantizationTester()
542 .zero_point(zero_point)
543 .qmin(std::numeric_limits<int8_t>::min())
544 .qmax(std::numeric_limits<int8_t>::max())
545 .s(s)
546 .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__ssse3);
547 }
548 }
549 }
550
551 TEST(QS8_PRECISE__SSSE3, divide_by_po2_with_rounding_away) {
552 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
553 zero_point <= std::numeric_limits<int8_t>::max();
554 zero_point++)
555 {
556 for (uint32_t s = 1; s < 32; s++) {
557 RequantizationTester()
558 .zero_point(zero_point)
559 .qmin(std::numeric_limits<int8_t>::min())
560 .qmax(std::numeric_limits<int8_t>::max())
561 .s(s)
562 .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__ssse3);
563 }
564 }
565 }
566
567 TEST(QS8_PRECISE__SSSE3, special_cases) {
568 RequantizationTester()
569 .qmin(std::numeric_limits<int8_t>::min())
570 .qmax(std::numeric_limits<int8_t>::max())
571 .TestSpecialCases(xnn_qs8_requantize_precise__ssse3);
572 }
573
574 TEST(QS8_PRECISE__SSSE3, random_cases) {
575 RequantizationTester()
576 .qmin(std::numeric_limits<int8_t>::min())
577 .qmax(std::numeric_limits<int8_t>::max())
578 .iterations(100)
579 .TestRandomCasesPrecise(xnn_qs8_requantize_precise__ssse3);
580 }
581
582
583 /*
584 * Precise SSE4.1 implementation using static blend instruction.
585 */
586
587 TEST(QS8_PRECISE__SSE4, exact_divide_by_po2) {
588 for (uint32_t s = 1; s < 32; s++) {
589 RequantizationTester()
590 .qmin(std::numeric_limits<int8_t>::min())
591 .qmax(std::numeric_limits<int8_t>::max())
592 .s(s)
593 .TestExactDivideByPO2(xnn_qs8_requantize_precise__sse4);
594 }
595 }
596
597 TEST(QS8_PRECISE__SSE4, exact_divide_by_po2_with_zero_point) {
598 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
599 zero_point <= std::numeric_limits<int8_t>::max();
600 zero_point++)
601 {
602 for (uint32_t s = 1; s < 32; s++) {
603 RequantizationTester()
604 .zero_point(zero_point)
605 .qmin(std::numeric_limits<int8_t>::min())
606 .qmax(std::numeric_limits<int8_t>::max())
607 .s(s)
608 .TestExactDivideByPO2(xnn_qs8_requantize_precise__sse4);
609 }
610 }
611 }
612
613 TEST(QS8_PRECISE__SSE4, divide_by_po2_with_rounding_up) {
614 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
615 zero_point <= std::numeric_limits<int8_t>::max();
616 zero_point++)
617 {
618 for (uint32_t s = 1; s < 32; s++) {
619 RequantizationTester()
620 .zero_point(zero_point)
621 .qmin(std::numeric_limits<int8_t>::min())
622 .qmax(std::numeric_limits<int8_t>::max())
623 .s(s)
624 .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__sse4);
625 }
626 }
627 }
628
629 TEST(QS8_PRECISE__SSE4, divide_by_po2_with_rounding_down) {
630 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
631 zero_point <= std::numeric_limits<int8_t>::max();
632 zero_point++)
633 {
634 for (uint32_t s = 1; s < 32; s++) {
635 RequantizationTester()
636 .zero_point(zero_point)
637 .qmin(std::numeric_limits<int8_t>::min())
638 .qmax(std::numeric_limits<int8_t>::max())
639 .s(s)
640 .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__sse4);
641 }
642 }
643 }
644
645 TEST(QS8_PRECISE__SSE4, divide_by_po2_with_rounding_away) {
646 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
647 zero_point <= std::numeric_limits<int8_t>::max();
648 zero_point++)
649 {
650 for (uint32_t s = 1; s < 32; s++) {
651 RequantizationTester()
652 .zero_point(zero_point)
653 .qmin(std::numeric_limits<int8_t>::min())
654 .qmax(std::numeric_limits<int8_t>::max())
655 .s(s)
656 .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__sse4);
657 }
658 }
659 }
660
661 TEST(QS8_PRECISE__SSE4, special_cases) {
662 RequantizationTester()
663 .qmin(std::numeric_limits<int8_t>::min())
664 .qmax(std::numeric_limits<int8_t>::max())
665 .TestSpecialCases(xnn_qs8_requantize_precise__sse4);
666 }
667
668 TEST(QS8_PRECISE__SSE4, random_cases) {
669 RequantizationTester()
670 .qmin(std::numeric_limits<int8_t>::min())
671 .qmax(std::numeric_limits<int8_t>::max())
672 .iterations(100)
673 .TestRandomCasesPrecise(xnn_qs8_requantize_precise__sse4);
674 }
675
676
677 /*
678 * FP32-based x86 SSE2 implementation.
679 */
680
681 TEST(QS8_FP32__SSE2, random_cases) {
682 RequantizationTester()
683 .qmin(std::numeric_limits<int8_t>::min())
684 .qmax(std::numeric_limits<int8_t>::max())
685 .iterations(1000)
686 .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__sse2);
687 }
688
689
690 /*
691 * FP32-based x86 SSE4 implementation.
692 */
693
694 TEST(QS8_FP32__SSE4, random_cases) {
695 RequantizationTester()
696 .qmin(std::numeric_limits<int8_t>::min())
697 .qmax(std::numeric_limits<int8_t>::max())
698 .iterations(1000)
699 .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__sse4);
700 }
701
702
703 /*
704 * Q31-based x86 SSE2 implementation.
705 */
706
707 TEST(QS8_Q31__SSE2, exact_divide_by_po2) {
708 for (uint32_t s = 1; s < 32; s++) {
709 RequantizationTester()
710 .qmin(std::numeric_limits<int8_t>::min())
711 .qmax(std::numeric_limits<int8_t>::max())
712 .s(s)
713 .TestExactDivideByPO2(xnn_qs8_requantize_q31__sse2);
714 }
715 }
716
717 TEST(QS8_Q31__SSE2, exact_divide_by_po2_with_zero_point) {
718 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
719 zero_point <= std::numeric_limits<int8_t>::max();
720 zero_point++)
721 {
722 for (uint32_t s = 1; s < 32; s++) {
723 RequantizationTester()
724 .zero_point(zero_point)
725 .qmin(std::numeric_limits<int8_t>::min())
726 .qmax(std::numeric_limits<int8_t>::max())
727 .s(s)
728 .TestExactDivideByPO2(xnn_qs8_requantize_q31__sse2);
729 }
730 }
731 }
732
733 TEST(QS8_Q31__SSE2, divide_by_po2_with_rounding_up) {
734 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
735 zero_point <= std::numeric_limits<int8_t>::max();
736 zero_point++)
737 {
738 for (uint32_t s = 1; s < 32; s++) {
739 RequantizationTester()
740 .zero_point(zero_point)
741 .qmin(std::numeric_limits<int8_t>::min())
742 .qmax(std::numeric_limits<int8_t>::max())
743 .s(s)
744 .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_q31__sse2);
745 }
746 }
747 }
748
749 /* No rounding down test - it fails because of upward bias in multiplication */
750 /* No rounding away test - it fails because of upward bias in multiplication */
751
752 TEST(QS8_Q31__SSE2, special_cases) {
753 RequantizationTester()
754 .qmin(std::numeric_limits<int8_t>::min())
755 .qmax(std::numeric_limits<int8_t>::max())
756 .TestSpecialCases(xnn_qs8_requantize_q31__sse2);
757 }
758
759 TEST(QS8_Q31__SSE2, random_cases) {
760 RequantizationTester()
761 .qmin(std::numeric_limits<int8_t>::min())
762 .qmax(std::numeric_limits<int8_t>::max())
763 .iterations(100)
764 .TestRandomCasesApproximate(xnn_qs8_requantize_q31__sse2);
765 }
766
767
768 /*
769 * Q31-based x86 SSSE3 implementation.
770 */
771
772 TEST(QS8_Q31__SSSE3, exact_divide_by_po2) {
773 for (uint32_t s = 1; s < 32; s++) {
774 RequantizationTester()
775 .qmin(std::numeric_limits<int8_t>::min())
776 .qmax(std::numeric_limits<int8_t>::max())
777 .s(s)
778 .TestExactDivideByPO2(xnn_qs8_requantize_q31__ssse3);
779 }
780 }
781
782 TEST(QS8_Q31__SSSE3, exact_divide_by_po2_with_zero_point) {
783 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
784 zero_point <= std::numeric_limits<int8_t>::max();
785 zero_point++)
786 {
787 for (uint32_t s = 1; s < 32; s++) {
788 RequantizationTester()
789 .zero_point(zero_point)
790 .qmin(std::numeric_limits<int8_t>::min())
791 .qmax(std::numeric_limits<int8_t>::max())
792 .s(s)
793 .TestExactDivideByPO2(xnn_qs8_requantize_q31__ssse3);
794 }
795 }
796 }
797
798 TEST(QS8_Q31__SSSE3, divide_by_po2_with_rounding_up) {
799 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
800 zero_point <= std::numeric_limits<int8_t>::max();
801 zero_point++)
802 {
803 for (uint32_t s = 1; s < 32; s++) {
804 RequantizationTester()
805 .zero_point(zero_point)
806 .qmin(std::numeric_limits<int8_t>::min())
807 .qmax(std::numeric_limits<int8_t>::max())
808 .s(s)
809 .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_q31__ssse3);
810 }
811 }
812 }
813
814 /* No rounding down test - it fails because of upward bias in multiplication */
815 /* No rounding away test - it fails because of upward bias in multiplication */
816
817 TEST(QS8_Q31__SSSE3, special_cases) {
818 RequantizationTester()
819 .qmin(std::numeric_limits<int8_t>::min())
820 .qmax(std::numeric_limits<int8_t>::max())
821 .TestSpecialCases(xnn_qs8_requantize_q31__ssse3);
822 }
823
824 TEST(QS8_Q31__SSSE3, random_cases) {
825 RequantizationTester()
826 .qmin(std::numeric_limits<int8_t>::min())
827 .qmax(std::numeric_limits<int8_t>::max())
828 .iterations(100)
829 .TestRandomCasesApproximate(xnn_qs8_requantize_q31__ssse3);
830 }
831
832
833 /*
834 * Q31-based x86 SSE4 implementation.
835 */
836
837 TEST(QS8_Q31__SSE4, exact_divide_by_po2) {
838 for (uint32_t s = 1; s < 32; s++) {
839 RequantizationTester()
840 .qmin(std::numeric_limits<int8_t>::min())
841 .qmax(std::numeric_limits<int8_t>::max())
842 .s(s)
843 .TestExactDivideByPO2(xnn_qs8_requantize_q31__sse4);
844 }
845 }
846
847 TEST(QS8_Q31__SSE4, exact_divide_by_po2_with_zero_point) {
848 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
849 zero_point <= std::numeric_limits<int8_t>::max();
850 zero_point++)
851 {
852 for (uint32_t s = 1; s < 32; s++) {
853 RequantizationTester()
854 .zero_point(zero_point)
855 .qmin(std::numeric_limits<int8_t>::min())
856 .qmax(std::numeric_limits<int8_t>::max())
857 .s(s)
858 .TestExactDivideByPO2(xnn_qs8_requantize_q31__sse4);
859 }
860 }
861 }
862
863 TEST(QS8_Q31__SSE4, divide_by_po2_with_rounding_up) {
864 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
865 zero_point <= std::numeric_limits<int8_t>::max();
866 zero_point++)
867 {
868 for (uint32_t s = 1; s < 32; s++) {
869 RequantizationTester()
870 .zero_point(zero_point)
871 .qmin(std::numeric_limits<int8_t>::min())
872 .qmax(std::numeric_limits<int8_t>::max())
873 .s(s)
874 .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_q31__sse4);
875 }
876 }
877 }
878
879 /* No rounding down test - it fails because of upward bias in multiplication */
880 /* No rounding away test - it fails because of upward bias in multiplication */
881
882 TEST(QS8_Q31__SSE4, special_cases) {
883 RequantizationTester()
884 .qmin(std::numeric_limits<int8_t>::min())
885 .qmax(std::numeric_limits<int8_t>::max())
886 .TestSpecialCases(xnn_qs8_requantize_q31__sse4);
887 }
888
889 TEST(QS8_Q31__SSE4, random_cases) {
890 RequantizationTester()
891 .qmin(std::numeric_limits<int8_t>::min())
892 .qmax(std::numeric_limits<int8_t>::max())
893 .iterations(100)
894 .TestRandomCasesApproximate(xnn_qs8_requantize_q31__sse4);
895 }
896#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
897
898#if XNN_ARCH_ARM || XNN_ARCH_ARM64
899 /*
900 * Precise ARM NEON implementation.
901 */
902
903 TEST(QS8_PRECISE__NEON, exact_divide_by_po2) {
904 for (uint32_t s = 1; s < 32; s++) {
905 RequantizationTester()
906 .s(s)
907 .qmin(std::numeric_limits<int8_t>::min())
908 .qmax(std::numeric_limits<int8_t>::max())
909 .TestExactDivideByPO2(xnn_qs8_requantize_precise__neon);
910 }
911 }
912
913 TEST(QS8_PRECISE__NEON, exact_divide_by_po2_with_zero_point) {
914 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
915 zero_point <= std::numeric_limits<int8_t>::max();
916 zero_point++)
917 {
918 for (uint32_t s = 1; s < 32; s++) {
919 RequantizationTester()
920 .zero_point(zero_point)
921 .qmin(std::numeric_limits<int8_t>::min())
922 .qmax(std::numeric_limits<int8_t>::max())
923 .s(s)
924 .TestExactDivideByPO2(xnn_qs8_requantize_precise__neon);
925 }
926 }
927 }
928
929 TEST(QS8_PRECISE__NEON, divide_by_po2_with_rounding_up) {
930 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
931 zero_point <= std::numeric_limits<int8_t>::max();
932 zero_point++)
933 {
934 for (uint32_t s = 1; s < 32; s++) {
935 RequantizationTester()
936 .zero_point(zero_point)
937 .qmin(std::numeric_limits<int8_t>::min())
938 .qmax(std::numeric_limits<int8_t>::max())
939 .s(s)
940 .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__neon);
941 }
942 }
943 }
944
945 TEST(QS8_PRECISE__NEON, divide_by_po2_with_rounding_down) {
946 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
947 zero_point <= std::numeric_limits<int8_t>::max();
948 zero_point++)
949 {
950 for (uint32_t s = 1; s < 32; s++) {
951 RequantizationTester()
952 .zero_point(zero_point)
953 .qmin(std::numeric_limits<int8_t>::min())
954 .qmax(std::numeric_limits<int8_t>::max())
955 .s(s)
956 .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__neon);
957 }
958 }
959 }
960
961 TEST(QS8_PRECISE__NEON, divide_by_po2_with_rounding_away) {
962 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
963 zero_point <= std::numeric_limits<int8_t>::max();
964 zero_point++)
965 {
966 for (uint32_t s = 1; s < 32; s++) {
967 RequantizationTester()
968 .zero_point(zero_point)
969 .qmin(std::numeric_limits<int8_t>::min())
970 .qmax(std::numeric_limits<int8_t>::max())
971 .s(s)
972 .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__neon);
973 }
974 }
975 }
976
977 TEST(QS8_PRECISE__NEON, special_cases) {
978 RequantizationTester()
979 .qmin(std::numeric_limits<int8_t>::min())
980 .qmax(std::numeric_limits<int8_t>::max())
981 .TestSpecialCases(xnn_qs8_requantize_precise__neon);
982 }
983
984 TEST(QS8_PRECISE__NEON, random_cases) {
985 RequantizationTester()
986 .qmin(std::numeric_limits<int8_t>::min())
987 .qmax(std::numeric_limits<int8_t>::max())
988 .iterations(100)
989 .TestRandomCasesPrecise(xnn_qs8_requantize_precise__neon);
990 }
991
992
993 /*
994 * FP32-based ARM NEON implementation.
995 */
996
997 TEST(QS8_FP32__NEON, random_cases) {
998 RequantizationTester()
999 .qmin(std::numeric_limits<int8_t>::min())
1000 .qmax(std::numeric_limits<int8_t>::max())
1001 .iterations(1000)
1002 .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__neon);
1003 }
1004
1005
1006 /*
1007 * Q31-based ARM NEON implementation.
1008 */
1009
1010 TEST(QS8_Q31__NEON, exact_divide_by_po2) {
1011 for (uint32_t s = 1; s < 32; s++) {
1012 RequantizationTester()
1013 .qmin(std::numeric_limits<int8_t>::min())
1014 .qmax(std::numeric_limits<int8_t>::max())
1015 .s(s)
1016 .TestExactDivideByPO2(xnn_qs8_requantize_q31__neon);
1017 }
1018 }
1019
1020 TEST(QS8_Q31__NEON, exact_divide_by_po2_with_zero_point) {
1021 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
1022 zero_point <= std::numeric_limits<int8_t>::max();
1023 zero_point++)
1024 {
1025 for (uint32_t s = 1; s < 32; s++) {
1026 RequantizationTester()
1027 .zero_point(zero_point)
1028 .qmin(std::numeric_limits<int8_t>::min())
1029 .qmax(std::numeric_limits<int8_t>::max())
1030 .s(s)
1031 .TestExactDivideByPO2(xnn_qs8_requantize_q31__neon);
1032 }
1033 }
1034 }
1035
1036 TEST(QS8_Q31__NEON, divide_by_po2_with_rounding_up) {
1037 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
1038 zero_point <= std::numeric_limits<int8_t>::max();
1039 zero_point++)
1040 {
1041 for (uint32_t s = 1; s < 32; s++) {
1042 RequantizationTester()
1043 .zero_point(zero_point)
1044 .qmin(std::numeric_limits<int8_t>::min())
1045 .qmax(std::numeric_limits<int8_t>::max())
1046 .s(s)
1047 .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_q31__neon);
1048 }
1049 }
1050 }
1051
1052 /* No rounding down test - it fails because of upward bias in multiplication */
1053 /* No rounding away test - it fails because of upward bias in multiplication */
1054
1055 TEST(QS8_Q31__NEON, special_cases) {
1056 RequantizationTester()
1057 .qmin(std::numeric_limits<int8_t>::min())
1058 .qmax(std::numeric_limits<int8_t>::max())
1059 .TestSpecialCases(xnn_qs8_requantize_q31__neon);
1060 }
1061
1062 TEST(QS8_Q31__NEON, random_cases) {
1063 RequantizationTester()
1064 .qmin(std::numeric_limits<int8_t>::min())
1065 .qmax(std::numeric_limits<int8_t>::max())
1066 .iterations(100)
1067 .TestRandomCasesApproximate(xnn_qs8_requantize_q31__neon);
1068 }
1069#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1070
1071#if XNN_ARCH_WASMSIMD
/*
 * FP32-based WAsm SIMD implementation.
 */
1075
1076 TEST(QS8_FP32__WASMSIMD, random_cases) {
1077 RequantizationTester()
1078 .qmin(std::numeric_limits<int8_t>::min())
1079 .qmax(std::numeric_limits<int8_t>::max())
1080 .iterations(1000)
1081 .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__wasmsimd);
1082 }
Marat Dukhan138560c2020-08-03 18:57:34 -07001083
1084 /*
1085 * Q31-based WAsm SIMD implementation.
1086 */
1087
1088 TEST(QS8_Q31__WASMSIMD, exact_divide_by_po2) {
1089 for (uint32_t s = 1; s < 32; s++) {
1090 RequantizationTester()
1091 .qmin(std::numeric_limits<int8_t>::min())
1092 .qmax(std::numeric_limits<int8_t>::max())
1093 .s(s)
1094 .TestExactDivideByPO2(xnn_qs8_requantize_q31__wasmsimd);
1095 }
1096 }
1097
1098 TEST(QS8_Q31__WASMSIMD, exact_divide_by_po2_with_zero_point) {
1099 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
1100 zero_point <= std::numeric_limits<int8_t>::max();
1101 zero_point++)
1102 {
1103 for (uint32_t s = 1; s < 32; s++) {
1104 RequantizationTester()
1105 .zero_point(zero_point)
1106 .qmin(std::numeric_limits<int8_t>::min())
1107 .qmax(std::numeric_limits<int8_t>::max())
1108 .s(s)
1109 .TestExactDivideByPO2(xnn_qs8_requantize_q31__wasmsimd);
1110 }
1111 }
1112 }
1113
1114 TEST(QS8_Q31__WASMSIMD, divide_by_po2_with_rounding_up) {
1115 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
1116 zero_point <= std::numeric_limits<int8_t>::max();
1117 zero_point++)
1118 {
1119 for (uint32_t s = 1; s < 32; s++) {
1120 RequantizationTester()
1121 .zero_point(zero_point)
1122 .qmin(std::numeric_limits<int8_t>::min())
1123 .qmax(std::numeric_limits<int8_t>::max())
1124 .s(s)
1125 .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_q31__wasmsimd);
1126 }
1127 }
1128 }
1129
1130 /* No rounding down test - it fails because of upward bias in multiplication */
1131 /* No rounding away test - it fails because of upward bias in multiplication */
1132
1133 TEST(QS8_Q31__WASMSIMD, special_cases) {
1134 RequantizationTester()
1135 .qmin(std::numeric_limits<int8_t>::min())
1136 .qmax(std::numeric_limits<int8_t>::max())
1137 .TestSpecialCases(xnn_qs8_requantize_q31__wasmsimd);
1138 }
1139
1140 TEST(QS8_Q31__WASMSIMD, random_cases) {
1141 RequantizationTester()
1142 .qmin(std::numeric_limits<int8_t>::min())
1143 .qmax(std::numeric_limits<int8_t>::max())
1144 .iterations(100)
1145 .TestRandomCasesApproximate(xnn_qs8_requantize_q31__wasmsimd);
1146 }
Marat Dukhan2e23d2b2020-07-29 16:01:37 -07001147#endif // XNN_ARCH_WASMSIMD