blob: 2337722149cb2c8300067e89afeeac03a4543181 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#include <cmath>
10#include <cstddef>
11#include <cstdlib>
12
XNNPACK Teamb455b122019-09-27 18:10:33 -070013#include <gtest/gtest.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070014
Marat Dukhan1dadbf72019-10-01 10:46:20 -070015#include <xnnpack/common.h>
16
17#include <xnnpack/requantization-stubs.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070018#include "requantization-tester.h"
19
20
/*
 * Precise scalar implementation using unsigned 32-bit arithmetic.
 */
24
25TEST(PRECISE__SCALAR_UNSIGNED32, exact_divide_by_po2) {
26 for (uint32_t s = 1; s < 32; s++) {
27 RequantizationTester()
28 .s(s)
29 .testExactDivideByPO2(xnn_requantize_precise__scalar_unsigned32);
30 }
31}
32
33TEST(PRECISE__SCALAR_UNSIGNED32, exact_divide_by_po2_with_zero_point) {
34 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
35 for (uint32_t s = 1; s < 32; s++) {
36 RequantizationTester()
37 .zeroPoint(zeroPoint)
38 .s(s)
39 .testExactDivideByPO2(xnn_requantize_precise__scalar_unsigned32);
40 }
41 }
42}
43
44TEST(PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_up) {
45 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
46 for (uint32_t s = 1; s < 32; s++) {
47 RequantizationTester()
48 .zeroPoint(zeroPoint)
49 .s(s)
50 .testDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_unsigned32);
51 }
52 }
53}
54
55TEST(PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_down) {
56 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
57 for (uint32_t s = 1; s < 32; s++) {
58 RequantizationTester()
59 .zeroPoint(zeroPoint)
60 .s(s)
61 .testDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_unsigned32);
62 }
63 }
64}
65
66TEST(PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_away) {
67 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
68 for (uint32_t s = 1; s < 32; s++) {
69 RequantizationTester()
70 .zeroPoint(zeroPoint)
71 .s(s)
72 .testDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_unsigned32);
73 }
74 }
75}
76
77TEST(PRECISE__SCALAR_UNSIGNED32, special_cases) {
78 RequantizationTester()
79 .testSpecialCases(xnn_requantize_precise__scalar_unsigned32);
80}
81
82TEST(PRECISE__SCALAR_UNSIGNED32, random_cases) {
83 RequantizationTester()
84 .iterations(100)
85 .testRandomCasesPrecise(xnn_requantize_precise__scalar_unsigned32);
86}
87
88
/*
 * Precise scalar implementation using unsigned 64-bit arithmetic.
 */
92
93TEST(PRECISE__SCALAR_UNSIGNED64, exact_divide_by_po2) {
94 for (uint32_t s = 1; s < 32; s++) {
95 RequantizationTester()
96 .s(s)
97 .testExactDivideByPO2(xnn_requantize_precise__scalar_unsigned64);
98 }
99}
100
101TEST(PRECISE__SCALAR_UNSIGNED64, exact_divide_by_po2_with_zero_point) {
102 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
103 for (uint32_t s = 1; s < 32; s++) {
104 RequantizationTester()
105 .zeroPoint(zeroPoint)
106 .s(s)
107 .testExactDivideByPO2(xnn_requantize_precise__scalar_unsigned64);
108 }
109 }
110}
111
112TEST(PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_up) {
113 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
114 for (uint32_t s = 1; s < 32; s++) {
115 RequantizationTester()
116 .zeroPoint(zeroPoint)
117 .s(s)
118 .testDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_unsigned64);
119 }
120 }
121}
122
123TEST(PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_down) {
124 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
125 for (uint32_t s = 1; s < 32; s++) {
126 RequantizationTester()
127 .zeroPoint(zeroPoint)
128 .s(s)
129 .testDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_unsigned64);
130 }
131 }
132}
133
134TEST(PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_away) {
135 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
136 for (uint32_t s = 1; s < 32; s++) {
137 RequantizationTester()
138 .zeroPoint(zeroPoint)
139 .s(s)
140 .testDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_unsigned64);
141 }
142 }
143}
144
145TEST(PRECISE__SCALAR_UNSIGNED64, special_cases) {
146 RequantizationTester()
147 .testSpecialCases(xnn_requantize_precise__scalar_unsigned64);
148}
149
150TEST(PRECISE__SCALAR_UNSIGNED64, random_cases) {
151 RequantizationTester()
152 .iterations(100)
153 .testRandomCasesPrecise(xnn_requantize_precise__scalar_unsigned64);
154}
155
156
/*
 * Precise scalar implementation using signed 64-bit arithmetic.
 */
160
161TEST(PRECISE__SCALAR_SIGNED64, exact_divide_by_po2) {
162 for (uint32_t s = 1; s < 32; s++) {
163 RequantizationTester()
164 .s(s)
165 .testExactDivideByPO2(xnn_requantize_precise__scalar_signed64);
166 }
167}
168
169TEST(PRECISE__SCALAR_SIGNED64, exact_divide_by_po2_with_zero_point) {
170 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
171 for (uint32_t s = 1; s < 32; s++) {
172 RequantizationTester()
173 .zeroPoint(zeroPoint)
174 .s(s)
175 .testExactDivideByPO2(xnn_requantize_precise__scalar_signed64);
176 }
177 }
178}
179
180TEST(PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_up) {
181 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
182 for (uint32_t s = 1; s < 32; s++) {
183 RequantizationTester()
184 .zeroPoint(zeroPoint)
185 .s(s)
186 .testDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_signed64);
187 }
188 }
189}
190
191TEST(PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_down) {
192 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
193 for (uint32_t s = 1; s < 32; s++) {
194 RequantizationTester()
195 .zeroPoint(zeroPoint)
196 .s(s)
197 .testDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_signed64);
198 }
199 }
200}
201
202TEST(PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_away) {
203 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
204 for (uint32_t s = 1; s < 32; s++) {
205 RequantizationTester()
206 .zeroPoint(zeroPoint)
207 .s(s)
208 .testDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_signed64);
209 }
210 }
211}
212
213TEST(PRECISE__SCALAR_SIGNED64, special_cases) {
214 RequantizationTester()
215 .testSpecialCases(xnn_requantize_precise__scalar_signed64);
216}
217
218TEST(PRECISE__SCALAR_SIGNED64, random_cases) {
219 RequantizationTester()
220 .iterations(100)
221 .testRandomCasesPrecise(xnn_requantize_precise__scalar_signed64);
222}
223
224
225/*
226 * FP32-based scalar implementation using lrintf function.
227 */
228
229TEST(FP32__SCALAR_LRINTF, random_cases) {
230 RequantizationTester()
231 .iterations(1000)
232 .testRandomCasesApproximate(xnn_requantize_fp32__scalar_lrintf);
233}
234
235
236/*
237 * FP32-based scalar implementation using magic trick for FP32->INT32 conversion.
238 */
239
240TEST(FP32__SCALAR_MAGIC, random_cases) {
241 RequantizationTester()
242 .iterations(1000)
243 .testRandomCasesApproximate(xnn_requantize_fp32__scalar_magic);
244}
245
246
247/*
248 * Q31-based scalar implementation.
249 */
250
251TEST(Q31__SCALAR, exact_divide_by_po2) {
252 for (uint32_t s = 1; s < 32; s++) {
253 RequantizationTester()
254 .s(s)
255 .testExactDivideByPO2(xnn_requantize_q31__scalar);
256 }
257}
258
259TEST(Q31__SCALAR, exact_divide_by_po2_with_zero_point) {
260 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
261 for (uint32_t s = 1; s < 32; s++) {
262 RequantizationTester()
263 .zeroPoint(zeroPoint)
264 .s(s)
265 .testExactDivideByPO2(xnn_requantize_q31__scalar);
266 }
267 }
268}
269
270TEST(Q31__SCALAR, divide_by_po2_with_rounding_up) {
271 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
272 for (uint32_t s = 1; s < 32; s++) {
273 RequantizationTester()
274 .zeroPoint(zeroPoint)
275 .s(s)
276 .testDivideByPO2WithRoundingUp(xnn_requantize_q31__scalar);
277 }
278 }
279}
280
281/* No rounding down Test - it fails because of upward bias in multiplication */
282
283TEST(Q31__SCALAR, divide_by_po2_with_rounding_away) {
284 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
285 for (uint32_t s = 1; s < 32; s++) {
286 RequantizationTester()
287 .zeroPoint(zeroPoint)
288 .s(s)
289 .testDivideByPO2WithRoundingAway(xnn_requantize_q31__scalar);
290 }
291 }
292}
293
294TEST(Q31__SCALAR, special_cases) {
295 RequantizationTester()
296 .testSpecialCases(xnn_requantize_q31__scalar);
297}
298
299TEST(Q31__SCALAR, random_cases) {
300 RequantizationTester()
301 .iterations(100)
302 .testRandomCasesApproximate(xnn_requantize_q31__scalar);
303}
304
305TEST(Q31__SCALAR, random_match_gemmlowp) {
306 RequantizationTester()
307 .iterations(100)
308 .testRandomCasesAgainstReference(xnn_requantize_q31__scalar, xnn_requantize_gemmlowp__scalar);
309}
310
311
312/*
313 * Scalar implementation from gemmlowp.
314 */
315
316TEST(GEMMLOWP__SCALAR, random_cases) {
317 RequantizationTester()
318 .iterations(100)
319 .testRandomCasesApproximate(xnn_requantize_gemmlowp__scalar);
320}
321
322
/*
 * Precise PSIMD implementation using unsigned 32-bit arithmetic.
 */
326
327TEST(PRECISE__PSIMD, exact_divide_by_po2) {
328 for (uint32_t s = 1; s < 32; s++) {
329 RequantizationTester()
330 .s(s)
331 .testExactDivideByPO2(xnn_requantize_precise__psimd);
332 }
333}
334
335TEST(PRECISE__PSIMD, exact_divide_by_po2_with_zero_point) {
336 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
337 for (uint32_t s = 1; s < 32; s++) {
338 RequantizationTester()
339 .zeroPoint(zeroPoint)
340 .s(s)
341 .testExactDivideByPO2(xnn_requantize_precise__psimd);
342 }
343 }
344}
345
346TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_up) {
347 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
348 for (uint32_t s = 1; s < 32; s++) {
349 RequantizationTester()
350 .zeroPoint(zeroPoint)
351 .s(s)
352 .testDivideByPO2WithRoundingUp(xnn_requantize_precise__psimd);
353 }
354 }
355}
356
357TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_down) {
358 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
359 for (uint32_t s = 1; s < 32; s++) {
360 RequantizationTester()
361 .zeroPoint(zeroPoint)
362 .s(s)
363 .testDivideByPO2WithRoundingDown(xnn_requantize_precise__psimd);
364 }
365 }
366}
367
368TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_away) {
369 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
370 for (uint32_t s = 1; s < 32; s++) {
371 RequantizationTester()
372 .zeroPoint(zeroPoint)
373 .s(s)
374 .testDivideByPO2WithRoundingAway(xnn_requantize_precise__psimd);
375 }
376 }
377}
378
379TEST(PRECISE__PSIMD, special_cases) {
380 RequantizationTester()
381 .testSpecialCases(xnn_requantize_precise__psimd);
382}
383
384TEST(PRECISE__PSIMD, random_cases) {
385 RequantizationTester()
386 .iterations(100)
387 .testRandomCasesPrecise(xnn_requantize_precise__psimd);
388}
389
390
391/*
392 * FP32-based PSIMD implementation using magic trick for FP32->INT32 conversion.
393 */
394
395TEST(FP32__PSIMD, random_cases) {
396 RequantizationTester()
397 .iterations(1000)
398 .testRandomCasesApproximate(xnn_requantize_fp32__psimd);
399}
400
401
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700402#if XNN_ARCH_X86 || XNN_ARCH_X86_64
403 /*
404 * Precise SSE2 implementation using floating-point shuffle.
405 */
XNNPACK Teamb455b122019-09-27 18:10:33 -0700406
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700407 TEST(PRECISE__SSE2, exact_divide_by_po2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700408 for (uint32_t s = 1; s < 32; s++) {
409 RequantizationTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700410 .s(s)
411 .testExactDivideByPO2(xnn_requantize_precise__sse2);
412 }
413 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700414
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700415 TEST(PRECISE__SSE2, exact_divide_by_po2_with_zero_point) {
416 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
417 for (uint32_t s = 1; s < 32; s++) {
418 RequantizationTester()
419 .zeroPoint(zeroPoint)
420 .s(s)
421 .testExactDivideByPO2(xnn_requantize_precise__sse2);
422 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700423 }
424 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700425
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700426 TEST(PRECISE__SSE2, divide_by_po2_with_rounding_up) {
427 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
428 for (uint32_t s = 1; s < 32; s++) {
429 RequantizationTester()
430 .zeroPoint(zeroPoint)
431 .s(s)
432 .testDivideByPO2WithRoundingUp(xnn_requantize_precise__sse2);
433 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700434 }
435 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700436
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700437 TEST(PRECISE__SSE2, divide_by_po2_with_rounding_down) {
438 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
439 for (uint32_t s = 1; s < 32; s++) {
440 RequantizationTester()
441 .zeroPoint(zeroPoint)
442 .s(s)
443 .testDivideByPO2WithRoundingDown(xnn_requantize_precise__sse2);
444 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700445 }
446 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700447
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700448 TEST(PRECISE__SSE2, divide_by_po2_with_rounding_away) {
449 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
450 for (uint32_t s = 1; s < 32; s++) {
451 RequantizationTester()
452 .zeroPoint(zeroPoint)
453 .s(s)
454 .testDivideByPO2WithRoundingAway(xnn_requantize_precise__sse2);
455 }
456 }
457 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700458
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700459 TEST(PRECISE__SSE2, special_cases) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700460 RequantizationTester()
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700461 .testSpecialCases(xnn_requantize_precise__sse2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700462 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700463
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700464 TEST(PRECISE__SSE2, random_cases) {
465 RequantizationTester()
466 .iterations(100)
467 .testRandomCasesPrecise(xnn_requantize_precise__sse2);
468 }
469
470
471 /*
472 * Precise SSSE3 implementation using floating-point shuffle.
473 */
474
475 TEST(PRECISE__SSSE3, exact_divide_by_po2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700476 for (uint32_t s = 1; s < 32; s++) {
477 RequantizationTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700478 .s(s)
479 .testExactDivideByPO2(xnn_requantize_precise__ssse3);
480 }
481 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700482
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700483 TEST(PRECISE__SSSE3, exact_divide_by_po2_with_zero_point) {
484 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
485 for (uint32_t s = 1; s < 32; s++) {
486 RequantizationTester()
487 .zeroPoint(zeroPoint)
488 .s(s)
489 .testExactDivideByPO2(xnn_requantize_precise__ssse3);
490 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700491 }
492 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700493
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700494 TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_up) {
495 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
496 for (uint32_t s = 1; s < 32; s++) {
497 RequantizationTester()
498 .zeroPoint(zeroPoint)
499 .s(s)
500 .testDivideByPO2WithRoundingUp(xnn_requantize_precise__ssse3);
501 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700502 }
503 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700504
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700505 TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_down) {
506 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
507 for (uint32_t s = 1; s < 32; s++) {
508 RequantizationTester()
509 .zeroPoint(zeroPoint)
510 .s(s)
511 .testDivideByPO2WithRoundingDown(xnn_requantize_precise__ssse3);
512 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700513 }
514 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700515
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700516 TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_away) {
517 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
518 for (uint32_t s = 1; s < 32; s++) {
519 RequantizationTester()
520 .zeroPoint(zeroPoint)
521 .s(s)
522 .testDivideByPO2WithRoundingAway(xnn_requantize_precise__ssse3);
523 }
524 }
525 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700526
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700527 TEST(PRECISE__SSSE3, special_cases) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700528 RequantizationTester()
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700529 .testSpecialCases(xnn_requantize_precise__ssse3);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700530 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700531
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700532 TEST(PRECISE__SSSE3, random_cases) {
533 RequantizationTester()
534 .iterations(100)
535 .testRandomCasesPrecise(xnn_requantize_precise__ssse3);
536 }
537
538
539 /*
540 * Precise SSE4.1 implementation using static blend instruction.
541 */
542
543 TEST(PRECISE__SSE4, exact_divide_by_po2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700544 for (uint32_t s = 1; s < 32; s++) {
545 RequantizationTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700546 .s(s)
547 .testExactDivideByPO2(xnn_requantize_precise__sse4);
548 }
549 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700550
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700551 TEST(PRECISE__SSE4, exact_divide_by_po2_with_zero_point) {
552 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
553 for (uint32_t s = 1; s < 32; s++) {
554 RequantizationTester()
555 .zeroPoint(zeroPoint)
556 .s(s)
557 .testExactDivideByPO2(xnn_requantize_precise__sse4);
558 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700559 }
560 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700561
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700562 TEST(PRECISE__SSE4, divide_by_po2_with_rounding_up) {
563 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
564 for (uint32_t s = 1; s < 32; s++) {
565 RequantizationTester()
566 .zeroPoint(zeroPoint)
567 .s(s)
568 .testDivideByPO2WithRoundingUp(xnn_requantize_precise__sse4);
569 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700570 }
571 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700572
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700573 TEST(PRECISE__SSE4, divide_by_po2_with_rounding_down) {
574 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
575 for (uint32_t s = 1; s < 32; s++) {
576 RequantizationTester()
577 .zeroPoint(zeroPoint)
578 .s(s)
579 .testDivideByPO2WithRoundingDown(xnn_requantize_precise__sse4);
580 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700581 }
582 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700583
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700584 TEST(PRECISE__SSE4, divide_by_po2_with_rounding_away) {
585 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
586 for (uint32_t s = 1; s < 32; s++) {
587 RequantizationTester()
588 .zeroPoint(zeroPoint)
589 .s(s)
590 .testDivideByPO2WithRoundingAway(xnn_requantize_precise__sse4);
591 }
592 }
593 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700594
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700595 TEST(PRECISE__SSE4, special_cases) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700596 RequantizationTester()
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700597 .testSpecialCases(xnn_requantize_precise__sse4);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700598 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700599
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700600 TEST(PRECISE__SSE4, random_cases) {
601 RequantizationTester()
602 .iterations(100)
603 .testRandomCasesPrecise(xnn_requantize_precise__sse4);
604 }
605
606
607 /*
608 * FP32-based x86 SSE2 implementation.
609 */
610
611 TEST(FP32__SSE2, random_cases) {
612 RequantizationTester()
613 .iterations(1000)
614 .testRandomCasesApproximate(xnn_requantize_fp32__sse2);
615 }
616
617
618 /*
619 * Q31-based x86 SSE2 implementation.
620 */
621
622 TEST(Q31__SSE2, exact_divide_by_po2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700623 for (uint32_t s = 1; s < 32; s++) {
624 RequantizationTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700625 .s(s)
626 .testExactDivideByPO2(xnn_requantize_q31__sse2);
627 }
628 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700629
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700630 TEST(Q31__SSE2, exact_divide_by_po2_with_zero_point) {
631 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
632 for (uint32_t s = 1; s < 32; s++) {
633 RequantizationTester()
634 .zeroPoint(zeroPoint)
635 .s(s)
636 .testExactDivideByPO2(xnn_requantize_q31__sse2);
637 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700638 }
639 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700640
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700641 TEST(Q31__SSE2, divide_by_po2_with_rounding_up) {
642 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
643 for (uint32_t s = 1; s < 32; s++) {
644 RequantizationTester()
645 .zeroPoint(zeroPoint)
646 .s(s)
647 .testDivideByPO2WithRoundingUp(xnn_requantize_q31__sse2);
648 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700649 }
650 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700651
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700652 /* No rounding down Test - it fails because of upward bias in multiplication */
XNNPACK Teamb455b122019-09-27 18:10:33 -0700653
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700654 TEST(Q31__SSE2, divide_by_po2_with_rounding_away) {
655 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
656 for (uint32_t s = 1; s < 32; s++) {
657 RequantizationTester()
658 .zeroPoint(zeroPoint)
659 .s(s)
660 .testDivideByPO2WithRoundingAway(xnn_requantize_q31__sse2);
661 }
662 }
663 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700664
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700665 TEST(Q31__SSE2, special_cases) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700666 RequantizationTester()
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700667 .testSpecialCases(xnn_requantize_q31__sse2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700668 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700669
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700670 TEST(Q31__SSE2, random_cases) {
671 RequantizationTester()
672 .iterations(100)
673 .testRandomCasesApproximate(xnn_requantize_q31__sse2);
674 }
675
676 TEST(Q31__SSE2, random_match_gemmlowp) {
677 RequantizationTester()
678 .iterations(100)
679 .testRandomCasesAgainstReference(xnn_requantize_q31__sse2, xnn_requantize_gemmlowp__sse2);
680 }
681
682
683 /*
684 * Q31-based x86 SSSE3 implementation.
685 */
686
687 TEST(Q31__SSSE3, exact_divide_by_po2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700688 for (uint32_t s = 1; s < 32; s++) {
689 RequantizationTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700690 .s(s)
691 .testExactDivideByPO2(xnn_requantize_q31__ssse3);
692 }
693 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700694
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700695 TEST(Q31__SSSE3, exact_divide_by_po2_with_zero_point) {
696 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
697 for (uint32_t s = 1; s < 32; s++) {
698 RequantizationTester()
699 .zeroPoint(zeroPoint)
700 .s(s)
701 .testExactDivideByPO2(xnn_requantize_q31__ssse3);
702 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700703 }
704 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700705
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700706 TEST(Q31__SSSE3, divide_by_po2_with_rounding_up) {
707 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
708 for (uint32_t s = 1; s < 32; s++) {
709 RequantizationTester()
710 .zeroPoint(zeroPoint)
711 .s(s)
712 .testDivideByPO2WithRoundingUp(xnn_requantize_q31__ssse3);
713 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700714 }
715 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700716
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700717 /* No rounding down Test - it fails because of upward bias in multiplication */
XNNPACK Teamb455b122019-09-27 18:10:33 -0700718
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700719 TEST(Q31__SSSE3, divide_by_po2_with_rounding_away) {
720 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
721 for (uint32_t s = 1; s < 32; s++) {
722 RequantizationTester()
723 .zeroPoint(zeroPoint)
724 .s(s)
725 .testDivideByPO2WithRoundingAway(xnn_requantize_q31__ssse3);
726 }
727 }
728 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700729
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700730 TEST(Q31__SSSE3, special_cases) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700731 RequantizationTester()
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700732 .testSpecialCases(xnn_requantize_q31__ssse3);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700733 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700734
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700735 TEST(Q31__SSSE3, random_cases) {
736 RequantizationTester()
737 .iterations(100)
738 .testRandomCasesApproximate(xnn_requantize_q31__ssse3);
739 }
740
741 TEST(Q31__SSSE3, random_match_gemmlowp) {
742 RequantizationTester()
743 .iterations(100)
744 .testRandomCasesAgainstReference(xnn_requantize_q31__ssse3, xnn_requantize_gemmlowp__ssse3);
745 }
746
747
748 /*
749 * Q31-based x86 SSE4 implementation.
750 */
751
752 TEST(Q31__SSE4, exact_divide_by_po2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700753 for (uint32_t s = 1; s < 32; s++) {
754 RequantizationTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700755 .s(s)
756 .testExactDivideByPO2(xnn_requantize_q31__sse4);
757 }
758 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700759
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700760 TEST(Q31__SSE4, exact_divide_by_po2_with_zero_point) {
761 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
762 for (uint32_t s = 1; s < 32; s++) {
763 RequantizationTester()
764 .zeroPoint(zeroPoint)
765 .s(s)
766 .testExactDivideByPO2(xnn_requantize_q31__sse4);
767 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700768 }
769 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700770
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700771 TEST(Q31__SSE4, divide_by_po2_with_rounding_up) {
772 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
773 for (uint32_t s = 1; s < 32; s++) {
774 RequantizationTester()
775 .zeroPoint(zeroPoint)
776 .s(s)
777 .testDivideByPO2WithRoundingUp(xnn_requantize_q31__sse4);
778 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700779 }
780 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700781
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700782 /* No rounding down Test - it fails because of upward bias in multiplication */
XNNPACK Teamb455b122019-09-27 18:10:33 -0700783
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700784 TEST(Q31__SSE4, divide_by_po2_with_rounding_away) {
785 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
786 for (uint32_t s = 1; s < 32; s++) {
787 RequantizationTester()
788 .zeroPoint(zeroPoint)
789 .s(s)
790 .testDivideByPO2WithRoundingAway(xnn_requantize_q31__sse4);
791 }
792 }
793 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700794
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700795 TEST(Q31__SSE4, special_cases) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700796 RequantizationTester()
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700797 .testSpecialCases(xnn_requantize_q31__sse4);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700798 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700799
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700800 TEST(Q31__SSE4, random_cases) {
801 RequantizationTester()
802 .iterations(100)
803 .testRandomCasesApproximate(xnn_requantize_q31__sse4);
804 }
805
806 TEST(Q31__SSE4, random_match_gemmlowp) {
807 RequantizationTester()
808 .iterations(100)
809 .testRandomCasesAgainstReference(xnn_requantize_q31__sse4, xnn_requantize_gemmlowp__sse4);
810 }
811
812
813 /*
814 * x86 SSE2 implementation from gemmlowp.
815 */
816
817 TEST(GEMMLOWP__SSE2, exact_divide_by_po2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700818 for (uint32_t s = 1; s < 32; s++) {
819 RequantizationTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700820 .s(s)
821 .testExactDivideByPO2(xnn_requantize_gemmlowp__sse2);
822 }
823 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700824
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700825 TEST(GEMMLOWP__SSE2, exact_divide_by_po2_with_zero_point) {
826 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
827 for (uint32_t s = 1; s < 32; s++) {
828 RequantizationTester()
829 .zeroPoint(zeroPoint)
830 .s(s)
831 .testExactDivideByPO2(xnn_requantize_gemmlowp__sse2);
832 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700833 }
834 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700835
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700836 TEST(GEMMLOWP__SSE2, divide_by_po2_with_rounding_up) {
837 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
838 for (uint32_t s = 1; s < 32; s++) {
839 RequantizationTester()
840 .zeroPoint(zeroPoint)
841 .s(s)
842 .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse2);
843 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700844 }
845 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700846
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700847 /* No rounding down Test - it fails because of upward bias in multiplication */
XNNPACK Teamb455b122019-09-27 18:10:33 -0700848
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700849 TEST(GEMMLOWP__SSE2, divide_by_po2_with_rounding_away) {
850 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
851 for (uint32_t s = 1; s < 32; s++) {
852 RequantizationTester()
853 .zeroPoint(zeroPoint)
854 .s(s)
855 .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse2);
856 }
857 }
858 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700859
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700860 TEST(GEMMLOWP__SSE2, special_cases) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700861 RequantizationTester()
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700862 .testSpecialCases(xnn_requantize_gemmlowp__sse2);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700863 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700864
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700865 TEST(GEMMLOWP__SSE2, random_cases) {
866 RequantizationTester()
867 .iterations(100)
868 .testRandomCasesApproximate(xnn_requantize_gemmlowp__sse2);
869 }
870
871
872 /*
873 * x86 SSSE3 implementation from gemmlowp.
874 */
875
876 TEST(GEMMLOWP__SSSE3, exact_divide_by_po2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700877 for (uint32_t s = 1; s < 32; s++) {
878 RequantizationTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700879 .s(s)
880 .testExactDivideByPO2(xnn_requantize_gemmlowp__ssse3);
881 }
882 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700883
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700884 TEST(GEMMLOWP__SSSE3, exact_divide_by_po2_with_zero_point) {
885 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
886 for (uint32_t s = 1; s < 32; s++) {
887 RequantizationTester()
888 .zeroPoint(zeroPoint)
889 .s(s)
890 .testExactDivideByPO2(xnn_requantize_gemmlowp__ssse3);
891 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700892 }
893 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700894
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700895 TEST(GEMMLOWP__SSSE3, divide_by_po2_with_rounding_up) {
896 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
897 for (uint32_t s = 1; s < 32; s++) {
898 RequantizationTester()
899 .zeroPoint(zeroPoint)
900 .s(s)
901 .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__ssse3);
902 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700903 }
904 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700905
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700906 /* No rounding down Test - it fails because of upward bias in multiplication */
XNNPACK Teamb455b122019-09-27 18:10:33 -0700907
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700908 TEST(GEMMLOWP__SSSE3, divide_by_po2_with_rounding_away) {
909 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
910 for (uint32_t s = 1; s < 32; s++) {
911 RequantizationTester()
912 .zeroPoint(zeroPoint)
913 .s(s)
914 .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__ssse3);
915 }
916 }
917 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700918
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700919 TEST(GEMMLOWP__SSSE3, special_cases) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700920 RequantizationTester()
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700921 .testSpecialCases(xnn_requantize_gemmlowp__ssse3);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700922 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700923
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700924 TEST(GEMMLOWP__SSSE3, random_cases) {
925 RequantizationTester()
926 .iterations(100)
927 .testRandomCasesApproximate(xnn_requantize_gemmlowp__ssse3);
928 }
929
930
/*
 * x86 SSE4 implementation from gemmlowp.
 */
934
935 TEST(GEMMLOWP__SSE4, exact_divide_by_po2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700936 for (uint32_t s = 1; s < 32; s++) {
937 RequantizationTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700938 .s(s)
939 .testExactDivideByPO2(xnn_requantize_gemmlowp__sse4);
940 }
941 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700942
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700943 TEST(GEMMLOWP__SSE4, exact_divide_by_po2_with_zero_point) {
944 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
945 for (uint32_t s = 1; s < 32; s++) {
946 RequantizationTester()
947 .zeroPoint(zeroPoint)
948 .s(s)
949 .testExactDivideByPO2(xnn_requantize_gemmlowp__sse4);
950 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700951 }
952 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700953
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700954 TEST(GEMMLOWP__SSE4, divide_by_po2_with_rounding_up) {
955 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
956 for (uint32_t s = 1; s < 32; s++) {
957 RequantizationTester()
958 .zeroPoint(zeroPoint)
959 .s(s)
960 .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse4);
961 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700962 }
963 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700964
/* No rounding-down test: it fails because of the upward bias in multiplication. */
XNNPACK Teamb455b122019-09-27 18:10:33 -0700966
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700967 TEST(GEMMLOWP__SSE4, divide_by_po2_with_rounding_away) {
968 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
969 for (uint32_t s = 1; s < 32; s++) {
970 RequantizationTester()
971 .zeroPoint(zeroPoint)
972 .s(s)
973 .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse4);
974 }
975 }
976 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700977
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700978 TEST(GEMMLOWP__SSE4, special_cases) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700979 RequantizationTester()
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700980 .testSpecialCases(xnn_requantize_gemmlowp__sse4);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700981 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700982
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700983 TEST(GEMMLOWP__SSE4, random_cases) {
984 RequantizationTester()
985 .iterations(100)
986 .testRandomCasesApproximate(xnn_requantize_gemmlowp__sse4);
987 }
988#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
989
990#if XNN_ARCH_ARM || XNN_ARCH_ARM64
/*
 * Precise ARM NEON implementation.
 */
994
995 TEST(PRECISE__NEON, exact_divide_by_po2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700996 for (uint32_t s = 1; s < 32; s++) {
997 RequantizationTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -0700998 .s(s)
999 .testExactDivideByPO2(xnn_requantize_precise__neon);
1000 }
1001 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001002
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001003 TEST(PRECISE__NEON, exact_divide_by_po2_with_zero_point) {
1004 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
1005 for (uint32_t s = 1; s < 32; s++) {
1006 RequantizationTester()
1007 .zeroPoint(zeroPoint)
1008 .s(s)
1009 .testExactDivideByPO2(xnn_requantize_precise__neon);
1010 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001011 }
1012 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001013
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001014 TEST(PRECISE__NEON, divide_by_po2_with_rounding_up) {
1015 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
1016 for (uint32_t s = 1; s < 32; s++) {
1017 RequantizationTester()
1018 .zeroPoint(zeroPoint)
1019 .s(s)
1020 .testDivideByPO2WithRoundingUp(xnn_requantize_precise__neon);
1021 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001022 }
1023 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001024
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001025 TEST(PRECISE__NEON, divide_by_po2_with_rounding_down) {
1026 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
1027 for (uint32_t s = 1; s < 32; s++) {
1028 RequantizationTester()
1029 .zeroPoint(zeroPoint)
1030 .s(s)
1031 .testDivideByPO2WithRoundingDown(xnn_requantize_precise__neon);
1032 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001033 }
1034 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001035
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001036 TEST(PRECISE__NEON, divide_by_po2_with_rounding_away) {
1037 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
1038 for (uint32_t s = 1; s < 32; s++) {
1039 RequantizationTester()
1040 .zeroPoint(zeroPoint)
1041 .s(s)
1042 .testDivideByPO2WithRoundingAway(xnn_requantize_precise__neon);
1043 }
1044 }
1045 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001046
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001047 TEST(PRECISE__NEON, special_cases) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001048 RequantizationTester()
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001049 .testSpecialCases(xnn_requantize_precise__neon);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001050 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001051
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001052 TEST(PRECISE__NEON, random_cases) {
1053 RequantizationTester()
1054 .iterations(100)
1055 .testRandomCasesPrecise(xnn_requantize_precise__neon);
1056 }
1057
1058
/*
 * FP32-based ARM NEON implementation.
 */
1062
1063 TEST(FP32__NEON, random_cases) {
1064 RequantizationTester()
1065 .iterations(1000)
1066 .testRandomCasesApproximate(xnn_requantize_fp32__neon);
1067 }
1068
1069
/*
 * Q31-based ARM NEON implementation.
 */
1073
1074 TEST(Q31__NEON, exact_divide_by_po2) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001075 for (uint32_t s = 1; s < 32; s++) {
1076 RequantizationTester()
XNNPACK Teamb455b122019-09-27 18:10:33 -07001077 .s(s)
1078 .testExactDivideByPO2(xnn_requantize_q31__neon);
1079 }
1080 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001081
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001082 TEST(Q31__NEON, exact_divide_by_po2_with_zero_point) {
1083 for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
1084 for (uint32_t s = 1; s < 32; s++) {
1085 RequantizationTester()
1086 .zeroPoint(zeroPoint)
1087 .s(s)
1088 .testExactDivideByPO2(xnn_requantize_q31__neon);
1089 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001090 }
1091 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001092
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001093 TEST(Q31__NEON, divide_by_po2_with_rounding_up) {
1094 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
1095 for (uint32_t s = 1; s < 32; s++) {
1096 RequantizationTester()
1097 .zeroPoint(zeroPoint)
1098 .s(s)
1099 .testDivideByPO2WithRoundingUp(xnn_requantize_q31__neon);
1100 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001101 }
1102 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001103
/* No rounding-down test: it fails because of the upward bias in multiplication. */
XNNPACK Teamb455b122019-09-27 18:10:33 -07001105
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001106 TEST(Q31__NEON, divide_by_po2_with_rounding_away) {
1107 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
1108 for (uint32_t s = 1; s < 32; s++) {
1109 RequantizationTester()
1110 .zeroPoint(zeroPoint)
1111 .s(s)
1112 .testDivideByPO2WithRoundingAway(xnn_requantize_q31__neon);
1113 }
1114 }
1115 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001116
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001117 TEST(Q31__NEON, special_cases) {
1118 RequantizationTester()
1119 .testSpecialCases(xnn_requantize_q31__neon);
1120 }
1121
1122 TEST(Q31__NEON, random_cases) {
1123 RequantizationTester()
1124 .iterations(100)
1125 .testRandomCasesApproximate(xnn_requantize_q31__neon);
1126 }
1127
1128 TEST(Q31__NEON, random_match_gemmlowp) {
1129 RequantizationTester()
1130 .iterations(100)
1131 .testRandomCasesAgainstReference(xnn_requantize_q31__neon, xnn_requantize_gemmlowp__neon);
1132 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001133
1134
/*
 * ARM NEON implementation from gemmlowp.
 */
XNNPACK Teamb455b122019-09-27 18:10:33 -07001138
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001139 TEST(GEMMLOWP__NEON, random_cases) {
1140 RequantizationTester()
1141 .iterations(100)
1142 .testRandomCasesApproximate(xnn_requantize_gemmlowp__neon);
1143 }
1144#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64