// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
9#pragma once
10
#include <gtest/gtest.h>

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack/params.h>
#include <xnnpack/scalar-utils.h>
25
26
27class RequantizationTester {
28 public:
29 inline RequantizationTester& s(uint32_t s) {
30 this->s_ = s;
31 return *this;
32 }
33
34 inline uint32_t s() const {
35 return this->s_;
36 }
37
38 inline float scale() const {
39 return ldexpf(1.0f, -s());
40 }
41
42 inline RequantizationTester& zeroPoint(int32_t zeroPoint) {
43 this->zeroPoint_ = zeroPoint;
44 return *this;
45 }
46
47 inline int32_t zeroPoint() const {
48 return this->zeroPoint_;
49 }
50
51 inline RequantizationTester& qmin(uint8_t qmin) {
52 this->qmin_ = qmin;
53 return *this;
54 }
55
56 inline uint8_t qmin() const {
57 return this->qmin_;
58 }
59
60 inline RequantizationTester& qmax(uint8_t qmax) {
61 this->qmax_ = qmax;
62 return *this;
63 }
64
65 inline uint8_t qmax() const {
66 return this->qmax_;
67 }
68
69 inline RequantizationTester& iterations(size_t iterations) {
70 this->iterations_ = iterations;
71 return *this;
72 }
73
74 inline size_t iterations() const {
75 return this->iterations_;
76 }
77
78 /*
79 * Test that requantization of numbers ((i - zero point) * 2**s) with
80 * - scale = exp2(-s)
81 * - zero point in [0, 255]
82 * - no output clamping
83 * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow.
84 */
85 void testExactDivideByPO2(requantization_function requantize) const {
86 ASSERT_GE(zeroPoint(), 0);
87 ASSERT_LE(zeroPoint(), 255);
88
89 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
90 ASSERT_GE(s(), 1);
91 ASSERT_LT(s(), 32);
92
93 std::vector<int32_t> inputs(256);
94 std::vector<uint8_t> outputs(inputs.size());
95 const int32_t maxI = (uint32_t(std::numeric_limits<int32_t>::max()) >> s()) + zeroPoint();
96 const int32_t minI = -(-uint32_t(std::numeric_limits<int32_t>::min()) >> s()) + zeroPoint();
97 for (int32_t i = 0; i < 256; i++) {
98 const int32_t clampedI = std::max(minI, std::min(maxI, i));
99 inputs[i] = int32_t(uint32_t(clampedI - zeroPoint()) << s());
100 }
101 requantize(inputs.size(), inputs.data(),
102 scale(), zeroPoint(), qmin(), qmax(),
103 outputs.data());
104 for (int32_t i = 0; i < 256; i++) {
105 const int32_t clampedI = std::max(minI, std::min(maxI, i));
106 ASSERT_EQ(clampedI, outputs[i]) << "i = " << i << ", clamped i = " << clampedI <<
107 ", min i = " << minI << ", max i = " << maxI <<
108 ", s = " << s() << ", zero point = " << zeroPoint();
109 }
110 }
111
112 /*
113 * Test that requantization of numbers (i * 2**s + sign(i - zero point) * 2**(s-1)) with
114 * - scale = exp2(-s)
115 * - zero point in [1, 255]
116 * - no output clamping
117 * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow.
118 */
119 void testDivideByPO2WithRoundingUp(requantization_function requantize) {
120 ASSERT_GE(zeroPoint(), 0);
121 ASSERT_LE(zeroPoint(), 255);
122
123 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
124 ASSERT_GE(s(), 1);
125 ASSERT_LT(s(), 32);
126
127 std::vector<int32_t> inputs(256);
128 std::vector<uint8_t> outputs(inputs.size());
129 for (int32_t i = 0; i < 256; i++) {
130 const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) -
131 (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zeroPoint());
132 inputs[i] = int32_t(input);
133 }
134 requantize(inputs.size(), inputs.data(),
135 scale(), zeroPoint(), qmin(), qmax(),
136 outputs.data());
137 for (int32_t i = 0; i < 256; i++) {
138 const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) -
139 (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zeroPoint());
140 if (int32_t(input) == input) {
141 ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input <<
142 ", s = " << s() << ", zero point = " << zeroPoint();
143 }
144 }
145 }
146
147 /*
148 * Test that requantization of numbers (i * 2**s + sign(i - zero point) * 2**(s-1)) with
149 * - scale = exp2(-s)
150 * - zero point in [1, 255]
151 * - no output clamping
152 * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow.
153 */
154 void testDivideByPO2WithRoundingDown(requantization_function requantize) {
155 ASSERT_GE(zeroPoint(), 0);
156 ASSERT_LE(zeroPoint(), 255);
157
158 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
159 ASSERT_GE(s(), 1);
160 ASSERT_LT(s(), 32);
161
162 std::vector<int32_t> inputs(256);
163 std::vector<uint8_t> outputs(inputs.size());
164 for (int32_t i = 0; i < 256; i++) {
165 const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) +
166 (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zeroPoint());
167 inputs[i] = int32_t(input);
168 }
169 requantize(inputs.size(), inputs.data(),
170 scale(), zeroPoint(), qmin(), qmax(),
171 outputs.data());
172 for (int32_t i = 0; i < 256; i++) {
173 const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) +
174 (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zeroPoint());
175 if (int32_t(input) == input) {
176 ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input <<
177 ", s = " << s() << ", zero point = " << zeroPoint();
178 }
179 }
180 }
181
182 void testDivideByPO2WithRoundingAway(requantization_function requantize) {
183 ASSERT_GE(zeroPoint(), 0);
184 ASSERT_LE(zeroPoint(), 255);
185
186 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
187 ASSERT_GE(s(), 1);
188 ASSERT_LT(s(), 32);
189
190 std::vector<int32_t> inputs(256);
191 std::vector<uint8_t> outputs(inputs.size());
192 for (int32_t i = 0; i < 256; i++) {
193 int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s());
194 if (input > 0) {
195 input -= INT64_C(1) << (s() - 1);
196 } else if (input < 0) {
197 input += INT64_C(1) << (s() - 1);
198 }
199 inputs[i] = int32_t(input);
200 }
201 requantize(inputs.size(), inputs.data(),
202 scale(), zeroPoint(), qmin(), qmax(),
203 outputs.data());
204 for (uint32_t i = 0; i < 256; i++) {
205 int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s());
206 if (input > 0) {
207 input -= INT64_C(1) << (s() - 1);
208 } else if (input < 0) {
209 input += INT64_C(1) << (s() - 1);
210 }
211 if (int32_t(input) == input) {
212 ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input <<
213 ", s = " << s() << ", zero point = " << zeroPoint();
214 }
215 }
216 }
217
218 void testSpecialCases(requantization_function requantize) {
219 std::vector<int32_t> inputs(256);
220 std::vector<uint8_t> outputs(inputs.size());
221
222 std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::min());
223 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
224 requantize(
225 inputs.size(),
226 inputs.data(),
227 ldexpf(1.0f, -32) /* scale */,
228 zeroPoint /* zero point */,
229 std::numeric_limits<uint8_t>::min(),
230 std::numeric_limits<uint8_t>::max(),
231 outputs.data());
232 ASSERT_EQ(std::max(int32_t(0), zeroPoint - 1), *std::min_element(outputs.cbegin(), outputs.cend()));
233 }
234
235 std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::max());
236 requantize(
237 inputs.size(),
238 inputs.data(),
239 0x1.FFFFFEp-1f /* scale */,
240 std::numeric_limits<uint8_t>::max() /* zero point */,
241 std::numeric_limits<uint8_t>::min(),
242 std::numeric_limits<uint8_t>::max(),
243 outputs.data());
244 for (size_t i = 0; i < inputs.size(); i++) {
245 ASSERT_EQ(std::numeric_limits<uint8_t>::max(), outputs[i]);
246 }
247 }
248
249 void testRandomCasesPrecise(requantization_function requantize) {
250 std::random_device random_device;
251 std::mt19937 mtRng(random_device());
252 for (size_t iteration = 0; iteration < iterations(); iteration++) {
253 auto rng = std::bind(std::uniform_int_distribution<uint8_t>(), mtRng);
254
255 std::vector<int32_t> inputs(4096);
256 std::vector<uint8_t> outputs(inputs.size());
257
258 const uint8_t zeroPoint = UINT8_C(128);
259 std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
260 const float scale = scaleDistribution(mtRng);
261 for (size_t i = 0; i < inputs.size(); i++) {
262 const uint8_t approximateOutput = rng();
263 const int32_t input = int32_t(double(approximateOutput) / double(scale));
264 inputs[i] = input;
265 }
266
267 requantize(
268 inputs.size(), inputs.data(), scale, zeroPoint,
269 std::numeric_limits<uint8_t>::min(),
270 std::numeric_limits<uint8_t>::max(),
271 outputs.data());
272
273 /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */
274 ASSERT_NE(
275 *std::max_element(outputs.cbegin(), outputs.cend()),
276 *std::min_element(outputs.cbegin(), outputs.cend()));
277
278 for (size_t i = 0; i < inputs.size(); i++) {
279 const uint8_t referenceOutput =
280 scalar_requantize_precise(
281 inputs[i], scale, zeroPoint,
282 std::numeric_limits<uint8_t>::min(),
283 std::numeric_limits<uint8_t>::max());
284 ASSERT_EQ(uint32_t(referenceOutput), uint32_t(outputs[i]));
285 }
286 }
287 }
288
289 void testRandomCasesApproximate(requantization_function requantize) {
290 std::random_device random_device;
291 std::mt19937 mtRng(random_device());
292 for (size_t iteration = 0; iteration < iterations(); iteration++) {
293 auto rng = std::bind(std::uniform_int_distribution<uint8_t>(), mtRng);
294
295 std::vector<int32_t> inputs(4096);
296 std::vector<uint8_t> outputs(inputs.size());
297
298 const uint8_t zeroPoint = UINT8_C(128);
299 std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
300 const float scale = scaleDistribution(mtRng);
301 for (size_t i = 0; i < inputs.size(); i++) {
302 const uint8_t approximateOutput = rng();
303 const int32_t input = int32_t(double(approximateOutput) / double(scale));
304 inputs[i] = input;
305 }
306
307 requantize(
308 inputs.size(), inputs.data(), scale, zeroPoint,
309 std::numeric_limits<uint8_t>::min(),
310 std::numeric_limits<uint8_t>::max(),
311 outputs.data());
312
313 /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */
314 ASSERT_NE(
315 *std::max_element(outputs.cbegin(), outputs.cend()),
316 *std::min_element(outputs.cbegin(), outputs.cend()));
317
318 for (size_t i = 0; i < inputs.size(); i++) {
319 const double referenceOutput =
320 RequantizationTester::requantizeApproximate(
321 inputs[i], scale, zeroPoint,
322 std::numeric_limits<uint8_t>::min(),
323 std::numeric_limits<uint8_t>::max());
324 ASSERT_LE(fabs(referenceOutput - double(outputs[i])), 0.55) <<
325 "input = " << inputs[i] <<
326 ", output = " << uint32_t(outputs[i]) << ", reference output = " << referenceOutput;
327 }
328 }
329 }
330
331 void testRandomCasesAgainstReference(requantization_function requantize, requantization_function requantizeReference) {
332 std::random_device random_device;
333 std::mt19937 mtRng(random_device());
334 for (size_t iteration = 0; iteration < iterations(); iteration++) {
335 auto rng = std::bind(std::uniform_int_distribution<uint8_t>(), mtRng);
336
337 std::vector<int32_t> inputs(4096);
338 std::vector<uint8_t> outputs(inputs.size());
339 std::vector<uint8_t> referenceOutputs(inputs.size());
340
341 const uint8_t zeroPoint = UINT8_C(128);
342 std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
343 const float scale = scaleDistribution(mtRng);
344 for (size_t i = 0; i < inputs.size(); i++) {
345 const uint8_t approximateOutput = rng();
346 const int32_t input = int32_t(double(approximateOutput) / double(scale));
347 inputs[i] = input;
348 }
349
350 requantize(
351 inputs.size(), inputs.data(), scale, zeroPoint,
352 std::numeric_limits<uint8_t>::min(),
353 std::numeric_limits<uint8_t>::max(),
354 outputs.data());
355
356 requantizeReference(
357 inputs.size(), inputs.data(), scale, zeroPoint,
358 std::numeric_limits<uint8_t>::min(),
359 std::numeric_limits<uint8_t>::max(),
360 referenceOutputs.data());
361
362 /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */
363 ASSERT_NE(
364 *std::max_element(outputs.cbegin(), outputs.cend()),
365 *std::min_element(outputs.cbegin(), outputs.cend()));
366
367 for (size_t i = 0; i < inputs.size(); i++) {
368 ASSERT_EQ(uint32_t(referenceOutputs[i]), uint32_t(outputs[i]));
369 }
370 }
371 }
372
373 static inline int64_t shiftLeft(int64_t w, uint32_t n) {
374 return (int64_t) ((uint64_t) w << n);
375 }
376
377 static inline double requantizeApproximate(
378 int32_t value,
379 float scale,
380 uint8_t zeroPoint,
381 uint8_t qmin,
382 uint8_t qmax)
383 {
384 assert(scale < 1.0f);
385 assert(scale >= 0x1.0p-32f);
386
387 double clampedValue = double(value) * double(scale) + double(zeroPoint);
388
389 const double fmin = double(qmin);
390 if (clampedValue < fmin) {
391 clampedValue = fmin;
392 }
393
394 const double fmax = double(qmax);
395 if (clampedValue > fmax) {
396 clampedValue = fmax;
397 }
398
399 return clampedValue;
400 }
401
402 private:
403 size_t zeroPoint_{0};
404 size_t s_{1};
405 uint8_t qmin_{std::numeric_limits<uint8_t>::min()};
406 uint8_t qmax_{std::numeric_limits<uint8_t>::max()};
407 size_t iterations_{1};
408};