// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <xnnpack.h>

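// Tester for the NC-layout Fully Connected operator: parameters are set via
// chained setter methods, then TestQ8() or TestF32() runs the operator and
// checks it against a reference implementation. A typical (hypothetical) use:
//
//   FullyConnectedOperatorTester()
//     .batch_size(4)
//     .input_channels(23)
//     .output_channels(19)
//     .iterations(3)
//     .TestQ8();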
class FullyConnectedOperatorTester {
 public:
  inline FullyConnectedOperatorTester& input_channels(size_t input_channels) {
    assert(input_channels >= 1);
    this->input_channels_ = input_channels;
    return *this;
  }

  inline size_t input_channels() const {
    return this->input_channels_;
  }

  inline FullyConnectedOperatorTester& output_channels(size_t output_channels) {
    assert(output_channels >= 1);
    this->output_channels_ = output_channels;
    return *this;
  }

  inline size_t output_channels() const {
    return this->output_channels_;
  }

  inline FullyConnectedOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size >= 1);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline FullyConnectedOperatorTester& input_stride(size_t input_stride) {
    assert(input_stride >= 1);
    this->input_stride_ = input_stride;
    return *this;
  }

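  // A stride of 0 (the default) means "tightly packed": the getter falls back
  // to the channel count. output_stride() below follows the same convention.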
  inline size_t input_stride() const {
    if (this->input_stride_ == 0) {
      return input_channels();
    } else {
      assert(this->input_stride_ >= input_channels());
      return this->input_stride_;
    }
  }

  inline FullyConnectedOperatorTester& output_stride(size_t output_stride) {
    assert(output_stride >= 1);
    this->output_stride_ = output_stride;
    return *this;
  }

  inline size_t output_stride() const {
    if (this->output_stride_ == 0) {
      return output_channels();
    } else {
      assert(this->output_stride_ >= output_channels());
      return this->output_stride_;
    }
  }

  inline FullyConnectedOperatorTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline FullyConnectedOperatorTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline FullyConnectedOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

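  // Quantized (Q8) path: generate random uint8 inputs/kernel and int32 bias,
  // compute an int32 reference result, derive requantization parameters from
  // the observed accumulator range, then run the operator and compare.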
  void TestQ8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    // Note: uint8_t is not a valid IntType for uniform_int_distribution, so
    // draw uint32_t values in [0, 255] and let assignment narrow them.
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

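    // Allocate XNN_EXTRA_BYTES of extra input space: XNNPACK kernels may read
    // (but do not use) a few bytes past the end of their input buffers.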
    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      (batch_size() - 1) * input_stride() + input_channels());
    std::vector<uint8_t> kernel(output_channels() * input_channels());
    std::vector<int32_t> bias(output_channels());
    std::vector<uint8_t> output((batch_size() - 1) * output_stride() + output_channels());
    std::vector<int32_t> accumulators(batch_size() * output_channels());
    std::vector<double> output_ref(batch_size() * output_channels());

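    // Mid-range zero points keep the (input - zero point) and
    // (kernel - zero point) terms roughly centered around zero.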
    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(s32rng));
      std::fill(output.begin(), output.end(), 0xA5);
      std::fill(accumulators.begin(), accumulators.end(), 0);

      // Compute reference results, without renormalization.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oc = 0; oc < output_channels(); oc++) {
          accumulators[i * output_channels() + oc] = bias[oc];
        }
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oc = 0; oc < output_channels(); oc++) {
          for (size_t ic = 0; ic < input_channels(); ic++) {
            accumulators[i * output_channels() + oc] +=
              (int32_t(input[i * input_stride() + ic]) - int32_t(input_zero_point)) *
              (int32_t(kernel[oc * input_channels() + ic]) - int32_t(kernel_zero_point));
          }
        }
      }

      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

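      // Map the observed accumulator range onto the full uint8 range: the
      // scale is range / 255, and the zero point is chosen so the midpoint of
      // the accumulator range lands near 127.5 after scaling.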
      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, run, and destroy Fully Connected operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize());
      xnn_operator_t fully_connected_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_fully_connected_nc_q8(
          input_channels(), output_channels(),
          input_stride(), output_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), bias.data(),
          output_zero_point, output_scale, qmin(), qmax(),
          0, &fully_connected_op));

      // Smart pointer to automatically delete fully_connected_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_fully_connected_op(fully_connected_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_fully_connected_nc_q8(
          fully_connected_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(fully_connected_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < output_channels(); c++) {
          ASSERT_LE(int32_t(output[i * output_stride() + c]), int32_t(qmax()))
            << "batch index = " << i << ", channel = " << c;
          ASSERT_GE(int32_t(output[i * output_stride() + c]), int32_t(qmin()))
            << "batch index = " << i << ", channel = " << c;
          ASSERT_NEAR(
            output_ref[i * output_channels() + c],
            double(output[i * output_stride() + c]) - double(output_zero_point),
            0.9)
            << "batch index = " << i << ", channel = " << c;
        }
      }
    }
  }

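  // Single-precision (F32) path: generate random float inputs, compute a
  // dense reference result, derive clamping bounds from the observed output
  // range and qmin()/qmax(), then run the operator and compare with a small
  // relative tolerance.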
  void TestF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      (batch_size() - 1) * input_stride() + input_channels());
    std::vector<float> kernel(output_channels() * input_channels());
    std::vector<float> bias(output_channels());
    std::vector<float> output((batch_size() - 1) * output_stride() + output_channels());
    std::vector<float> output_ref(batch_size() * output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oc = 0; oc < output_channels(); oc++) {
          output_ref[i * output_channels() + oc] = bias[oc];
        }
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oc = 0; oc < output_channels(); oc++) {
          for (size_t ic = 0; ic < input_channels(); ic++) {
            output_ref[i * output_channels() + oc] +=
              input[i * input_stride() + ic] * kernel[oc * input_channels() + ic];
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

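      // Position the clamping window inside the observed output range: qmin()
      // and qmax() select, in 1/255 steps of the range, how much is cut off
      // from the bottom and the top, respectively.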
      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Fully Connected operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize());
      xnn_operator_t fully_connected_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_fully_connected_nc_f32(
          input_channels(), output_channels(),
          input_stride(), output_stride(),
          kernel.data(), bias.data(),
          output_min, output_max,
          0, &fully_connected_op));

      // Smart pointer to automatically delete fully_connected_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_fully_connected_op(fully_connected_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_fully_connected_nc_f32(
          fully_connected_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(fully_connected_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < output_channels(); c++) {
          ASSERT_LE(output[i * output_stride() + c], output_max)
            << "batch index = " << i << ", channel = " << c;
          ASSERT_GE(output[i * output_stride() + c], output_min)
            << "batch index = " << i << ", channel = " << c;
          ASSERT_NEAR(
            output_ref[i * output_channels() + c],
            output[i * output_stride() + c],
            1.0e-4 * std::abs(output_ref[i * output_channels() + c]))
            << "batch index = " << i << ", channel = " << c;
        }
      }
    }
  }

298
299 private:
300 size_t input_channels_{1};
301 size_t input_stride_{0};
302 size_t output_channels_{1};
303 size_t output_stride_{0};
304 size_t batch_size_{1};
305 uint8_t qmin_{0};
306 uint8_t qmax_{255};
307 size_t iterations_{1};
308};