// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <functional>
#include <random>
#include <vector>

#include <xnnpack.h>

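// Helper for testing XNNPACK 2D convolution operators (Q8 and F32, NHWC layout).
// Parameters are configured via chainable setters; the Test*() methods create
// the operator, run it, and compare the result against a naive reference
// implementation. Illustrative usage (all setters shown exist below):
//
//   ConvolutionOperatorTester()
//     .input_size(10, 9)
//     .kernel_size(3, 3)
//     .groups(2)
//     .group_input_channels(7)
//     .group_output_channels(5)
//     .TestF32();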
class ConvolutionOperatorTester {
 public:
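  // Enable TensorFlow-style SAME padding: explicit padding must be left at 0,
  // and the effective padding is derived from the input/output sizes by the
  // padding_top()/padding_bottom()/padding_left()/padding_right() getters.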
  inline ConvolutionOperatorTester& padding_tf_same(bool padding_same) {
    if (padding_same) {
      assert(padding_top() == 0);
      assert(padding_left() == 0);
      assert(padding_bottom() == 0);
      assert(padding_right() == 0);
    }
    this->padding_tf_same_ = padding_same;
    return *this;
  }

  inline bool padding_tf_same() const {
    return this->padding_tf_same_;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding) {
    assert(!padding_tf_same());
    this->padding_top_ = padding;
    this->padding_right_ = padding;
    this->padding_bottom_ = padding;
    this->padding_left_ = padding;
    return *this;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_right_ = padding_width;
    this->padding_bottom_ = padding_height;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_height(uint32_t padding_height) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_bottom_ = padding_height;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_width(uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_width;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_top(uint32_t padding_top) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_top;
    return *this;
  }

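  // With TF SAME padding, the total padding along each dimension is whatever is
  // needed to produce ceil(input / subsampling) output pixels; it is split so
  // that the top/left side gets the smaller half and the bottom/right side gets
  // the remainder.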
  inline uint32_t padding_top() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height / 2;
    } else {
      return this->padding_top_;
    }
  }

  inline ConvolutionOperatorTester& padding_left(uint32_t padding_left) {
    assert(!padding_tf_same());
    this->padding_left_ = padding_left;
    return *this;
  }

  inline uint32_t padding_left() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width / 2;
    } else {
      return this->padding_left_;
    }
  }

  inline ConvolutionOperatorTester& padding_bottom(uint32_t padding_bottom) {
    assert(!padding_tf_same());
    this->padding_bottom_ = padding_bottom;
    return *this;
  }

  inline uint32_t padding_bottom() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height - total_padding_height / 2;
    } else {
      return this->padding_bottom_;
    }
  }

  inline ConvolutionOperatorTester& padding_right(uint32_t padding_right) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_right;
    return *this;
  }

  inline uint32_t padding_right() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width - total_padding_width / 2;
    } else {
      return this->padding_right_;
    }
  }

  inline ConvolutionOperatorTester& input_size(uint32_t input_height, uint32_t input_width) {
    assert(input_height >= 1);
    assert(input_width >= 1);
    this->input_height_ = input_height;
    this->input_width_ = input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& input_height(uint32_t input_height) {
    assert(input_height >= 1);
    this->input_height_ = input_height;
    return *this;
  }

  inline uint32_t input_height() const {
    return this->input_height_;
  }

  inline ConvolutionOperatorTester& input_width(uint32_t input_width) {
    assert(input_width >= 1);
    this->input_width_ = input_width;
    return *this;
  }

  inline uint32_t input_width() const {
    return this->input_width_;
  }

  inline ConvolutionOperatorTester& groups(uint32_t groups) {
    assert(groups >= 1);
    this->groups_ = groups;
    return *this;
  }

  inline uint32_t groups() const {
    return this->groups_;
  }

  inline ConvolutionOperatorTester& group_input_channels(size_t group_input_channels) {
    assert(group_input_channels >= 1);
    this->group_input_channels_ = group_input_channels;
    return *this;
  }

  inline size_t group_input_channels() const {
    return this->group_input_channels_;
  }

  inline ConvolutionOperatorTester& group_output_channels(size_t group_output_channels) {
    assert(group_output_channels >= 1);
    this->group_output_channels_ = group_output_channels;
    return *this;
  }

  inline size_t group_output_channels() const {
    return this->group_output_channels_;
  }

  inline ConvolutionOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size >= 1);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_size) {
    assert(kernel_size >= 1);
    this->kernel_height_ = kernel_size;
    this->kernel_width_ = kernel_size;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_height, uint32_t kernel_width) {
    assert(kernel_height >= 1);
    assert(kernel_width >= 1);
    this->kernel_height_ = kernel_height;
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_height(uint32_t kernel_height) {
    assert(kernel_height >= 1);
    this->kernel_height_ = kernel_height;
    return *this;
  }

  inline uint32_t kernel_height() const {
    return this->kernel_height_;
  }

  inline ConvolutionOperatorTester& kernel_width(uint32_t kernel_width) {
    assert(kernel_width >= 1);
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline uint32_t kernel_width() const {
    return this->kernel_width_;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation) {
    assert(dilation >= 1);
    this->dilation_height_ = dilation;
    this->dilation_width_ = dilation;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation_height, uint32_t dilation_width) {
    assert(dilation_height >= 1);
    assert(dilation_width >= 1);
    this->dilation_height_ = dilation_height;
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation_height(uint32_t dilation_height) {
    assert(dilation_height >= 1);
    this->dilation_height_ = dilation_height;
    return *this;
  }

  inline uint32_t dilation_height() const {
    return this->dilation_height_;
  }

  inline ConvolutionOperatorTester& dilation_width(uint32_t dilation_width) {
    assert(dilation_width >= 1);
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline uint32_t dilation_width() const {
    return this->dilation_width_;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling) {
    assert(subsampling >= 1);
    this->subsampling_height_ = subsampling;
    this->subsampling_width_ = subsampling;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling_height, uint32_t subsampling_width) {
    assert(subsampling_height >= 1);
    assert(subsampling_width >= 1);
    this->subsampling_height_ = subsampling_height;
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling_height(uint32_t subsampling_height) {
    assert(subsampling_height >= 1);
    this->subsampling_height_ = subsampling_height;
    return *this;
  }

  inline uint32_t subsampling_height() const {
    return this->subsampling_height_;
  }

  inline ConvolutionOperatorTester& subsampling_width(uint32_t subsampling_width) {
    assert(subsampling_width >= 1);
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline uint32_t subsampling_width() const {
    return this->subsampling_width_;
  }

  inline ConvolutionOperatorTester& input_pixel_stride(size_t input_pixel_stride) {
    assert(input_pixel_stride >= 1);
    this->input_pixel_stride_ = input_pixel_stride;
    return *this;
  }

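  // A pixel stride of 0 (the default) means the tensors are densely packed,
  // i.e. the stride equals groups() * group_input_channels() for the input
  // (respectively groups() * group_output_channels() for the output).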
  inline size_t input_pixel_stride() const {
    if (this->input_pixel_stride_ == 0) {
      return group_input_channels() * groups();
    } else {
      assert(this->input_pixel_stride_ >= group_input_channels() * groups());
      return this->input_pixel_stride_;
    }
  }

  inline ConvolutionOperatorTester& output_pixel_stride(size_t output_pixel_stride) {
    assert(output_pixel_stride >= 1);
    this->output_pixel_stride_ = output_pixel_stride;
    return *this;
  }

  inline size_t output_pixel_stride() const {
    if (this->output_pixel_stride_ == 0) {
      return group_output_channels() * groups();
    } else {
      assert(this->output_pixel_stride_ >= group_output_channels() * groups());
      return this->output_pixel_stride_;
    }
  }

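  // Extent of the kernel after dilation: a K-tap kernel with dilation d spans
  // (K - 1) * d + 1 input pixels.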
  inline uint32_t dilated_kernel_height() const {
    return (kernel_height() - 1) * dilation_height() + 1;
  }

  inline uint32_t dilated_kernel_width() const {
    return (kernel_width() - 1) * dilation_width() + 1;
  }

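  // Output size: with TF SAME padding it is ceil(input / subsampling); with
  // explicit padding it is (padded_input - dilated_kernel) / subsampling + 1,
  // clamped below at 1.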
  inline size_t output_height() const {
    if (padding_tf_same()) {
      return (input_height() + subsampling_height() - 1) / subsampling_height();
    } else {
      const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
      if (padded_input_height <= dilated_kernel_height()) {
        return 1;
      } else {
        return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
      }
    }
  }

  inline size_t output_width() const {
    if (padding_tf_same()) {
      return (input_width() + subsampling_width() - 1) / subsampling_width();
    } else {
      const size_t padded_input_width = padding_left() + input_width() + padding_right();
      if (padded_input_width <= dilated_kernel_width()) {
        return 1;
      } else {
        return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
      }
    }
  }

  inline ConvolutionOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
    assert(next_input_height >= 1);
    assert(next_input_width >= 1);
    this->next_input_height_ = next_input_height;
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& next_input_height(uint32_t next_input_height) {
    assert(next_input_height >= 1);
    this->next_input_height_ = next_input_height;
    return *this;
  }

  inline uint32_t next_input_height() const {
    if (this->next_input_height_ == 0) {
      return input_height();
    } else {
      return this->next_input_height_;
    }
  }

  inline ConvolutionOperatorTester& next_input_width(uint32_t next_input_width) {
    assert(next_input_width >= 1);
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline uint32_t next_input_width() const {
    if (this->next_input_width_ == 0) {
      return input_width();
    } else {
      return this->next_input_width_;
    }
  }

  inline size_t next_output_height() const {
    const size_t padded_input_height = padding_top() + next_input_height() + padding_bottom();
    if (padded_input_height <= dilated_kernel_height()) {
      return 1;
    } else {
      return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
    }
  }

  inline size_t next_output_width() const {
    const size_t padded_input_width = padding_left() + next_input_width() + padding_right();
    if (padded_input_width <= dilated_kernel_width()) {
      return 1;
    } else {
      return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
    }
  }

  inline ConvolutionOperatorTester& next_batch_size(size_t next_batch_size) {
    assert(next_batch_size >= 1);
    this->next_batch_size_ = next_batch_size;
    return *this;
  }

  inline size_t next_batch_size() const {
    if (this->next_batch_size_ == 0) {
      return batch_size();
    } else {
      return this->next_batch_size_;
    }
  }

  inline ConvolutionOperatorTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline ConvolutionOperatorTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline ConvolutionOperatorTester& depthwise_layout(bool depthwise_layout) {
    this->depthwise_layout_ = depthwise_layout;
    return *this;
  }

  inline bool depthwise_layout() const {
    return this->depthwise_layout_;
  }

  inline ConvolutionOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

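  // Tests the quantized (Q8) NHWC convolution operator: computes int32
  // reference accumulators, derives an output scale/zero point that fits the
  // observed accumulator range, then creates, sets up, and runs the XNNPACK
  // operator and compares its output against the renormalized reference.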
  void TestQ8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()) + 8);
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output(batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(s32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t oc = 0; oc < group_output_channels(); oc++) {
                accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                  bias[g * group_output_channels() + oc];
              }
            }
          }
        }
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                              (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
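      // The output scale maps the observed accumulator range onto the 8-bit
      // range [0, 255], and the zero point centers that range, so the
      // renormalized reference values stay representable in uint8.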
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize());
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_q8(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), bias.data(),
          output_zero_point, output_scale, qmin(), qmax(),
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    double(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                    0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

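  // Tests the single-precision (F32) NHWC convolution operator against a naive
  // reference implementation, with output clamping derived from qmin()/qmax().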
  void TestF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()));
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t oc = 0; oc < group_output_channels(); oc++) {
                output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                  bias[g * group_output_channels() + oc];
              }
            }
          }
        }
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g] *
                            kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
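      // qmin()/qmax() select how aggressively the output is clamped: they are
      // mapped onto fractions of the observed output range, so qmin = 0 and
      // qmax = 255 leave the reference results effectively unclamped.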
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize());
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_f32(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          kernel.data(), bias.data(),
          output_min, output_max,
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                    1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

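  // Tests that a Q8 convolution operator can be set up a second time with
  // different batch and input dimensions (next_batch_size / next_input_size)
  // and still produce correct results after the re-setup.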
  void TestSetupQ8() const {
    ASSERT_FALSE(depthwise_layout());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
      batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()),
      next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_pixel_stride() + groups() * group_input_channels())) + 8);
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output(std::max(
      batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()),
      next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_pixel_stride() + groups() * group_output_channels())));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
    std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(s32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t oc = 0; oc < group_output_channels(); oc++) {
                accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                  bias[g * group_output_channels() + oc];
              }
            }
          }
        }
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, and run Convolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize());
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_q8(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), bias.data(),
          output_zero_point, output_scale, qmin(), qmax(),
          0, &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    double(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                    0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results for the second run, including renormalization.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t oc = 0; oc < group_output_channels(); oc++) {
                next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                  bias[g * group_output_channels() + oc];
              }
            }
          }
        }
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                    double(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                    0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

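  // Same re-setup test as TestSetupQ8, but for the F32 convolution operator.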
  void TestSetupF32() const {
    ASSERT_FALSE(depthwise_layout());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max(
      batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()),
      next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_pixel_stride() + groups() * group_input_channels())));
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(std::max(
      batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()),
      next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_pixel_stride() + groups() * group_output_channels())));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t oc = 0; oc < group_output_channels(); oc++) {
                output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                  bias[g * group_output_channels() + oc];
              }
            }
          }
        }
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, and run Convolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize());
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_f32(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          kernel.data(), bias.data(),
          output_min, output_max,
          0, &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                    1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results for the second run, including clamping.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t oc = 0; oc < group_output_channels(); oc++) {
                next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                  bias[g * group_output_channels() + oc];
              }
            }
          }
        }
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      for (float& value : next_output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                    output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                    1.0e-4 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

 private:
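  // For the next_* and *_pixel_stride_ fields, 0 is a sentinel meaning "derive
  // the default from the corresponding first-run / channel parameters".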
  uint32_t padding_top_{0};
  uint32_t padding_right_{0};
  uint32_t padding_bottom_{0};
  uint32_t padding_left_{0};
  bool padding_tf_same_{false};
  size_t input_height_{1};
  size_t input_width_{1};
  uint32_t groups_{1};
  size_t group_input_channels_{1};
  size_t input_pixel_stride_{0};
  size_t group_output_channels_{1};
  size_t output_pixel_stride_{0};
  size_t batch_size_{1};
  uint32_t kernel_height_{1};
  uint32_t kernel_width_{1};
  uint32_t dilation_height_{1};
  uint32_t dilation_width_{1};
  uint32_t subsampling_height_{1};
  uint32_t subsampling_width_{1};
  size_t next_input_height_{0};
  size_t next_input_width_{0};
  size_t next_batch_size_{0};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  bool depthwise_layout_{false};
  size_t iterations_{1};
};