// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <functional>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <xnnpack.h>

namespace {

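// doz ("difference or zero") computes the saturating difference max(a - b, 0),
// used below to keep unsigned padding arithmetic from wrapping around.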
template<class T>
inline T doz(T a, T b) {
  return a > b ? a - b : T(0);
}

}  // namespace

class DeconvolutionOperatorTester {
 public:
  inline DeconvolutionOperatorTester& padding_tf_same(bool padding_same) {
    if (padding_same) {
      assert(padding_top() == 0);
      assert(padding_left() == 0);
      assert(padding_bottom() == 0);
      assert(padding_right() == 0);
    }
    this->padding_tf_same_ = padding_same;
    return *this;
  }

  inline bool padding_tf_same() const {
    return this->padding_tf_same_;
  }

  inline DeconvolutionOperatorTester& padding(uint32_t padding) {
    assert(!padding_tf_same());
    this->padding_top_ = padding;
    this->padding_right_ = padding;
    this->padding_bottom_ = padding;
    this->padding_left_ = padding;
    return *this;
  }

  inline DeconvolutionOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_right_ = padding_width;
    this->padding_bottom_ = padding_height;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline DeconvolutionOperatorTester& padding_height(uint32_t padding_height) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_bottom_ = padding_height;
    return *this;
  }

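  // In TF SAME mode the total padding is derived from the kernel, dilation,
  // stride, and input size; doz() clamps at zero, so the getter below evaluates
  // max(dilated_kernel_height() - 1 - (input_height() - 1) % stride_height(), 0).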
  inline uint32_t padding_height() const {
    if (padding_tf_same()) {
      return doz(dilated_kernel_height() - 1, static_cast<uint32_t>((input_height() - 1) % stride_height()));
    } else {
      return this->padding_top_ + this->padding_bottom_;
    }
  }

  inline DeconvolutionOperatorTester& padding_width(uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_width;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline uint32_t padding_width() const {
    if (padding_tf_same()) {
      return doz(dilated_kernel_width() - 1, static_cast<uint32_t>((input_width() - 1) % stride_width()));
    } else {
      return this->padding_left_ + this->padding_right_;
    }
  }

  inline DeconvolutionOperatorTester& padding_top(uint32_t padding_top) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_top;
    return *this;
  }

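  // In TF SAME mode the total padding is split between the two edges, with any
  // odd leftover pixel assigned to the bottom/right edge, as TensorFlow does.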
  inline uint32_t padding_top() const {
    if (padding_tf_same()) {
      return padding_height() / 2;
    } else {
      return this->padding_top_;
    }
  }

  inline DeconvolutionOperatorTester& padding_right(uint32_t padding_right) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_right;
    return *this;
  }

  inline uint32_t padding_right() const {
    if (padding_tf_same()) {
      return padding_width() - padding_left();
    } else {
      return this->padding_right_;
    }
  }

  inline DeconvolutionOperatorTester& padding_bottom(uint32_t padding_bottom) {
    assert(!padding_tf_same());
    this->padding_bottom_ = padding_bottom;
    return *this;
  }

  inline uint32_t padding_bottom() const {
    if (padding_tf_same()) {
      return padding_height() - padding_top();
    } else {
      return this->padding_bottom_;
    }
  }

  inline DeconvolutionOperatorTester& padding_left(uint32_t padding_left) {
    assert(!padding_tf_same());
    this->padding_left_ = padding_left;
    return *this;
  }

  inline uint32_t padding_left() const {
    if (padding_tf_same()) {
      return padding_width() / 2;
    } else {
      return this->padding_left_;
    }
  }

  inline DeconvolutionOperatorTester& adjustment_height(uint32_t adjustment_height) {
    this->adjustment_height_ = adjustment_height;
    return *this;
  }

  inline uint32_t adjustment_height() const {
    return this->adjustment_height_;
  }

  inline DeconvolutionOperatorTester& adjustment_width(uint32_t adjustment_width) {
    this->adjustment_width_ = adjustment_width;
    return *this;
  }

  inline uint32_t adjustment_width() const {
    return this->adjustment_width_;
  }

  inline DeconvolutionOperatorTester& input_size(uint32_t input_height, uint32_t input_width) {
    assert(input_height >= 1);
    assert(input_width >= 1);
    this->input_height_ = input_height;
    this->input_width_ = input_width;
    return *this;
  }

  inline DeconvolutionOperatorTester& input_height(uint32_t input_height) {
    assert(input_height >= 1);
    this->input_height_ = input_height;
    return *this;
  }

  inline uint32_t input_height() const {
    return this->input_height_;
  }

  inline DeconvolutionOperatorTester& input_width(uint32_t input_width) {
    assert(input_width >= 1);
    this->input_width_ = input_width;
    return *this;
  }

  inline uint32_t input_width() const {
    return this->input_width_;
  }

  inline DeconvolutionOperatorTester& groups(uint32_t groups) {
    assert(groups >= 1);
    this->groups_ = groups;
    return *this;
  }

  inline uint32_t groups() const {
    return this->groups_;
  }

  inline DeconvolutionOperatorTester& group_input_channels(size_t group_input_channels) {
    assert(group_input_channels >= 1);
    this->group_input_channels_ = group_input_channels;
    return *this;
  }

  inline size_t group_input_channels() const {
    return this->group_input_channels_;
  }

  inline DeconvolutionOperatorTester& group_output_channels(size_t group_output_channels) {
    assert(group_output_channels >= 1);
    this->group_output_channels_ = group_output_channels;
    return *this;
  }

  inline size_t group_output_channels() const {
    return this->group_output_channels_;
  }

  inline DeconvolutionOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size >= 1);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline DeconvolutionOperatorTester& kernel_size(uint32_t kernel_size) {
    assert(kernel_size >= 1);
    this->kernel_height_ = kernel_size;
    this->kernel_width_ = kernel_size;
    return *this;
  }

  inline DeconvolutionOperatorTester& kernel_size(uint32_t kernel_height, uint32_t kernel_width) {
    assert(kernel_height >= 1);
    assert(kernel_width >= 1);
    this->kernel_height_ = kernel_height;
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline DeconvolutionOperatorTester& kernel_height(uint32_t kernel_height) {
    assert(kernel_height >= 1);
    this->kernel_height_ = kernel_height;
    return *this;
  }

  inline uint32_t kernel_height() const {
    return this->kernel_height_;
  }

  inline DeconvolutionOperatorTester& kernel_width(uint32_t kernel_width) {
    assert(kernel_width >= 1);
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline uint32_t kernel_width() const {
    return this->kernel_width_;
  }

  inline DeconvolutionOperatorTester& dilation(uint32_t dilation) {
    assert(dilation >= 1);
    this->dilation_height_ = dilation;
    this->dilation_width_ = dilation;
    return *this;
  }

  inline DeconvolutionOperatorTester& dilation(uint32_t dilation_height, uint32_t dilation_width) {
    assert(dilation_height >= 1);
    assert(dilation_width >= 1);
    this->dilation_height_ = dilation_height;
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline DeconvolutionOperatorTester& dilation_height(uint32_t dilation_height) {
    assert(dilation_height >= 1);
    this->dilation_height_ = dilation_height;
    return *this;
  }

  inline uint32_t dilation_height() const {
    return this->dilation_height_;
  }

  inline DeconvolutionOperatorTester& dilation_width(uint32_t dilation_width) {
    assert(dilation_width >= 1);
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline uint32_t dilation_width() const {
    return this->dilation_width_;
  }

  inline DeconvolutionOperatorTester& stride(uint32_t stride) {
    assert(stride >= 1);
    this->stride_height_ = stride;
    this->stride_width_ = stride;
    return *this;
  }

  inline DeconvolutionOperatorTester& stride(uint32_t stride_height, uint32_t stride_width) {
    assert(stride_height >= 1);
    assert(stride_width >= 1);
    this->stride_height_ = stride_height;
    this->stride_width_ = stride_width;
    return *this;
  }

  inline DeconvolutionOperatorTester& stride_height(uint32_t stride_height) {
    assert(stride_height >= 1);
    this->stride_height_ = stride_height;
    return *this;
  }

  inline uint32_t stride_height() const {
    return this->stride_height_;
  }

  inline DeconvolutionOperatorTester& stride_width(uint32_t stride_width) {
    assert(stride_width >= 1);
    this->stride_width_ = stride_width;
    return *this;
  }

  inline uint32_t stride_width() const {
    return this->stride_width_;
  }

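  // Pixel strides default to 0, which the getters below interpret as a dense
  // layout, i.e. a stride of groups() * group_{input,output}_channels().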
  inline DeconvolutionOperatorTester& input_pixel_stride(size_t input_pixel_stride) {
    assert(input_pixel_stride >= 1);
    this->input_pixel_stride_ = input_pixel_stride;
    return *this;
  }

  inline size_t input_pixel_stride() const {
    if (this->input_pixel_stride_ == 0) {
      return group_input_channels() * groups();
    } else {
      assert(this->input_pixel_stride_ >= group_input_channels() * groups());
      return this->input_pixel_stride_;
    }
  }

  inline DeconvolutionOperatorTester& output_pixel_stride(size_t output_pixel_stride) {
    assert(output_pixel_stride >= 1);
    this->output_pixel_stride_ = output_pixel_stride;
    return *this;
  }

  inline size_t output_pixel_stride() const {
    if (this->output_pixel_stride_ == 0) {
      return group_output_channels() * groups();
    } else {
      assert(this->output_pixel_stride_ >= group_output_channels() * groups());
      return this->output_pixel_stride_;
    }
  }

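  // The dilated kernel extent is the window a kernel covers once dilation
  // gaps are counted: (kernel_size - 1) * dilation + 1.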
  inline uint32_t dilated_kernel_height() const {
    return (kernel_height() - 1) * dilation_height() + 1;
  }

  inline uint32_t dilated_kernel_width() const {
    return (kernel_width() - 1) * dilation_width() + 1;
  }

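  // Output size follows the standard deconvolution shape formula:
  // output = stride * (input - 1) + adjustment + dilated_kernel - total_padding.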
  inline size_t output_height() const {
    return stride_height() * (input_height() - 1) + adjustment_height() + dilated_kernel_height() - padding_height();
  }

  inline size_t output_width() const {
    return stride_width() * (input_width() - 1) + adjustment_width() + dilated_kernel_width() - padding_width();
  }

  inline DeconvolutionOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
    assert(next_input_height >= 1);
    assert(next_input_width >= 1);
    this->next_input_height_ = next_input_height;
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline DeconvolutionOperatorTester& next_input_height(uint32_t next_input_height) {
    assert(next_input_height >= 1);
    this->next_input_height_ = next_input_height;
    return *this;
  }

  inline uint32_t next_input_height() const {
    if (this->next_input_height_ == 0) {
      return input_height();
    } else {
      return this->next_input_height_;
    }
  }

  inline DeconvolutionOperatorTester& next_input_width(uint32_t next_input_width) {
    assert(next_input_width >= 1);
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline uint32_t next_input_width() const {
    if (this->next_input_width_ == 0) {
      return input_width();
    } else {
      return this->next_input_width_;
    }
  }

  inline size_t next_output_height() const {
    return stride_height() * (next_input_height() - 1) + adjustment_height() + dilated_kernel_height() - padding_height();
  }

  inline size_t next_output_width() const {
    return stride_width() * (next_input_width() - 1) + adjustment_width() + dilated_kernel_width() - padding_width();
  }

  inline DeconvolutionOperatorTester& next_batch_size(size_t next_batch_size) {
    assert(next_batch_size >= 1);
    this->next_batch_size_ = next_batch_size;
    return *this;
  }

  inline size_t next_batch_size() const {
    if (this->next_batch_size_ == 0) {
      return batch_size();
    } else {
      return this->next_batch_size_;
    }
  }

  inline DeconvolutionOperatorTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline DeconvolutionOperatorTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline DeconvolutionOperatorTester& has_bias(bool has_bias) {
    this->has_bias_ = has_bias;
    return *this;
  }

  inline bool has_bias() const {
    return this->has_bias_;
  }

  inline DeconvolutionOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

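  // Runs the quantized (QU8) deconvolution test: fills random inputs, computes
  // a reference result in int32, derives an output scale/zero point spanning
  // the accumulator range, then checks the operator against the reference
  // within 0.9 of a quantization step.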
  void TestQU8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels());
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels());
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(i32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
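      // Accumulate contributions: output pixel (oy, ox) receives input pixel
      // (iy, ix) through kernel tap (ky, kx) only when oy + padding_top() -
      // ky * dilation_height() equals iy * stride_height() with iy in range
      // (and likewise along the width); unsigned underflow safely fails the check.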
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t y = oy + padding_top() - ky * dilation_height();
              const size_t iy = y / stride_height();
              if (iy * stride_height() == y && iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t x = ox + padding_left() - kx * dilation_width();
                  const size_t ix = x / stride_width();
                  if (ix * stride_width() == x && ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
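      // The scale maps the observed accumulator range onto the 255 representable
      // steps, and the zero point centers that range within [0, 255].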
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, run, and destroy Deconvolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t deconvolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_deconvolution2d_nhwc_qu8(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          stride_height(), stride_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, qmin(), qmax(),
          padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0,
          &deconvolution_op));

      // Smart pointer to automatically delete deconvolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_deconvolution_op(deconvolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_deconvolution2d_nhwc_qu8(
          deconvolution_op,
          batch_size(), input_height(), input_width(),
          adjustment_height(), adjustment_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(deconvolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

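  // Runs the F32 deconvolution test: the reference result is computed in float,
  // clamped to [output_min, output_max] derived from qmin()/qmax(), and the
  // operator output is checked within a 1.0e-4 relative tolerance.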
  void TestF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels());
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels());
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));
      std::fill(output_ref.begin(), output_ref.end(), 0.0f);

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t y = oy + padding_top() - ky * dilation_height();
              const size_t iy = y / stride_height();
              if (iy * stride_height() == y && iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t x = ox + padding_left() - kx * dilation_width();
                  const size_t ix = x / stride_width();
                  if (ix * stride_width() == x && ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

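      // qmin()/qmax() are reused as fractional positions within the accumulated
      // range; the extreme values 0 and 255 disable the respective bound.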
      const float output_min = qmin() == 0 ? -std::numeric_limits<float>::infinity() :
        accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = qmax() == 255 ? std::numeric_limits<float>::infinity() :
        accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Deconvolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t deconvolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_deconvolution2d_nhwc_f32(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          stride_height(), stride_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0,
          &deconvolution_op));

      // Smart pointer to automatically delete deconvolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_deconvolution_op(deconvolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_deconvolution2d_nhwc_f32(
          deconvolution_op,
          batch_size(), input_height(), input_width(),
          adjustment_height(), adjustment_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(deconvolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

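  // Same as TestQU8, but additionally re-setups the operator with the "next"
  // batch/input dimensions and runs it a second time, verifying that an
  // existing operator can be reshaped without being recreated.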
  void TestSetupQU8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
      (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels(),
      (next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()));
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output(std::max(
      (batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels(),
      (next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
    std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(i32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t y = oy + padding_top() - ky * dilation_height();
              const size_t iy = y / stride_height();
              if (iy * stride_height() == y && iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t x = ox + padding_left() - kx * dilation_width();
                  const size_t ix = x / stride_width();
                  if (ix * stride_width() == x && ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, and run Deconvolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t deconvolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_deconvolution2d_nhwc_qu8(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          stride_height(), stride_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, qmin(), qmax(),
          0, &deconvolution_op));

      // Smart pointer to automatically delete deconvolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_deconvolution_op(deconvolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_deconvolution2d_nhwc_qu8(
          deconvolution_op,
          batch_size(), input_height(), input_width(),
          adjustment_height(), adjustment_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(deconvolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results for the second run, including renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t y = oy + padding_top() - ky * dilation_height();
              const size_t iy = y / stride_height();
              if (iy * stride_height() == y && iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t x = ox + padding_left() - kx * dilation_width();
                  const size_t ix = x / stride_width();
                  if (ix * stride_width() == x && ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Setup and run Deconvolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_deconvolution2d_nhwc_qu8(
          deconvolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          adjustment_height(), adjustment_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(deconvolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

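  // F32 counterpart of TestSetupQU8: runs once, then reshapes to the "next"
  // dimensions via xnn_setup_deconvolution2d_nhwc_f32 and verifies the second run.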
  void TestSetupF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max(
      (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels(),
      (next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()));
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(std::max(
      (batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels(),
      (next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t y = oy + padding_top() - ky * dilation_height();
              const size_t iy = y / stride_height();
              if (iy * stride_height() == y && iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t x = ox + padding_left() - kx * dilation_width();
                  const size_t ix = x / stride_width();
                  if (ix * stride_width() == x && ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, and run Deconvolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t deconvolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_deconvolution2d_nhwc_f32(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          stride_height(), stride_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          0, &deconvolution_op));

      // Smart pointer to automatically delete deconvolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_deconvolution_op(deconvolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_deconvolution2d_nhwc_f32(
          deconvolution_op,
          batch_size(), input_height(), input_width(),
          adjustment_height(), adjustment_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(deconvolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results for the second run, including clamping.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t y = oy + padding_top() - ky * dilation_height();
              const size_t iy = y / stride_height();
              if (iy * stride_height() == y && iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t x = ox + padding_left() - kx * dilation_width();
                  const size_t ix = x / stride_width();
                  if (ix * stride_width() == x && ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      for (float& value : next_output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Setup and run Deconvolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_deconvolution2d_nhwc_f32(
          deconvolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          adjustment_height(), adjustment_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(deconvolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

 private:
  uint32_t padding_top_{0};
  uint32_t padding_right_{0};
  uint32_t padding_bottom_{0};
  uint32_t padding_left_{0};
  bool padding_tf_same_{false};
  size_t input_height_{1};
  size_t input_width_{1};
  uint32_t groups_{1};
  size_t group_input_channels_{1};
  size_t input_pixel_stride_{0};
  size_t group_output_channels_{1};
  size_t output_pixel_stride_{0};
  size_t batch_size_{1};
  uint32_t kernel_height_{1};
  uint32_t kernel_width_{1};
  uint32_t adjustment_height_{0};
  uint32_t adjustment_width_{0};
  uint32_t dilation_height_{1};
  uint32_t dilation_width_{1};
  uint32_t stride_height_{1};
  uint32_t stride_width_{1};
  size_t next_input_height_{0};
  size_t next_input_width_{0};
  size_t next_batch_size_{0};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  bool has_bias_{true};
  size_t iterations_{1};
};
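
// A minimal usage sketch (hypothetical test case, for illustration only; the
// parameter values below are assumptions, not taken from this header):
//
//   TEST(DECONVOLUTION_NHWC_F32, sketch) {
//     DeconvolutionOperatorTester()
//       .input_size(10, 10)
//       .kernel_size(3, 3)
//       .stride(2)
//       .groups(1)
//       .group_input_channels(15)
//       .group_output_channels(17)
//       .iterations(3)
//       .TestF32();
//   }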