// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <xnnpack.h>

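// Tester for XNNPACK 2D Convolution operators. Setters configure the
// convolution parameters and return *this so calls can be chained; the
// Test*() methods compute a scalar reference result, run the operator under
// test, and compare the two.
//
// A minimal usage sketch (the parameter values here are illustrative, not
// taken from any particular test):
//
//   ConvolutionOperatorTester()
//     .input_size(10, 10)
//     .kernel_size(3, 3)
//     .padding(1)
//     .groups(2)
//     .group_input_channels(7)
//     .group_output_channels(13)
//     .TestNHWCxF32();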
class ConvolutionOperatorTester {
 public:
  inline ConvolutionOperatorTester& padding_tf_same(bool padding_same) {
    if (padding_same) {
      assert(padding_top() == 0);
      assert(padding_left() == 0);
      assert(padding_bottom() == 0);
      assert(padding_right() == 0);
    }
    this->padding_tf_same_ = padding_same;
    return *this;
  }

  inline bool padding_tf_same() const {
    return this->padding_tf_same_;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding) {
    assert(!padding_tf_same());
    this->padding_top_ = padding;
    this->padding_right_ = padding;
    this->padding_bottom_ = padding;
    this->padding_left_ = padding;
    return *this;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_right_ = padding_width;
    this->padding_bottom_ = padding_height;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_height(uint32_t padding_height) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_bottom_ = padding_height;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_width(uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_width;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_top(uint32_t padding_top) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_top;
    return *this;
  }

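  // With TF SAME padding, the total padding along a dimension is
  // (output - 1) * stride + dilated_kernel - input; the start (top/left)
  // side gets the smaller half and the end (bottom/right) side gets the
  // remainder, matching TensorFlow's convention.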
  inline uint32_t padding_top() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height / 2;
    } else {
      return this->padding_top_;
    }
  }

  inline ConvolutionOperatorTester& padding_left(uint32_t padding_left) {
    assert(!padding_tf_same());
    this->padding_left_ = padding_left;
    return *this;
  }

  inline uint32_t padding_left() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width / 2;
    } else {
      return this->padding_left_;
    }
  }

  inline ConvolutionOperatorTester& padding_bottom(uint32_t padding_bottom) {
    assert(!padding_tf_same());
    this->padding_bottom_ = padding_bottom;
    return *this;
  }

  inline uint32_t padding_bottom() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height - total_padding_height / 2;
    } else {
      return this->padding_bottom_;
    }
  }

  inline ConvolutionOperatorTester& padding_right(uint32_t padding_right) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_right;
    return *this;
  }

  inline uint32_t padding_right() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width - total_padding_width / 2;
    } else {
      return this->padding_right_;
    }
  }

  inline ConvolutionOperatorTester& input_size(uint32_t input_height, uint32_t input_width) {
    assert(input_height >= 1);
    assert(input_width >= 1);
    this->input_height_ = input_height;
    this->input_width_ = input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& input_height(uint32_t input_height) {
    assert(input_height >= 1);
    this->input_height_ = input_height;
    return *this;
  }

  inline uint32_t input_height() const {
    return this->input_height_;
  }

  inline ConvolutionOperatorTester& input_width(uint32_t input_width) {
    assert(input_width >= 1);
    this->input_width_ = input_width;
    return *this;
  }

  inline uint32_t input_width() const {
    return this->input_width_;
  }

  inline ConvolutionOperatorTester& groups(uint32_t groups) {
    assert(groups >= 1);
    this->groups_ = groups;
    return *this;
  }

  inline uint32_t groups() const {
    return this->groups_;
  }

  inline ConvolutionOperatorTester& group_input_channels(size_t group_input_channels) {
    assert(group_input_channels >= 1);
    this->group_input_channels_ = group_input_channels;
    return *this;
  }

  inline size_t group_input_channels() const {
    return this->group_input_channels_;
  }

  inline ConvolutionOperatorTester& group_output_channels(size_t group_output_channels) {
    assert(group_output_channels >= 1);
    this->group_output_channels_ = group_output_channels;
    return *this;
  }

  inline size_t group_output_channels() const {
    return this->group_output_channels_;
  }

  inline ConvolutionOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size >= 1);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_size) {
    assert(kernel_size >= 1);
    this->kernel_height_ = kernel_size;
    this->kernel_width_ = kernel_size;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_height, uint32_t kernel_width) {
    assert(kernel_height >= 1);
    assert(kernel_width >= 1);
    this->kernel_height_ = kernel_height;
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_height(uint32_t kernel_height) {
    assert(kernel_height >= 1);
    this->kernel_height_ = kernel_height;
    return *this;
  }

  inline uint32_t kernel_height() const {
    return this->kernel_height_;
  }

  inline ConvolutionOperatorTester& kernel_width(uint32_t kernel_width) {
    assert(kernel_width >= 1);
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline uint32_t kernel_width() const {
    return this->kernel_width_;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation) {
    assert(dilation >= 1);
    this->dilation_height_ = dilation;
    this->dilation_width_ = dilation;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation_height, uint32_t dilation_width) {
    assert(dilation_height >= 1);
    assert(dilation_width >= 1);
    this->dilation_height_ = dilation_height;
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation_height(uint32_t dilation_height) {
    assert(dilation_height >= 1);
    this->dilation_height_ = dilation_height;
    return *this;
  }

  inline uint32_t dilation_height() const {
    return this->dilation_height_;
  }

  inline ConvolutionOperatorTester& dilation_width(uint32_t dilation_width) {
    assert(dilation_width >= 1);
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline uint32_t dilation_width() const {
    return this->dilation_width_;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling) {
    assert(subsampling >= 1);
    this->subsampling_height_ = subsampling;
    this->subsampling_width_ = subsampling;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling_height, uint32_t subsampling_width) {
    assert(subsampling_height >= 1);
    assert(subsampling_width >= 1);
    this->subsampling_height_ = subsampling_height;
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling_height(uint32_t subsampling_height) {
    assert(subsampling_height >= 1);
    this->subsampling_height_ = subsampling_height;
    return *this;
  }

  inline uint32_t subsampling_height() const {
    return this->subsampling_height_;
  }

  inline ConvolutionOperatorTester& subsampling_width(uint32_t subsampling_width) {
    assert(subsampling_width >= 1);
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline uint32_t subsampling_width() const {
    return this->subsampling_width_;
  }

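  // A pixel stride of 0 (the default) denotes tight packing, i.e.
  // groups() * group_(input|output)_channels() elements per pixel; an
  // explicit stride must be at least that large.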
  inline ConvolutionOperatorTester& input_pixel_stride(size_t input_pixel_stride) {
    assert(input_pixel_stride >= 1);
    this->input_pixel_stride_ = input_pixel_stride;
    return *this;
  }

  inline size_t input_pixel_stride() const {
    if (this->input_pixel_stride_ == 0) {
      return group_input_channels() * groups();
    } else {
      assert(this->input_pixel_stride_ >= group_input_channels() * groups());
      return this->input_pixel_stride_;
    }
  }

  inline ConvolutionOperatorTester& output_pixel_stride(size_t output_pixel_stride) {
    assert(output_pixel_stride >= 1);
    this->output_pixel_stride_ = output_pixel_stride;
    return *this;
  }

  inline size_t output_pixel_stride() const {
    if (this->output_pixel_stride_ == 0) {
      return group_output_channels() * groups();
    } else {
      assert(this->output_pixel_stride_ >= group_output_channels() * groups());
      return this->output_pixel_stride_;
    }
  }

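  // Batch strides (used by the NCHW tests) likewise default, when 0, to the
  // dense size of a single image; explicit values must be at least that
  // large.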
  inline ConvolutionOperatorTester& input_batch_stride(size_t input_batch_stride) {
    assert(input_batch_stride >= 1);
    this->input_batch_stride_ = input_batch_stride;
    return *this;
  }

  inline size_t input_batch_stride() const {
    if (this->input_batch_stride_ == 0) {
      return groups() * group_input_channels() * input_height() * input_width();
    } else {
      assert(this->input_batch_stride_ >= groups() * group_input_channels() * input_height() * input_width());
      return this->input_batch_stride_;
    }
  }

  inline ConvolutionOperatorTester& output_batch_stride(size_t output_batch_stride) {
    assert(output_batch_stride >= 1);
    this->output_batch_stride_ = output_batch_stride;
    return *this;
  }

  inline size_t output_batch_stride() const {
    if (this->output_batch_stride_ == 0) {
      return groups() * group_output_channels() * output_height() * output_width();
    } else {
      assert(this->output_batch_stride_ >= groups() * group_output_channels() * output_height() * output_width());
      return this->output_batch_stride_;
    }
  }

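  // Effective kernel extent: dilation inserts (dilation - 1) gaps between
  // kernel taps, so the extent grows to (kernel - 1) * dilation + 1.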
  inline uint32_t dilated_kernel_height() const {
    return (kernel_height() - 1) * dilation_height() + 1;
  }

  inline uint32_t dilated_kernel_width() const {
    return (kernel_width() - 1) * dilation_width() + 1;
  }

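  // Output size follows the standard convolution formula
  //   output = (padded_input - dilated_kernel) / stride + 1,
  // clamped below at 1. With TF SAME padding it is ceil(input / stride),
  // computed as (input + stride - 1) / stride.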
  inline size_t output_height() const {
    if (padding_tf_same()) {
      return (input_height() + subsampling_height() - 1) / subsampling_height();
    } else {
      const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
      if (padded_input_height <= dilated_kernel_height()) {
        return 1;
      } else {
        return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
      }
    }
  }

  inline size_t output_width() const {
    if (padding_tf_same()) {
      return (input_width() + subsampling_width() - 1) / subsampling_width();
    } else {
      const size_t padded_input_width = padding_left() + input_width() + padding_right();
      if (padded_input_width <= dilated_kernel_width()) {
        return 1;
      } else {
        return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
      }
    }
  }

  inline ConvolutionOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
    assert(next_input_height >= 1);
    assert(next_input_width >= 1);
    this->next_input_height_ = next_input_height;
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& next_input_height(uint32_t next_input_height) {
    assert(next_input_height >= 1);
    this->next_input_height_ = next_input_height;
    return *this;
  }

  inline uint32_t next_input_height() const {
    if (this->next_input_height_ == 0) {
      return input_height();
    } else {
      return this->next_input_height_;
    }
  }

  inline ConvolutionOperatorTester& next_input_width(uint32_t next_input_width) {
    assert(next_input_width >= 1);
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline uint32_t next_input_width() const {
    if (this->next_input_width_ == 0) {
      return input_width();
    } else {
      return this->next_input_width_;
    }
  }

  inline size_t next_output_height() const {
    const size_t padded_input_height = padding_top() + next_input_height() + padding_bottom();
    if (padded_input_height <= dilated_kernel_height()) {
      return 1;
    } else {
      return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
    }
  }

  inline size_t next_output_width() const {
    const size_t padded_input_width = padding_left() + next_input_width() + padding_right();
    if (padded_input_width <= dilated_kernel_width()) {
      return 1;
    } else {
      return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
    }
  }

  inline ConvolutionOperatorTester& next_batch_size(size_t next_batch_size) {
    assert(next_batch_size >= 1);
    this->next_batch_size_ = next_batch_size;
    return *this;
  }

  inline size_t next_batch_size() const {
    if (this->next_batch_size_ == 0) {
      return batch_size();
    } else {
      return this->next_batch_size_;
    }
  }

  inline ConvolutionOperatorTester& sparsity(float sparsity) {
    this->sparsity_ = sparsity;
    return *this;
  }

  inline float sparsity() const {
    return this->sparsity_;
  }

  inline ConvolutionOperatorTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline ConvolutionOperatorTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline ConvolutionOperatorTester& force_nhwc_input(bool force_nhwc_input) {
    this->force_nhwc_input_ = force_nhwc_input;
    return *this;
  }

  inline bool force_nhwc_input() const {
    return this->force_nhwc_input_;
  }

  inline ConvolutionOperatorTester& depthwise_layout(bool depthwise_layout) {
    this->depthwise_layout_ = depthwise_layout;
    return *this;
  }

  inline bool depthwise_layout() const {
    return this->depthwise_layout_;
  }

  inline ConvolutionOperatorTester& has_bias(bool has_bias) {
    this->has_bias_ = has_bias;
    return *this;
  }

  inline bool has_bias() const {
    return this->has_bias_;
  }

  inline ConvolutionOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

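  // Tests the NHWC uint8 (Q8) convolution path: computes int32 reference
  // accumulators, derives quantization parameters from their observed range,
  // runs the XNNPACK operator, and checks that outputs match the reference
  // to within one quantization step.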
  void TestNHWCxQ8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()) + 8);
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output(batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(s32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                              (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
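      // The scale maps the observed accumulator range onto the 255 steps of
      // the uint8 range, and the zero point (clamped to [0, 255]) centers
      // that range around the midpoint of the quantized domain.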
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize());
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_q8(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, qmin(), qmax(),
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

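  // Tests the NHWC float convolution path: computes a float reference
  // result, derives clamping bounds from the qmin()/qmax() fractions of the
  // observed output range, runs the XNNPACK operator, and compares with a
  // relative tolerance of 1.0e-4.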
  void TestNHWCxF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()));
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g] *
                            kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
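      // qmin()/qmax() select the clamping bounds as fractions of the observed
      // output range, in 255ths from either end, so qmin() == 0 and
      // qmax() == 255 leave the output effectively unclamped.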
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize());
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_f32(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

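  // Tests the NCHW float convolution path. Kernel entries are zeroed out
  // with probability sparsity() to exercise sparse-aware kernels; with
  // force_nhwc_input() the input is laid out as NHWC while the output stays
  // NCHW. Unsupported parameter combinations skip the test.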
  void TestNCHWxF32() const {
    ASSERT_FALSE(depthwise_layout());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
    auto prng = std::bind(std::uniform_real_distribution<float>(), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      batch_size() * input_batch_stride() + groups() * group_input_channels() * input_height() * input_width());
    std::vector<float> kernel(
      groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(
      batch_size() * output_batch_stride() + groups() * group_output_channels() * output_height() * output_width());
    std::vector<float> output_ref(batch_size() * groups() * group_output_channels() * output_height() * output_width());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      for (float& k : kernel) {
        if (prng() <= sparsity()) {
          k = 0.0f;
        }
      }
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      if (force_nhwc_input()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
                              input[((((i * input_height() + iy) * input_width() + ix) * groups() + g) * group_input_channels() + ic)] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
                              input[i * input_batch_stride() +
                                ((g * group_input_channels() + ic) * input_height() + iy) * input_width() + ix] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize());
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nchw_f32(
        padding_top(), padding_right(), padding_bottom(), padding_left(),
        kernel_height(), kernel_width(),
        subsampling_height(), subsampling_width(),
        dilation_height(), dilation_width(),
        groups(), group_input_channels(), group_output_channels(),
        kernel.data(), has_bias() ? bias.data() : nullptr,
        output_min, output_max,
        (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (force_nhwc_input() ? XNN_FLAG_INPUT_NHWC : 0),
        &convolution_op);
      if (status == xnn_status_unsupported_parameter) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nchw_f32(
          convolution_op,
          batch_size(), input_batch_stride(), output_batch_stride(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[i * output_batch_stride() + ((g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
                ASSERT_LE(output[i * output_batch_stride() + ((g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
                ASSERT_NEAR(
                  output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x],
                  output[i * output_batch_stride() + ((g * group_output_channels() + c) * output_height() + y) * output_width() + x],
                  1.0e-4 * std::abs(output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
              }
            }
          }
        }
      }
    }
  }

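  // Tests re-setup of a Q8 NHWC convolution: the operator is created once,
  // then set up and run a second time with (possibly different) next_*
  // batch size and input dimensions, verifying both runs against their
  // references.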
  void TestSetupNHWCxQ8() const {
    ASSERT_FALSE(depthwise_layout());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
      batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()),
      next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_pixel_stride() + groups() * group_input_channels())) + 8);
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output(std::max(
      batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()),
      next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_pixel_stride() + groups() * group_output_channels())));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
    std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(s32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, and run Convolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize());
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_q8(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, qmin(), qmax(),
          0, &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results for the second run, including renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

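  // Float analogue of TestSetupNHWCxQ8: creates the operator once, then
  // verifies results after a second setup with the next_* dimensions.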
Marat Dukhanefc47b82019-11-18 09:25:38 -08001248 void TestSetupNHWCxF32() const {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001249 ASSERT_FALSE(depthwise_layout());
1250
1251 std::random_device random_device;
1252 auto rng = std::mt19937(random_device());
1253 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
1254
1255 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max(
1256 batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()),
1257 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_pixel_stride() + groups() * group_input_channels())));
1258 std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1259 std::vector<float> bias(groups() * group_output_channels());
1260 std::vector<float> output(std::max(
1261 batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()),
1262 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_pixel_stride() + groups() * group_output_channels())));
1263 std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1264 std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1265
1266 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1267 std::generate(input.begin(), input.end(), std::ref(f32rng));
1268 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
1269 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
1270 std::fill(output.begin(), output.end(), nanf(""));
1271
1272 // Compute reference results, without clamping.
Marat Dukhanf568f082019-10-30 09:47:07 -07001273 if (has_bias()) {
1274 for (size_t i = 0; i < batch_size(); i++) {
1275 for (size_t oy = 0; oy < output_height(); oy++) {
1276 for (size_t ox = 0; ox < output_width(); ox++) {
1277 for (size_t g = 0; g < groups(); g++) {
1278 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1279 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1280 bias[g * group_output_channels() + oc];
1281 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001282 }
1283 }
1284 }
1285 }
Marat Dukhanf568f082019-10-30 09:47:07 -07001286 } else {
1287 std::fill(output_ref.begin(), output_ref.end(), 0.0f);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001288 }
1289 for (size_t i = 0; i < batch_size(); i++) {
1290 for (size_t oy = 0; oy < output_height(); oy++) {
1291 for (size_t ox = 0; ox < output_width(); ox++) {
1292 for (size_t ky = 0; ky < kernel_height(); ky++) {
1293 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1294 if (iy < input_height()) {
1295 for (size_t kx = 0; kx < kernel_width(); kx++) {
1296 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1297 if (ix < input_width()) {
1298 for (size_t g = 0; g < groups(); g++) {
1299 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1300 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1301 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1302 input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
1303 kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
1304 }
1305 }
1306 }
1307 }
1308 }
1309 }
1310 }
1311 }
1312 }
1313 }
1314
      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
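      // qmin()/qmax() position the clamping bounds as fractions (in 1/255
      // steps) of the observed accumulation range, so qmin() == 0 and
      // qmax() == 255 leave the reference results unclamped.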

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, and run Convolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize());
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_f32(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          0, &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
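      // (gtest ASSERT_* macros return from this method on failure, so the RAII
      // guard above is what keeps a failed expectation from leaking the operator.)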

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
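      // The tolerance below is relative (1.0e-4 * |reference|), which absorbs
      // differences in floating-point accumulation order between the operator
      // and the naive reference loops.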
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate input data for the second run; the kernel and bias were
      // packed into the operator when it was created, so they stay the same.
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results for the second run, including clamping.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
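      // Clamp with the bounds computed from the first run: the operator was
      // created with output_min/output_max, and re-setup cannot change them.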
      for (float& value : next_output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

 private:
  uint32_t padding_top_{0};
  uint32_t padding_right_{0};
  uint32_t padding_bottom_{0};
  uint32_t padding_left_{0};
  bool padding_tf_same_{false};
  size_t input_height_{1};
  size_t input_width_{1};
  uint32_t groups_{1};
  size_t group_input_channels_{1};
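  // Pixel strides of 0 are taken to mean "tightly packed"; the corresponding
  // accessors are assumed to fall back to groups() * group_{input,output}_channels().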
  size_t input_pixel_stride_{0};
  size_t input_batch_stride_{0};
  size_t group_output_channels_{1};
  size_t output_pixel_stride_{0};
  size_t output_batch_stride_{0};
  size_t batch_size_{1};
  uint32_t kernel_height_{1};
  uint32_t kernel_width_{1};
  uint32_t dilation_height_{1};
  uint32_t dilation_width_{1};
  uint32_t subsampling_height_{1};
  uint32_t subsampling_width_{1};
  size_t next_input_height_{0};
  size_t next_input_width_{0};
  size_t next_batch_size_{0};
  float sparsity_{0.0f};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  bool depthwise_layout_{false};
  bool force_nhwc_input_{false};
  bool has_bias_{true};
  size_t iterations_{1};
};
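
// Usage sketch (illustrative, not from the original file): a gtest case might
// drive the two-run setup path above roughly like this. The setter names
// mirror the getters used in this file, and TestSetupF32() is a hypothetical
// name for the method whose body appears above.
//
//   TEST(CONVOLUTION_NHWC_F32, setup_changing_batch) {
//     ConvolutionOperatorTester()
//       .batch_size(3)
//       .next_batch_size(5)
//       .input_height(8)
//       .input_width(8)
//       .kernel_height(3)
//       .kernel_width(3)
//       .group_input_channels(15)
//       .group_output_channels(17)
//       .TestSetupF32();
//   }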