// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>


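// Tester harness for XNNPACK 2D convolution operators: chainable setters
// configure the convolution geometry, and the Test*() methods create the
// operator, run it on random inputs, and compare against a naive reference
// convolution.
//
// Illustrative usage sketch (parameter values are arbitrary examples, not
// taken from any particular test):
//
//   ConvolutionOperatorTester()
//     .input_size(13, 14)
//     .kernel_size(3, 3)
//     .groups(2)
//     .group_input_channels(15)
//     .group_output_channels(17)
//     .TestNHWCxF32();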
class ConvolutionOperatorTester {
 public:
  inline ConvolutionOperatorTester& padding_tf_same(bool padding_same) {
    if (padding_same) {
      assert(padding_top() == 0);
      assert(padding_left() == 0);
      assert(padding_bottom() == 0);
      assert(padding_right() == 0);
    }
    this->padding_tf_same_ = padding_same;
    return *this;
  }

  inline bool padding_tf_same() const {
    return this->padding_tf_same_;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding) {
    assert(!padding_tf_same());
    this->padding_top_ = padding;
    this->padding_right_ = padding;
    this->padding_bottom_ = padding;
    this->padding_left_ = padding;
    return *this;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_right_ = padding_width;
    this->padding_bottom_ = padding_height;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_height(uint32_t padding_height) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_bottom_ = padding_height;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_width(uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_width;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_top(uint32_t padding_top) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_top;
    return *this;
  }

  inline uint32_t padding_top() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height / 2;
    } else {
      return this->padding_top_;
    }
  }

  inline ConvolutionOperatorTester& padding_left(uint32_t padding_left) {
    assert(!padding_tf_same());
    this->padding_left_ = padding_left;
    return *this;
  }

  inline uint32_t padding_left() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width / 2;
    } else {
      return this->padding_left_;
    }
  }

  inline ConvolutionOperatorTester& padding_bottom(uint32_t padding_bottom) {
    assert(!padding_tf_same());
    this->padding_bottom_ = padding_bottom;
    return *this;
  }

  inline uint32_t padding_bottom() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height - total_padding_height / 2;
    } else {
      return this->padding_bottom_;
    }
  }

  inline ConvolutionOperatorTester& padding_right(uint32_t padding_right) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_right;
    return *this;
  }

  inline uint32_t padding_right() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width - total_padding_width / 2;
    } else {
      return this->padding_right_;
    }
  }

  inline ConvolutionOperatorTester& input_size(uint32_t input_height, uint32_t input_width) {
    assert(input_height >= 1);
    assert(input_width >= 1);
    this->input_height_ = input_height;
    this->input_width_ = input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& input_height(uint32_t input_height) {
    assert(input_height >= 1);
    this->input_height_ = input_height;
    return *this;
  }

  inline uint32_t input_height() const {
    return this->input_height_;
  }

  inline ConvolutionOperatorTester& input_width(uint32_t input_width) {
    assert(input_width >= 1);
    this->input_width_ = input_width;
    return *this;
  }

  inline uint32_t input_width() const {
    return this->input_width_;
  }

  inline ConvolutionOperatorTester& groups(uint32_t groups) {
    assert(groups >= 1);
    this->groups_ = groups;
    return *this;
  }

  inline uint32_t groups() const {
    return this->groups_;
  }

  inline ConvolutionOperatorTester& group_input_channels(size_t group_input_channels) {
    assert(group_input_channels >= 1);
    this->group_input_channels_ = group_input_channels;
    return *this;
  }

  inline size_t group_input_channels() const {
    return this->group_input_channels_;
  }

  inline ConvolutionOperatorTester& group_output_channels(size_t group_output_channels) {
    assert(group_output_channels >= 1);
    this->group_output_channels_ = group_output_channels;
    return *this;
  }

  inline size_t group_output_channels() const {
    return this->group_output_channels_;
  }

  inline ConvolutionOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size >= 1);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_size) {
    assert(kernel_size >= 1);
    this->kernel_height_ = kernel_size;
    this->kernel_width_ = kernel_size;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_height, uint32_t kernel_width) {
    assert(kernel_height >= 1);
    assert(kernel_width >= 1);
    this->kernel_height_ = kernel_height;
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_height(uint32_t kernel_height) {
    assert(kernel_height >= 1);
    this->kernel_height_ = kernel_height;
    return *this;
  }

  inline uint32_t kernel_height() const {
    return this->kernel_height_;
  }

  inline ConvolutionOperatorTester& kernel_width(uint32_t kernel_width) {
    assert(kernel_width >= 1);
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline uint32_t kernel_width() const {
    return this->kernel_width_;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation) {
    assert(dilation >= 1);
    this->dilation_height_ = dilation;
    this->dilation_width_ = dilation;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation_height, uint32_t dilation_width) {
    assert(dilation_height >= 1);
    assert(dilation_width >= 1);
    this->dilation_height_ = dilation_height;
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation_height(uint32_t dilation_height) {
    assert(dilation_height >= 1);
    this->dilation_height_ = dilation_height;
    return *this;
  }

  inline uint32_t dilation_height() const {
    return this->dilation_height_;
  }

  inline ConvolutionOperatorTester& dilation_width(uint32_t dilation_width) {
    assert(dilation_width >= 1);
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline uint32_t dilation_width() const {
    return this->dilation_width_;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling) {
    assert(subsampling >= 1);
    this->subsampling_height_ = subsampling;
    this->subsampling_width_ = subsampling;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling_height, uint32_t subsampling_width) {
    assert(subsampling_height >= 1);
    assert(subsampling_width >= 1);
    this->subsampling_height_ = subsampling_height;
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling_height(uint32_t subsampling_height) {
    assert(subsampling_height >= 1);
    this->subsampling_height_ = subsampling_height;
    return *this;
  }

  inline uint32_t subsampling_height() const {
    return this->subsampling_height_;
  }

  inline ConvolutionOperatorTester& subsampling_width(uint32_t subsampling_width) {
    assert(subsampling_width >= 1);
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline uint32_t subsampling_width() const {
    return this->subsampling_width_;
  }

  inline ConvolutionOperatorTester& input_channel_stride(size_t input_channel_stride) {
    assert(input_channel_stride >= 1);
    this->input_channel_stride_ = input_channel_stride;
    return *this;
  }

  inline size_t input_channel_stride() const {
    if (this->input_channel_stride_ == 0) {
      return group_input_channels() * groups();
    } else {
      assert(this->input_channel_stride_ >= group_input_channels() * groups());
      return this->input_channel_stride_;
    }
  }

  inline ConvolutionOperatorTester& output_channel_stride(size_t output_channel_stride) {
    assert(output_channel_stride >= 1);
    this->output_channel_stride_ = output_channel_stride;
    return *this;
  }

  inline size_t output_channel_stride() const {
    if (this->output_channel_stride_ == 0) {
      return group_output_channels() * groups();
    } else {
      assert(this->output_channel_stride_ >= group_output_channels() * groups());
      return this->output_channel_stride_;
    }
  }

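  // Effective kernel extent once dilation is applied:
  // dilation * (kernel_size - 1) + 1.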
  inline uint32_t dilated_kernel_height() const {
    return (kernel_height() - 1) * dilation_height() + 1;
  }

  inline uint32_t dilated_kernel_width() const {
    return (kernel_width() - 1) * dilation_width() + 1;
  }

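  // Output size follows standard convolution arithmetic: TF-SAME padding
  // yields ceil(input / stride); otherwise
  // floor((padded_input - dilated_kernel) / stride) + 1, with a minimum of 1.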
  inline size_t output_height() const {
    if (padding_tf_same()) {
      return (input_height() + subsampling_height() - 1) / subsampling_height();
    } else {
      const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
      if (padded_input_height <= dilated_kernel_height()) {
        return 1;
      } else {
        return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
      }
    }
  }

  inline size_t output_width() const {
    if (padding_tf_same()) {
      return (input_width() + subsampling_width() - 1) / subsampling_width();
    } else {
      const size_t padded_input_width = padding_left() + input_width() + padding_right();
      if (padded_input_width <= dilated_kernel_width()) {
        return 1;
      } else {
        return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
      }
    }
  }

  inline ConvolutionOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
    assert(next_input_height >= 1);
    assert(next_input_width >= 1);
    this->next_input_height_ = next_input_height;
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& next_input_height(uint32_t next_input_height) {
    assert(next_input_height >= 1);
    this->next_input_height_ = next_input_height;
    return *this;
  }

  inline uint32_t next_input_height() const {
    if (this->next_input_height_ == 0) {
      return input_height();
    } else {
      return this->next_input_height_;
    }
  }

  inline ConvolutionOperatorTester& next_input_width(uint32_t next_input_width) {
    assert(next_input_width >= 1);
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline uint32_t next_input_width() const {
    if (this->next_input_width_ == 0) {
      return input_width();
    } else {
      return this->next_input_width_;
    }
  }

  inline size_t next_output_height() const {
    const size_t padded_input_height = padding_top() + next_input_height() + padding_bottom();
    if (padded_input_height <= dilated_kernel_height()) {
      return 1;
    } else {
      return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
    }
  }

  inline size_t next_output_width() const {
    const size_t padded_input_width = padding_left() + next_input_width() + padding_right();
    if (padded_input_width <= dilated_kernel_width()) {
      return 1;
    } else {
      return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
    }
  }

  inline ConvolutionOperatorTester& next_batch_size(size_t next_batch_size) {
    assert(next_batch_size >= 1);
    this->next_batch_size_ = next_batch_size;
    return *this;
  }

  inline size_t next_batch_size() const {
    if (this->next_batch_size_ == 0) {
      return batch_size();
    } else {
      return this->next_batch_size_;
    }
  }

  inline ConvolutionOperatorTester& sparsity(float sparsity) {
    this->sparsity_ = sparsity;
    return *this;
  }

  inline float sparsity() const {
    return this->sparsity_;
  }

  inline ConvolutionOperatorTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline ConvolutionOperatorTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline ConvolutionOperatorTester& force_nhwc_input(bool force_nhwc_input) {
    this->force_nhwc_input_ = force_nhwc_input;
    return *this;
  }

  inline bool force_nhwc_input() const {
    return this->force_nhwc_input_;
  }

  inline ConvolutionOperatorTester& depthwise_layout(bool depthwise_layout) {
    this->depthwise_layout_ = depthwise_layout;
    return *this;
  }

  inline bool depthwise_layout() const {
    return this->depthwise_layout_;
  }

  inline ConvolutionOperatorTester& has_bias(bool has_bias) {
    this->has_bias_ = has_bias;
    return *this;
  }

  inline bool has_bias() const {
    return this->has_bias_;
  }

  inline ConvolutionOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

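  // End-to-end test of the quantized (Q8) NHWC convolution: int32 reference
  // accumulators are computed first, the output scale and zero point are
  // derived from the observed accumulator range, and operator outputs must
  // match the renormalized reference within 0.9.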
  void TestNHWCxQ8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()) + 8);
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(s32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                              (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

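      // The output scale maps the observed accumulator range onto the 255
      // representable quantized steps; the zero point centers that range.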
      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_q8(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, qmin(), qmax(),
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                    0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

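  // End-to-end test of the single-precision NHWC convolution: the reference
  // result is clamped to [output_min, output_max] derived from qmin()/qmax(),
  // and outputs must match within 1.0e-4 relative tolerance.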
  void TestNHWCxF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()));
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g] *
                            kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

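      // Map qmin()/qmax() from the [0, 255] quantized domain onto the observed
      // accumulator range to obtain float clamping bounds.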
      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_f32(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
                    1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

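  // End-to-end test of the half-precision NHWC convolution: reference math is
  // done in fp32 on fp16-rounded values, the test is skipped when the hardware
  // lacks F16 support, and outputs must match within 1.0e-2 relative
  // tolerance.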
  void TestNHWCxF16() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()));
    std::vector<uint16_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<uint16_t> bias(groups() * group_output_channels());
    std::vector<uint16_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f16rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
      std::generate(bias.begin(), bias.end(), std::ref(f16rng));
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) *
                            fp16_ieee_to_fp32_value(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
                              fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

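      // fp16 rounding may collapse the scaled min and max to the same value;
      // in that case fall back to an unbounded clamping range.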
      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float scaled_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin())));
      const float scaled_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())));
      const float output_min = scaled_min == scaled_max ? -std::numeric_limits<float>::infinity() : scaled_min;
      const float output_max = scaled_min == scaled_max ? +std::numeric_limits<float>::infinity() : scaled_max;

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_f16(
        padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
        padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
        kernel_height(), kernel_width(),
        subsampling_height(), subsampling_width(),
        dilation_height(), dilation_width(),
        groups(), group_input_channels(), group_output_channels(),
        input_channel_stride(), output_channel_stride(),
        kernel.data(), has_bias() ? bias.data() : nullptr,
        output_min, output_max,
        (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
        &convolution_op);

      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f16(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
//                ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
//                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
//                ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
//                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
                    1.0e-2 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

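  // End-to-end test of the single-precision NCHW convolution: a sparsity()
  // fraction of kernel weights is zeroed to exercise sparse kernels, and
  // force_nhwc_input() covers the XNN_FLAG_INPUT_NHWC path; unsupported
  // parameter combinations are skipped.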
  void TestNCHWxF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
    auto prng = std::bind(std::uniform_real_distribution<float>(), rng);

    std::vector<float> input(2 * XNN_EXTRA_BYTES / sizeof(float) +
      ((batch_size() - 1) * input_channel_stride() + groups() * group_input_channels()) * input_height() * input_width());
    std::vector<float> kernel(
      groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(
      ((batch_size() - 1) * output_channel_stride() + groups() * group_output_channels()) * output_height() * output_width());
    std::vector<float> output_ref(batch_size() * groups() * group_output_channels() * output_height() * output_width());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      for (float& k : kernel) {
        if (prng() <= sparsity()) {
          k = 0.0f;
        }
      }
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      if (force_nhwc_input()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
                              input[((((i * input_height() + iy) * input_width() + ix) * groups() + g) * group_input_channels() + ic)] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
                            input[((i * input_channel_stride() + g) * input_height() + iy) * input_width() + ix] *
                            kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
                              input[((i * input_channel_stride() + g * group_input_channels() + ic) * input_height() + iy) * input_width() + ix] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

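      // qmin() == 0 and qmax() == 255 mean "no clamping", so the corresponding
      // bound is left at infinity.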
      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = qmin() == 0 ? -std::numeric_limits<float>::infinity() :
        accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = qmax() == 255 ? std::numeric_limits<float>::infinity() :
        accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nchw_f32(
        padding_top(), padding_right(), padding_bottom(), padding_left(),
        kernel_height(), kernel_width(),
        subsampling_height(), subsampling_width(),
        dilation_height(), dilation_width(),
        groups(), group_input_channels(), group_output_channels(),
        input_channel_stride(), output_channel_stride(),
        kernel.data(), has_bias() ? bias.data() : nullptr,
        output_min, output_max,
        (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (force_nhwc_input() ? XNN_FLAG_INPUT_NHWC : 0),
        &convolution_op);
      if (status == xnn_status_unsupported_parameter) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nchw_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
                ASSERT_LE(output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
                ASSERT_NEAR(
                    output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x],
                    output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x],
                    1.0e-4 * std::abs(output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
              }
            }
          }
        }
      }
    }
  }

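  // Tests re-setup of the Q8 NHWC convolution: buffers are sized with
  // std::max() to fit both the initial and the next_*() dimensions, and the
  // operator is created, set up, and run once at the initial size before being
  // verified.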
1185 void TestSetupNHWCxQ8() const {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001186 ASSERT_FALSE(depthwise_layout());
1187
1188 std::random_device random_device;
1189 auto rng = std::mt19937(random_device());
1190 auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
Marat Dukhan5ce30d92020-04-14 03:31:26 -07001191 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001192
1193 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001194 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
1195 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())) + 8);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001196 std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1197 std::vector<int32_t> bias(groups() * group_output_channels());
1198 std::vector<uint8_t> output(std::max(
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001199 batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
1200 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
XNNPACK Teamb455b122019-09-27 18:10:33 -07001201 std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1202 std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1203 std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1204 std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1205
1206 const uint8_t input_zero_point = 127;
1207 const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(s32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));
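      // The scale spreads the observed accumulator range over the 255 representable steps of
      // uint8, and the zero point recenters it:
      //   output_scale = (accumulated_max - accumulated_min) / 255
      //   output_zero_point = round(127.5 - (accumulated_min + accumulated_max) / (2 * output_scale))
      // clamped to [0, 255], so the requantized results span the full output range.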

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, and run Convolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_q8(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, qmin(), qmax(),
          0, &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results for the second run, including renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

  void TestSetupNHWCxF16() const {
    ASSERT_FALSE(depthwise_layout());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
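    // f16rng yields random floats in [0.1, 1.0] converted to IEEE half-precision bit
    // patterns (stored in uint16_t) via the fp16 library.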

    std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + std::max(
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
      next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())));
    std::vector<uint16_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<uint16_t> bias(groups() * group_output_channels());
    std::vector<uint16_t> output(std::max(
      batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
      next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f16rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
      std::generate(bias.begin(), bias.end(), std::ref(f16rng));
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
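      // 0x7E00 is a canonical half-precision NaN: any output element the operator fails to
      // overwrite will trip the comparisons below.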

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
                            fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float scaled_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin())));
      const float scaled_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())));
      const float output_min = scaled_min == scaled_max ? -std::numeric_limits<float>::infinity() : scaled_min;
      const float output_max = scaled_min == scaled_max ? +std::numeric_limits<float>::infinity() : scaled_max;
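      // The clamp bounds are rounded through half precision to match what the operator can
      // represent; if rounding collapses them to a single value, clamping is effectively
      // disabled by widening the bounds to +/-infinity.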

      for (float& output_value : output_ref) {
        output_value = std::min(std::max(output_value, output_min), output_max);
      }

      // Create, setup, and run Convolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_f16(
        padding_top(), padding_right(), padding_bottom(), padding_left(),
        kernel_height(), kernel_width(),
        subsampling_height(), subsampling_width(),
        dilation_height(), dilation_width(),
        groups(), group_input_channels(), group_output_channels(),
        input_channel_stride(), output_channel_stride(),
        kernel.data(), has_bias() ? bias.data() : nullptr,
        output_min, output_max,
        0, &convolution_op);

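      // Half-precision convolution may be unsupported on this CPU, in which case the test is
      // skipped rather than failed.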
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f16(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
                  1.0e-2 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(f16rng));
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results for the second run, including clamping.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
                }
              }
            }
          }
        }
      } else {
        std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            fp16_ieee_to_fp32_value(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
                            fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      for (float& value : next_output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f16(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                  fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
                  1.0e-2 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

  void TestSetupNHWCxF32() const {
    ASSERT_FALSE(depthwise_layout());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max(
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
      next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())));
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(std::max(
      batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
      next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
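      // qmin()/qmax() select which fraction of the accumulated range survives clamping:
      // qmin = 0 and qmax = 255 leave the output effectively unclamped, while tighter values
      // trim the bottom and top of the range proportionally.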

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, and run Convolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_f32(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          0, &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results for the second run, including clamping.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      for (float& value : next_output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

 private:
  uint32_t padding_top_{0};
  uint32_t padding_right_{0};
  uint32_t padding_bottom_{0};
  uint32_t padding_left_{0};
  bool padding_tf_same_{false};
  size_t input_height_{1};
  size_t input_width_{1};
  uint32_t groups_{1};
  size_t group_input_channels_{1};
  size_t input_channel_stride_{0};
  size_t group_output_channels_{1};
  size_t output_channel_stride_{0};
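  // A channel stride of 0 appears to be a sentinel: the input_channel_stride() and
  // output_channel_stride() accessors defined earlier in this class presumably fall back to
  // the dense default of groups() * group_input_channels() (resp. group_output_channels()).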
  size_t batch_size_{1};
  uint32_t kernel_height_{1};
  uint32_t kernel_width_{1};
  uint32_t dilation_height_{1};
  uint32_t dilation_width_{1};
  uint32_t subsampling_height_{1};
  uint32_t subsampling_width_{1};
  size_t next_input_height_{0};
  size_t next_input_width_{0};
  size_t next_batch_size_{0};
  float sparsity_{0.0f};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  bool depthwise_layout_{false};
  bool force_nhwc_input_{false};
  bool has_bias_{true};
  size_t iterations_{1};
};
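
// A minimal usage sketch (hypothetical test case, not part of this header), assuming the
// fluent setters defined earlier in this class; it exercises the resize-and-rerun path that
// TestSetupNHWCxF32() verifies:
//
//   TEST(CONVOLUTION_NHWC_F32, setup_changing_batch) {
//     ConvolutionOperatorTester()
//       .batch_size(3)
//       .next_batch_size(5)
//       .input_size(8, 8)
//       .kernel_size(3)
//       .padding(1)
//       .groups(2)
//       .group_input_channels(15)
//       .group_output_channels(17)
//       .TestSetupNHWCxF32();
//   }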