// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <xnnpack.h>

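// Tester for XNNPACK 2D convolution operators. The builder-style setters
// mirror the parameters of the xnn_create_convolution2d_* functions; each
// Test* method generates random data, computes a reference result, runs the
// operator under test, and verifies the output. Typical usage (parameter
// values below are illustrative only):
//
//   ConvolutionOperatorTester()
//     .input_size(10, 9)
//     .kernel_size(3, 3)
//     .padding(1)
//     .groups(2)
//     .group_input_channels(7)
//     .group_output_channels(13)
//     .iterations(3)
//     .TestNHWCxF32();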
class ConvolutionOperatorTester {
 public:
  inline ConvolutionOperatorTester& padding_tf_same(bool padding_same) {
    if (padding_same) {
      assert(padding_top() == 0);
      assert(padding_left() == 0);
      assert(padding_bottom() == 0);
      assert(padding_right() == 0);
    }
    this->padding_tf_same_ = padding_same;
    return *this;
  }

  inline bool padding_tf_same() const {
    return this->padding_tf_same_;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding) {
    assert(!padding_tf_same());
    this->padding_top_ = padding;
    this->padding_right_ = padding;
    this->padding_bottom_ = padding;
    this->padding_left_ = padding;
    return *this;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_right_ = padding_width;
    this->padding_bottom_ = padding_height;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_height(uint32_t padding_height) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_bottom_ = padding_height;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_width(uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_width;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_top(uint32_t padding_top) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_top;
    return *this;
  }

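  // With TensorFlow SAME padding, the total padding along each axis is
  // derived from the output, stride, dilated kernel, and input sizes, and is
  // split so that the extra pixel (when the total is odd) goes to the
  // bottom/right, matching TensorFlow's convention.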
  inline uint32_t padding_top() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height / 2;
    } else {
      return this->padding_top_;
    }
  }

  inline ConvolutionOperatorTester& padding_left(uint32_t padding_left) {
    assert(!padding_tf_same());
    this->padding_left_ = padding_left;
    return *this;
  }

  inline uint32_t padding_left() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width / 2;
    } else {
      return this->padding_left_;
    }
  }

  inline ConvolutionOperatorTester& padding_bottom(uint32_t padding_bottom) {
    assert(!padding_tf_same());
    this->padding_bottom_ = padding_bottom;
    return *this;
  }

  inline uint32_t padding_bottom() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height - total_padding_height / 2;
    } else {
      return this->padding_bottom_;
    }
  }

  inline ConvolutionOperatorTester& padding_right(uint32_t padding_right) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_right;
    return *this;
  }

  inline uint32_t padding_right() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width - total_padding_width / 2;
    } else {
      return this->padding_right_;
    }
  }

  inline ConvolutionOperatorTester& input_size(uint32_t input_height, uint32_t input_width) {
    assert(input_height >= 1);
    assert(input_width >= 1);
    this->input_height_ = input_height;
    this->input_width_ = input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& input_height(uint32_t input_height) {
    assert(input_height >= 1);
    this->input_height_ = input_height;
    return *this;
  }

  inline uint32_t input_height() const {
    return this->input_height_;
  }

  inline ConvolutionOperatorTester& input_width(uint32_t input_width) {
    assert(input_width >= 1);
    this->input_width_ = input_width;
    return *this;
  }

  inline uint32_t input_width() const {
    return this->input_width_;
  }

  inline ConvolutionOperatorTester& groups(uint32_t groups) {
    assert(groups >= 1);
    this->groups_ = groups;
    return *this;
  }

  inline uint32_t groups() const {
    return this->groups_;
  }

  inline ConvolutionOperatorTester& group_input_channels(size_t group_input_channels) {
    assert(group_input_channels >= 1);
    this->group_input_channels_ = group_input_channels;
    return *this;
  }

  inline size_t group_input_channels() const {
    return this->group_input_channels_;
  }

  inline ConvolutionOperatorTester& group_output_channels(size_t group_output_channels) {
    assert(group_output_channels >= 1);
    this->group_output_channels_ = group_output_channels;
    return *this;
  }

  inline size_t group_output_channels() const {
    return this->group_output_channels_;
  }

  inline ConvolutionOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size >= 1);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_size) {
    assert(kernel_size >= 1);
    this->kernel_height_ = kernel_size;
    this->kernel_width_ = kernel_size;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_height, uint32_t kernel_width) {
    assert(kernel_height >= 1);
    assert(kernel_width >= 1);
    this->kernel_height_ = kernel_height;
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_height(uint32_t kernel_height) {
    assert(kernel_height >= 1);
    this->kernel_height_ = kernel_height;
    return *this;
  }

  inline uint32_t kernel_height() const {
    return this->kernel_height_;
  }

  inline ConvolutionOperatorTester& kernel_width(uint32_t kernel_width) {
    assert(kernel_width >= 1);
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline uint32_t kernel_width() const {
    return this->kernel_width_;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation) {
    assert(dilation >= 1);
    this->dilation_height_ = dilation;
    this->dilation_width_ = dilation;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation_height, uint32_t dilation_width) {
    assert(dilation_height >= 1);
    assert(dilation_width >= 1);
    this->dilation_height_ = dilation_height;
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation_height(uint32_t dilation_height) {
    assert(dilation_height >= 1);
    this->dilation_height_ = dilation_height;
    return *this;
  }

  inline uint32_t dilation_height() const {
    return this->dilation_height_;
  }

  inline ConvolutionOperatorTester& dilation_width(uint32_t dilation_width) {
    assert(dilation_width >= 1);
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline uint32_t dilation_width() const {
    return this->dilation_width_;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling) {
    assert(subsampling >= 1);
    this->subsampling_height_ = subsampling;
    this->subsampling_width_ = subsampling;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling_height, uint32_t subsampling_width) {
    assert(subsampling_height >= 1);
    assert(subsampling_width >= 1);
    this->subsampling_height_ = subsampling_height;
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling_height(uint32_t subsampling_height) {
    assert(subsampling_height >= 1);
    this->subsampling_height_ = subsampling_height;
    return *this;
  }

  inline uint32_t subsampling_height() const {
    return this->subsampling_height_;
  }

  inline ConvolutionOperatorTester& subsampling_width(uint32_t subsampling_width) {
    assert(subsampling_width >= 1);
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline uint32_t subsampling_width() const {
    return this->subsampling_width_;
  }

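  // Strides are in elements, not bytes. A value of 0 (the default) means
  // "dense": the pixel stride defaults to groups() times the per-group
  // channel count, and the batch stride to the size of one whole image.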
  inline ConvolutionOperatorTester& input_pixel_stride(size_t input_pixel_stride) {
    assert(input_pixel_stride >= 1);
    this->input_pixel_stride_ = input_pixel_stride;
    return *this;
  }

  inline size_t input_pixel_stride() const {
    if (this->input_pixel_stride_ == 0) {
      return group_input_channels() * groups();
    } else {
      assert(this->input_pixel_stride_ >= group_input_channels() * groups());
      return this->input_pixel_stride_;
    }
  }

  inline ConvolutionOperatorTester& output_pixel_stride(size_t output_pixel_stride) {
    assert(output_pixel_stride >= 1);
    this->output_pixel_stride_ = output_pixel_stride;
    return *this;
  }

  inline size_t output_pixel_stride() const {
    if (this->output_pixel_stride_ == 0) {
      return group_output_channels() * groups();
    } else {
      assert(this->output_pixel_stride_ >= group_output_channels() * groups());
      return this->output_pixel_stride_;
    }
  }

  inline ConvolutionOperatorTester& input_batch_stride(size_t input_batch_stride) {
    assert(input_batch_stride >= 1);
    this->input_batch_stride_ = input_batch_stride;
    return *this;
  }

  inline size_t input_batch_stride() const {
    if (this->input_batch_stride_ == 0) {
      return groups() * group_input_channels() * input_height() * input_width();
    } else {
      assert(this->input_batch_stride_ >= groups() * group_input_channels() * input_height() * input_width());
      return this->input_batch_stride_;
    }
  }

  inline ConvolutionOperatorTester& output_batch_stride(size_t output_batch_stride) {
    assert(output_batch_stride >= 1);
    this->output_batch_stride_ = output_batch_stride;
    return *this;
  }

  inline size_t output_batch_stride() const {
    if (this->output_batch_stride_ == 0) {
      return groups() * group_output_channels() * output_height() * output_width();
    } else {
      assert(this->output_batch_stride_ >= groups() * group_output_channels() * output_height() * output_width());
      return this->output_batch_stride_;
    }
  }

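  // A kernel of size K with dilation D covers a receptive field of
  // (K - 1) * D + 1 input pixels along each axis.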
  inline uint32_t dilated_kernel_height() const {
    return (kernel_height() - 1) * dilation_height() + 1;
  }

  inline uint32_t dilated_kernel_width() const {
    return (kernel_width() - 1) * dilation_width() + 1;
  }

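  // Output size: with TF SAME padding it is ceil(input / stride); otherwise
  // it is (padded_input - dilated_kernel) / stride + 1, clamped below at 1
  // when the dilated kernel does not fit into the padded input.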
  inline size_t output_height() const {
    if (padding_tf_same()) {
      return (input_height() + subsampling_height() - 1) / subsampling_height();
    } else {
      const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
      if (padded_input_height <= dilated_kernel_height()) {
        return 1;
      } else {
        return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
      }
    }
  }

  inline size_t output_width() const {
    if (padding_tf_same()) {
      return (input_width() + subsampling_width() - 1) / subsampling_width();
    } else {
      const size_t padded_input_width = padding_left() + input_width() + padding_right();
      if (padded_input_width <= dilated_kernel_width()) {
        return 1;
      } else {
        return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
      }
    }
  }

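  // The next_* parameters describe the second input shape used by the
  // TestSetup* methods, which set up the same operator twice to verify that
  // it can be resized in place; a value of 0 (the default) reuses the
  // corresponding first-run value.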
  inline ConvolutionOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
    assert(next_input_height >= 1);
    assert(next_input_width >= 1);
    this->next_input_height_ = next_input_height;
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& next_input_height(uint32_t next_input_height) {
    assert(next_input_height >= 1);
    this->next_input_height_ = next_input_height;
    return *this;
  }

  inline uint32_t next_input_height() const {
    if (this->next_input_height_ == 0) {
      return input_height();
    } else {
      return this->next_input_height_;
    }
  }

  inline ConvolutionOperatorTester& next_input_width(uint32_t next_input_width) {
    assert(next_input_width >= 1);
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline uint32_t next_input_width() const {
    if (this->next_input_width_ == 0) {
      return input_width();
    } else {
      return this->next_input_width_;
    }
  }

  inline size_t next_output_height() const {
    const size_t padded_input_height = padding_top() + next_input_height() + padding_bottom();
    if (padded_input_height <= dilated_kernel_height()) {
      return 1;
    } else {
      return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
    }
  }

  inline size_t next_output_width() const {
    const size_t padded_input_width = padding_left() + next_input_width() + padding_right();
    if (padded_input_width <= dilated_kernel_width()) {
      return 1;
    } else {
      return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
    }
  }

  inline ConvolutionOperatorTester& next_batch_size(size_t next_batch_size) {
    assert(next_batch_size >= 1);
    this->next_batch_size_ = next_batch_size;
    return *this;
  }

  inline size_t next_batch_size() const {
    if (this->next_batch_size_ == 0) {
      return batch_size();
    } else {
      return this->next_batch_size_;
    }
  }

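  // Fraction of kernel elements that TestNCHWxF32() zeroes out, which
  // exercises the sparse-kernel handling of the NCHW convolution path.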
  inline ConvolutionOperatorTester& sparsity(float sparsity) {
    this->sparsity_ = sparsity;
    return *this;
  }

  inline float sparsity() const {
    return this->sparsity_;
  }

  inline ConvolutionOperatorTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline ConvolutionOperatorTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline ConvolutionOperatorTester& force_nhwc_input(bool force_nhwc_input) {
    this->force_nhwc_input_ = force_nhwc_input;
    return *this;
  }

  inline bool force_nhwc_input() const {
    return this->force_nhwc_input_;
  }

  inline ConvolutionOperatorTester& depthwise_layout(bool depthwise_layout) {
    this->depthwise_layout_ = depthwise_layout;
    return *this;
  }

  inline bool depthwise_layout() const {
    return this->depthwise_layout_;
  }

  inline ConvolutionOperatorTester& has_bias(bool has_bias) {
    this->has_bias_ = has_bias;
    return *this;
  }

  inline bool has_bias() const {
    return this->has_bias_;
  }

  inline ConvolutionOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

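  // Tests the quantized (Q8) NHWC convolution: accumulates a reference
  // result in int32, derives requantization parameters from the observed
  // accumulator range, and checks the operator output against the
  // renormalized reference within 0.9 of a quantization step. Out-of-bounds
  // kernel taps are skipped via unsigned wrap-around: a "negative" iy/ix
  // underflows to a huge size_t and fails the bounds check.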
  void TestNHWCxQ8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()) + 8);
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output(batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(s32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                              (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

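      // The output scale maps the full accumulator range onto the 255 levels
      // of a uint8, and the zero point centers that range; both are derived
      // from the data rather than fixed so each test case exercises a
      // realistic requantization.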
      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_q8(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, qmin(), qmax(),
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

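  // Tests the single-precision NHWC convolution. qmin()/qmax() control how
  // aggressively the output is clamped: the clamping bounds are placed at
  // the qmin/qmax fractions (out of 255) of the observed reference range.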
  void TestNHWCxF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()));
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g] *
                            kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_f32(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

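  // Tests the single-precision NCHW convolution, optionally with a
  // sparsified kernel (see sparsity()) and optionally with NHWC input
  // (XNN_FLAG_INPUT_NHWC). Parameter combinations that the NCHW path does
  // not support are skipped rather than failed.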
  void TestNCHWxF32() const {
    ASSERT_FALSE(depthwise_layout());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
    auto prng = std::bind(std::uniform_real_distribution<float>(), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      batch_size() * input_batch_stride() + groups() * group_input_channels() * input_height() * input_width());
    std::vector<float> kernel(
      groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(
      batch_size() * output_batch_stride() + groups() * group_output_channels() * output_height() * output_width());
    std::vector<float> output_ref(batch_size() * groups() * group_output_channels() * output_height() * output_width());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      for (float& k : kernel) {
        if (prng() <= sparsity()) {
          k = 0.0f;
        }
      }
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      if (force_nhwc_input()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
                              input[((((i * input_height() + iy) * input_width() + ix) * groups() + g) * group_input_channels() + ic)] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
                              input[i * input_batch_stride() +
                                ((g * group_input_channels() + ic) * input_height() + iy) * input_width() + ix] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = qmin() == 0 ? -std::numeric_limits<float>::infinity() :
        accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = qmax() == 255 ? std::numeric_limits<float>::infinity() :
        accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nchw_f32(
        padding_top(), padding_right(), padding_bottom(), padding_left(),
        kernel_height(), kernel_width(),
        subsampling_height(), subsampling_width(),
        dilation_height(), dilation_width(),
        groups(), group_input_channels(), group_output_channels(),
        kernel.data(), has_bias() ? bias.data() : nullptr,
        output_min, output_max,
        (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (force_nhwc_input() ? XNN_FLAG_INPUT_NHWC : 0),
        &convolution_op);
      if (status == xnn_status_unsupported_parameter) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nchw_f32(
          convolution_op,
          batch_size(), input_batch_stride(), output_batch_stride(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[i * output_batch_stride() + ((g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
                ASSERT_LE(output[i * output_batch_stride() + ((g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
                ASSERT_NEAR(
                  output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x],
                  output[i * output_batch_stride() + ((g * group_output_channels() + c) * output_height() + y) * output_width() + x],
                  1.0e-4 * std::abs(output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
              }
            }
          }
        }
      }
    }
  }

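  // Tests that a Q8 NHWC convolution operator can be set up a second time
  // with a different batch size and input shape: run and verify once, then
  // re-setup the same operator with the next_* shape and verify again.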
  void TestSetupNHWCxQ8() const {
    ASSERT_FALSE(depthwise_layout());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
      batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()),
      next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_pixel_stride() + groups() * group_input_channels())) + 8);
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output(std::max(
      batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()),
      next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_pixel_stride() + groups() * group_output_channels())));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
    std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(s32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, and run Convolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_q8(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, qmin(), qmax(),
          0, &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results for the second run, including renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

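  // Same resize test as above, for the F32 NHWC convolution.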
Marat Dukhanefc47b82019-11-18 09:25:38 -08001251 void TestSetupNHWCxF32() const {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001252 ASSERT_FALSE(depthwise_layout());
1253
1254 std::random_device random_device;
1255 auto rng = std::mt19937(random_device());
1256 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
1257
1258 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max(
1259 batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()),
1260 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_pixel_stride() + groups() * group_input_channels())));
1261 std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1262 std::vector<float> bias(groups() * group_output_channels());
1263 std::vector<float> output(std::max(
1264 batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()),
1265 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_pixel_stride() + groups() * group_output_channels())));
1266 std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1267 std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1268
1269 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1270 std::generate(input.begin(), input.end(), std::ref(f32rng));
1271 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
1272 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
1273 std::fill(output.begin(), output.end(), nanf(""));
1274
1275 // Compute reference results, without clamping.
Marat Dukhanf568f082019-10-30 09:47:07 -07001276 if (has_bias()) {
1277 for (size_t i = 0; i < batch_size(); i++) {
1278 for (size_t oy = 0; oy < output_height(); oy++) {
1279 for (size_t ox = 0; ox < output_width(); ox++) {
1280 for (size_t g = 0; g < groups(); g++) {
1281 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1282 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1283 bias[g * group_output_channels() + oc];
1284 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001285 }
1286 }
1287 }
1288 }
Marat Dukhanf568f082019-10-30 09:47:07 -07001289 } else {
1290 std::fill(output_ref.begin(), output_ref.end(), 0.0f);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001291 }
1292 for (size_t i = 0; i < batch_size(); i++) {
1293 for (size_t oy = 0; oy < output_height(); oy++) {
1294 for (size_t ox = 0; ox < output_width(); ox++) {
1295 for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
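      // qmin()/qmax() in [0, 255] are mapped linearly onto the observed range of
      // the accumulated values, turning the quantization-style clamp settings
      // into float clamping bounds.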
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, and run Convolution operator once.
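      // The operator is created once and set up again below with a different
      // batch size and input shape; that re-setup path is what this test checks.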
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_f32(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          0 /* flags */, &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                    1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
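      // Only the input is re-randomized: the kernel and bias were packed into
      // the operator at creation time, so they must stay fixed across runs.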
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results for the second run, including clamping.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
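      // Clamp reference results.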
      for (float& value : next_output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Set up and run the Convolution operator a second time, and destroy the operator.
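      // (The operator itself is destroyed automatically by auto_convolution_op
      // when it goes out of scope at the end of this iteration.)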
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                    output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                    1.0e-4 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

 private:
  uint32_t padding_top_{0};
  uint32_t padding_right_{0};
  uint32_t padding_bottom_{0};
  uint32_t padding_left_{0};
  bool padding_tf_same_{false};
  size_t input_height_{1};
  size_t input_width_{1};
  uint32_t groups_{1};
  size_t group_input_channels_{1};
  size_t input_pixel_stride_{0};
  size_t input_batch_stride_{0};
  size_t group_output_channels_{1};
  size_t output_pixel_stride_{0};
  size_t output_batch_stride_{0};
  size_t batch_size_{1};
  uint32_t kernel_height_{1};
  uint32_t kernel_width_{1};
  uint32_t dilation_height_{1};
  uint32_t dilation_width_{1};
  uint32_t subsampling_height_{1};
  uint32_t subsampling_width_{1};
  size_t next_input_height_{0};
  size_t next_input_width_{0};
  size_t next_batch_size_{0};
  float sparsity_{0.0f};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  bool depthwise_layout_{false};
  bool force_nhwc_input_{false};
  bool has_bias_{true};
  size_t iterations_{1};
};
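
// A minimal usage sketch (hypothetical; real tests live in the accompanying
// test .cc files). It assumes the fluent setters defined earlier in this
// header -- batch_size(), next_batch_size(), input_size(), kernel_size(),
// group_input_channels(), group_output_channels() -- and exercises the
// create-once/setup-twice path verified by TestSetupNHWCxF32() above:
//
//   TEST(CONVOLUTION_NHWC_F32, setup_changing_batch) {
//     ConvolutionOperatorTester()
//       .batch_size(2)
//       .next_batch_size(4)
//       .input_size(8, 8)
//       .kernel_size(3, 3)
//       .group_input_channels(15)
//       .group_output_channels(19)
//       .TestSetupNHWCxF32();
//   }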