blob: 106150603294ca459b25de2d6859aa8359f685e8 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#pragma once
10
11#include <gtest/gtest.h>
12
Marat Dukhan5756a922022-02-04 01:55:53 -080013#include <fp16.h>
14
XNNPACK Teamb455b122019-09-27 18:10:33 -070015#include <algorithm>
16#include <cassert>
17#include <cstddef>
18#include <cstdlib>
19#include <functional>
20#include <limits>
21#include <random>
22#include <vector>
23
24#include <xnnpack.h>
25
26
27class MaxPoolingOperatorTester {
28 public:
Marat Dukhanbee78252020-02-27 23:52:08 -080029 inline MaxPoolingOperatorTester& padding_tf_same(bool padding_same) {
30 if (padding_same) {
31 assert(padding_top() == 0);
32 assert(padding_left() == 0);
33 assert(padding_bottom() == 0);
34 assert(padding_right() == 0);
35 }
36 this->padding_tf_same_ = padding_same;
37 return *this;
38 }
39
40 inline bool padding_tf_same() const {
41 return this->padding_tf_same_;
42 }
43
XNNPACK Teamb455b122019-09-27 18:10:33 -070044 inline MaxPoolingOperatorTester& padding(uint32_t padding) {
Marat Dukhanbee78252020-02-27 23:52:08 -080045 assert(!padding_tf_same());
XNNPACK Teamb455b122019-09-27 18:10:33 -070046 this->padding_top_ = padding;
47 this->padding_right_ = padding;
48 this->padding_bottom_ = padding;
49 this->padding_left_ = padding;
50 return *this;
51 }
52
53 inline MaxPoolingOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
Marat Dukhanbee78252020-02-27 23:52:08 -080054 assert(!padding_tf_same());
XNNPACK Teamb455b122019-09-27 18:10:33 -070055 this->padding_top_ = padding_height;
56 this->padding_right_ = padding_width;
57 this->padding_bottom_ = padding_height;
58 this->padding_left_ = padding_width;
59 return *this;
60 }
61
62 inline MaxPoolingOperatorTester& padding_height(uint32_t padding_height) {
Marat Dukhanbee78252020-02-27 23:52:08 -080063 assert(!padding_tf_same());
XNNPACK Teamb455b122019-09-27 18:10:33 -070064 this->padding_top_ = padding_height;
65 this->padding_bottom_ = padding_height;
66 return *this;
67 }
68
69 inline MaxPoolingOperatorTester& padding_width(uint32_t padding_width) {
Marat Dukhanbee78252020-02-27 23:52:08 -080070 assert(!padding_tf_same());
XNNPACK Teamb455b122019-09-27 18:10:33 -070071 this->padding_right_ = padding_width;
72 this->padding_left_ = padding_width;
73 return *this;
74 }
75
76 inline MaxPoolingOperatorTester& padding_top(uint32_t padding_top) {
Marat Dukhanbee78252020-02-27 23:52:08 -080077 assert(!padding_tf_same());
XNNPACK Teamb455b122019-09-27 18:10:33 -070078 this->padding_top_ = padding_top;
79 return *this;
80 }
81
82 inline uint32_t padding_top() const {
Marat Dukhanbee78252020-02-27 23:52:08 -080083 if (padding_tf_same()) {
84 const uint32_t total_padding_height =
85 (output_height() - 1) * stride_height() + dilated_pooling_height() - input_height();
86 return total_padding_height / 2;
87 } else {
88 return this->padding_top_;
89 }
XNNPACK Teamb455b122019-09-27 18:10:33 -070090 }
91
92 inline MaxPoolingOperatorTester& padding_left(uint32_t padding_left) {
Marat Dukhanbee78252020-02-27 23:52:08 -080093 assert(!padding_tf_same());
XNNPACK Teamb455b122019-09-27 18:10:33 -070094 this->padding_left_ = padding_left;
95 return *this;
96 }
97
98 inline uint32_t padding_left() const {
Marat Dukhanbee78252020-02-27 23:52:08 -080099 if (padding_tf_same()) {
100 const uint32_t total_padding_width =
101 (output_width() - 1) * stride_width() + dilated_pooling_width() - input_width();
102 return total_padding_width / 2;
103 } else {
104 return this->padding_left_;
105 }
106 }
107
108 inline MaxPoolingOperatorTester& padding_bottom(uint32_t padding_bottom) {
109 assert(!padding_tf_same());
110 this->padding_bottom_ = padding_bottom;
111 return *this;
112 }
113
114 inline uint32_t padding_bottom() const {
115 if (padding_tf_same()) {
116 const uint32_t total_padding_height =
117 (output_height() - 1) * stride_height() + dilated_pooling_height() - input_height();
118 return total_padding_height - total_padding_height / 2;
119 } else {
120 return this->padding_bottom_;
121 }
122 }
123
124 inline MaxPoolingOperatorTester& padding_right(uint32_t padding_right) {
125 assert(!padding_tf_same());
126 this->padding_right_ = padding_right;
127 return *this;
128 }
129
130 inline uint32_t padding_right() const {
131 if (padding_tf_same()) {
132 const uint32_t total_padding_width =
133 (output_width() - 1) * stride_width() + dilated_pooling_width() - input_width();
134 return total_padding_width - total_padding_width / 2;
135 } else {
136 return this->padding_right_;
137 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700138 }
139
140 inline MaxPoolingOperatorTester& input_size(size_t input_height, size_t input_width) {
141 assert(input_height >= 1);
142 assert(input_width >= 1);
143 this->input_height_ = input_height;
144 this->input_width_ = input_width;
145 return *this;
146 }
147
148 inline MaxPoolingOperatorTester& input_height(size_t input_height) {
149 assert(input_height >= 1);
150 this->input_height_ = input_height;
151 return *this;
152 }
153
154 inline size_t input_height() const {
155 return this->input_height_;
156 }
157
158 inline MaxPoolingOperatorTester& input_width(size_t input_width) {
159 assert(input_width >= 1);
160 this->input_width_ = input_width;
161 return *this;
162 }
163
164 inline size_t input_width() const {
165 return this->input_width_;
166 }
167
168 inline MaxPoolingOperatorTester& channels(size_t channels) {
169 assert(channels != 0);
170 this->channels_ = channels;
171 return *this;
172 }
173
174 inline size_t channels() const {
175 return this->channels_;
176 }
177
178 inline MaxPoolingOperatorTester& batch_size(size_t batch_size) {
179 assert(batch_size != 0);
180 this->batch_size_ = batch_size;
181 return *this;
182 }
183
184 inline size_t batch_size() const {
185 return this->batch_size_;
186 }
187
188 inline MaxPoolingOperatorTester& pooling_size(uint32_t pooling_size) {
189 assert(pooling_size >= 1);
190 this->pooling_height_ = pooling_size;
191 this->pooling_width_ = pooling_size;
192 return *this;
193 }
194
195 inline MaxPoolingOperatorTester& pooling_size(uint32_t pooling_height, uint32_t pooling_width) {
196 assert(pooling_height >= 1);
197 assert(pooling_width >= 1);
198 this->pooling_height_ = pooling_height;
199 this->pooling_width_ = pooling_width;
200 return *this;
201 }
202
203 inline MaxPoolingOperatorTester& pooling_height(uint32_t pooling_height) {
204 assert(pooling_height >= 1);
205 this->pooling_height_ = pooling_height;
206 return *this;
207 }
208
209 inline uint32_t pooling_height() const {
210 return this->pooling_height_;
211 }
212
213 inline MaxPoolingOperatorTester& pooling_width(uint32_t pooling_width) {
214 assert(pooling_width >= 1);
215 this->pooling_width_ = pooling_width;
216 return *this;
217 }
218
219 inline uint32_t pooling_width() const {
220 return this->pooling_width_;
221 }
222
223 inline MaxPoolingOperatorTester& stride(uint32_t stride) {
224 assert(stride >= 1);
225 this->stride_height_ = stride;
226 this->stride_width_ = stride;
227 return *this;
228 }
229
230 inline MaxPoolingOperatorTester& stride(uint32_t stride_height, uint32_t stride_width) {
231 assert(stride_height >= 1);
232 assert(stride_width >= 1);
233 this->stride_height_ = stride_height;
234 this->stride_width_ = stride_width;
235 return *this;
236 }
237
238 inline MaxPoolingOperatorTester& stride_height(uint32_t stride_height) {
239 assert(stride_height >= 1);
240 this->stride_height_ = stride_height;
241 return *this;
242 }
243
244 inline uint32_t stride_height() const {
245 return this->stride_height_;
246 }
247
248 inline MaxPoolingOperatorTester& stride_width(uint32_t stride_width) {
249 assert(stride_width >= 1);
250 this->stride_width_ = stride_width;
251 return *this;
252 }
253
254 inline uint32_t stride_width() const {
255 return this->stride_width_;
256 }
257
258 inline MaxPoolingOperatorTester& dilation(uint32_t dilation) {
259 assert(dilation >= 1);
260 this->dilation_height_ = dilation;
261 this->dilation_width_ = dilation;
262 return *this;
263 }
264
265 inline MaxPoolingOperatorTester& dilation(uint32_t dilation_height, uint32_t dilation_width) {
266 assert(dilation_height >= 1);
267 assert(dilation_width >= 1);
268 this->dilation_height_ = dilation_height;
269 this->dilation_width_ = dilation_width;
270 return *this;
271 }
272
273 inline MaxPoolingOperatorTester& dilation_height(uint32_t dilation_height) {
274 assert(dilation_height >= 1);
275 this->dilation_height_ = dilation_height;
276 return *this;
277 }
278
279 inline uint32_t dilation_height() const {
280 return this->dilation_height_;
281 }
282
283 inline MaxPoolingOperatorTester& dilation_width(uint32_t dilation_width) {
284 assert(dilation_width >= 1);
285 this->dilation_width_ = dilation_width;
286 return *this;
287 }
288
289 inline uint32_t dilation_width() const {
290 return this->dilation_width_;
291 }
292
293 inline uint32_t dilated_pooling_height() const {
294 return (pooling_height() - 1) * dilation_height() + 1;
295 }
296
297 inline uint32_t dilated_pooling_width() const {
298 return (pooling_width() - 1) * dilation_width() + 1;
299 }
300
301 inline size_t output_height() const {
Marat Dukhanbee78252020-02-27 23:52:08 -0800302 if (padding_tf_same()) {
303 return (input_height() + stride_height() - 1) / stride_height();
XNNPACK Teamb455b122019-09-27 18:10:33 -0700304 } else {
Marat Dukhanbee78252020-02-27 23:52:08 -0800305 const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
306 if (padded_input_height <= dilated_pooling_height()) {
307 return 1;
308 } else {
309 return (padded_input_height - dilated_pooling_height()) / stride_height() + 1;
310 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700311 }
312 }
313
314 inline size_t output_width() const {
Marat Dukhanbee78252020-02-27 23:52:08 -0800315 if (padding_tf_same()) {
316 return (input_width() + stride_width() - 1) / stride_width();
XNNPACK Teamb455b122019-09-27 18:10:33 -0700317 } else {
Marat Dukhanbee78252020-02-27 23:52:08 -0800318 const size_t padded_input_width = padding_left() + input_width() + padding_right();
319 if (padded_input_width <= dilated_pooling_width()) {
320 return 1;
321 } else {
322 return (padded_input_width - dilated_pooling_width()) / stride_width() + 1;
323 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700324 }
325 }
326
327 inline MaxPoolingOperatorTester& input_pixel_stride(size_t input_pixel_stride) {
328 assert(input_pixel_stride != 0);
329 this->input_pixel_stride_ = input_pixel_stride;
330 return *this;
331 }
332
333 inline size_t input_pixel_stride() const {
334 if (this->input_pixel_stride_ == 0) {
335 return channels();
336 } else {
337 assert(this->input_pixel_stride_ >= channels());
338 return this->input_pixel_stride_;
339 }
340 }
341
342 inline MaxPoolingOperatorTester& output_pixel_stride(size_t output_pixel_stride) {
343 assert(output_pixel_stride != 0);
344 this->output_pixel_stride_ = output_pixel_stride;
345 return *this;
346 }
347
348 inline size_t output_pixel_stride() const {
349 if (this->output_pixel_stride_ == 0) {
350 return channels();
351 } else {
352 assert(this->output_pixel_stride_ >= channels());
353 return this->output_pixel_stride_;
354 }
355 }
356
357 inline MaxPoolingOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
358 assert(next_input_height >= 1);
359 assert(next_input_width >= 1);
360 this->next_input_height_ = next_input_height;
361 this->next_input_width_ = next_input_width;
362 return *this;
363 }
364
365 inline MaxPoolingOperatorTester& next_input_height(uint32_t next_input_height) {
366 assert(next_input_height >= 1);
367 this->next_input_height_ = next_input_height;
368 return *this;
369 }
370
371 inline uint32_t next_input_height() const {
372 if (this->next_input_height_ == 0) {
373 return input_height();
374 } else {
375 return this->next_input_height_;
376 }
377 }
378
379 inline MaxPoolingOperatorTester& next_input_width(uint32_t next_input_width) {
380 assert(next_input_width >= 1);
381 this->next_input_width_ = next_input_width;
382 return *this;
383 }
384
385 inline uint32_t next_input_width() const {
386 if (this->next_input_width_ == 0) {
387 return input_width();
388 } else {
389 return this->next_input_width_;
390 }
391 }
392
393 inline size_t next_output_height() const {
394 const size_t padded_next_input_height = padding_top() + next_input_height() + padding_bottom();
395 if (padded_next_input_height <= dilated_pooling_height()) {
396 return 1;
397 } else {
398 return (padded_next_input_height - dilated_pooling_height()) / stride_height() + 1;
399 }
400 }
401
402 inline size_t next_output_width() const {
403 const size_t padded_next_input_width = padding_left() + next_input_width() + padding_right();
404 if (padded_next_input_width <= dilated_pooling_width()) {
405 return 1;
406 } else {
407 return (padded_next_input_width - dilated_pooling_width()) / stride_width() + 1;
408 }
409 }
410
411 inline MaxPoolingOperatorTester& next_batch_size(size_t next_batch_size) {
412 assert(next_batch_size >= 1);
413 this->next_batch_size_ = next_batch_size;
414 return *this;
415 }
416
417 inline size_t next_batch_size() const {
418 if (this->next_batch_size_ == 0) {
419 return batch_size();
420 } else {
421 return this->next_batch_size_;
422 }
423 }
424
425 inline MaxPoolingOperatorTester& qmin(uint8_t qmin) {
426 this->qmin_ = qmin;
427 return *this;
428 }
429
430 inline uint8_t qmin() const {
431 return this->qmin_;
432 }
433
434 inline MaxPoolingOperatorTester& qmax(uint8_t qmax) {
435 this->qmax_ = qmax;
436 return *this;
437 }
438
439 inline uint8_t qmax() const {
440 return this->qmax_;
441 }
442
443 inline MaxPoolingOperatorTester& iterations(size_t iterations) {
444 this->iterations_ = iterations;
445 return *this;
446 }
447
448 inline size_t iterations() const {
449 return this->iterations_;
450 }
451
Marat Dukhandc5c1482021-08-16 09:03:15 -0700452 void TestS8() const {
453 std::random_device random_device;
454 auto rng = std::mt19937(random_device());
455 auto i8rng = std::bind(
456 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
457 std::ref(rng));
458
459 std::vector<int8_t> input((batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
460 std::vector<int8_t> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
461 std::vector<int8_t> output_ref(batch_size() * output_height() * output_width() * channels());
462 for (size_t iteration = 0; iteration < iterations(); iteration++) {
463 std::generate(input.begin(), input.end(), std::ref(i8rng));
464 std::fill(output.begin(), output.end(), 0xA5);
465
466 // Compute reference results.
467 for (size_t i = 0; i < batch_size(); i++) {
468 for (size_t oy = 0; oy < output_height(); oy++) {
469 for (size_t ox = 0; ox < output_width(); ox++) {
470 for (size_t c = 0; c < channels(); c++) {
471 int8_t max_value = std::numeric_limits<int8_t>::min();
472 for (size_t py = 0; py < pooling_height(); py++) {
473 const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
474 for (size_t px = 0; px < pooling_width(); px++) {
475 const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
476 if (ix < input_width() && iy < input_height()) {
477 max_value = std::max(max_value,
478 input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
479 }
480 }
481 }
482 max_value = std::min(max_value, int8_t(qmax() - 0x80));
483 max_value = std::max(max_value, int8_t(qmin() - 0x80));
484 output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
485 }
486 }
487 }
488 }
489
490 // Create, setup, run, and destroy Max Pooling operator.
491 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
492 xnn_operator_t max_pooling_op = nullptr;
493
494 ASSERT_EQ(xnn_status_success,
495 xnn_create_max_pooling2d_nhwc_s8(
496 padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
497 padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
498 pooling_height(), pooling_width(),
499 stride_height(), stride_width(),
500 dilation_height(), dilation_width(),
501 channels(), input_pixel_stride(), output_pixel_stride(),
502 int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
503 padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0,
504 &max_pooling_op));
505 ASSERT_NE(nullptr, max_pooling_op);
506
507 // Smart pointer to automatically delete max_pooling_op.
508 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
509
510 ASSERT_EQ(xnn_status_success,
511 xnn_setup_max_pooling2d_nhwc_s8(
512 max_pooling_op,
513 batch_size(), input_height(), input_width(),
514 input.data(), output.data(),
515 nullptr /* thread pool */));
516
517 ASSERT_EQ(xnn_status_success,
518 xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
519
520 // Verify results.
521 for (size_t i = 0; i < batch_size(); i++) {
522 for (size_t y = 0; y < output_height(); y++) {
523 for (size_t x = 0; x < output_width(); x++) {
524 for (size_t c = 0; c < channels(); c++) {
525 ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), int32_t(qmax() - 0x80));
526 ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), int32_t(qmin() - 0x80));
527 ASSERT_EQ(int32_t(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]),
528 int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c])) <<
529 "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
530 }
531 }
532 }
533 }
534 }
535 }
536
XNNPACK Teamb455b122019-09-27 18:10:33 -0700537 void TestU8() const {
538 std::random_device random_device;
539 auto rng = std::mt19937(random_device());
Marat Dukhan5ce30d92020-04-14 03:31:26 -0700540 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700541
542 std::vector<uint8_t> input((batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
543 std::vector<uint8_t> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
544 std::vector<uint8_t> output_ref(batch_size() * output_height() * output_width() * channels());
545 for (size_t iteration = 0; iteration < iterations(); iteration++) {
546 std::generate(input.begin(), input.end(), std::ref(u8rng));
547 std::fill(output.begin(), output.end(), 0xA5);
548
549 // Compute reference results.
550 for (size_t i = 0; i < batch_size(); i++) {
551 for (size_t oy = 0; oy < output_height(); oy++) {
552 for (size_t ox = 0; ox < output_width(); ox++) {
553 for (size_t c = 0; c < channels(); c++) {
554 uint8_t max_value = 0;
555 for (size_t py = 0; py < pooling_height(); py++) {
556 const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
557 for (size_t px = 0; px < pooling_width(); px++) {
558 const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
Marat Dukhane0df8312019-10-22 18:16:56 -0700559 if (ix < input_width() && iy < input_height()) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700560 max_value = std::max(max_value,
561 input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
562 }
563 }
564 }
565 max_value = std::min(max_value, qmax());
566 max_value = std::max(max_value, qmin());
567 output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
568 }
569 }
570 }
571 }
572
573 // Create, setup, run, and destroy Max Pooling operator.
Marat Dukhan04f03be2019-11-19 12:36:47 -0800574 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700575 xnn_operator_t max_pooling_op = nullptr;
576
577 ASSERT_EQ(xnn_status_success,
578 xnn_create_max_pooling2d_nhwc_u8(
Marat Dukhanbee78252020-02-27 23:52:08 -0800579 padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
580 padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700581 pooling_height(), pooling_width(),
582 stride_height(), stride_width(),
583 dilation_height(), dilation_width(),
584 channels(), input_pixel_stride(), output_pixel_stride(),
585 qmin(), qmax(),
Marat Dukhanbee78252020-02-27 23:52:08 -0800586 padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0,
587 &max_pooling_op));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700588 ASSERT_NE(nullptr, max_pooling_op);
589
590 // Smart pointer to automatically delete max_pooling_op.
591 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
592
593 ASSERT_EQ(xnn_status_success,
594 xnn_setup_max_pooling2d_nhwc_u8(
595 max_pooling_op,
596 batch_size(), input_height(), input_width(),
597 input.data(), output.data(),
598 nullptr /* thread pool */));
599
600 ASSERT_EQ(xnn_status_success,
601 xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
602
603 // Verify results.
604 for (size_t i = 0; i < batch_size(); i++) {
605 for (size_t y = 0; y < output_height(); y++) {
606 for (size_t x = 0; x < output_width(); x++) {
607 for (size_t c = 0; c < channels(); c++) {
608 ASSERT_LE(uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), uint32_t(qmax()));
609 ASSERT_GE(uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), uint32_t(qmin()));
610 ASSERT_EQ(uint32_t(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]),
611 uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c])) <<
612 "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
613 }
614 }
615 }
616 }
617 }
618 }
619
Marat Dukhan5756a922022-02-04 01:55:53 -0800620 void TestF16() const {
621 std::random_device random_device;
622 auto rng = std::mt19937(random_device());
623 // Note: we need to avoid FP16 denormals in the generated tensor because they might be processed differently in
624 // native vs emulated arithmetics, and we use exact comparison to verify the results against reference.
625 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.001f, 1.0f), rng);
626 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
627
628 std::vector<uint16_t> input((batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
629 std::vector<uint16_t> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
630 std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
631 for (size_t iteration = 0; iteration < iterations(); iteration++) {
632 std::generate(input.begin(), input.end(), std::ref(f16rng));
633 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
634
635 // Compute reference results, without clamping.
636 for (size_t i = 0; i < batch_size(); i++) {
637 for (size_t oy = 0; oy < output_height(); oy++) {
638 for (size_t ox = 0; ox < output_width(); ox++) {
639 for (size_t c = 0; c < channels(); c++) {
640 float max_value = -std::numeric_limits<float>::infinity();
641 for (size_t py = 0; py < pooling_height(); py++) {
642 const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
643 for (size_t px = 0; px < pooling_width(); px++) {
644 const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
645 if (ix < input_width() && iy < input_height()) {
646 max_value = std::max(max_value,
647 fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]));
648 }
649 }
650 }
651 output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
652 }
653 }
654 }
655 }
656
657 // Compute clamping parameters.
658 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
659 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
660 const float accumulated_range = accumulated_max - accumulated_min;
661 float output_min = accumulated_min + accumulated_range / 255.0f * float(qmin());
662 float output_max = accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
663 output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min));
664 output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max));
665 if (accumulated_range == 0.0f) {
666 output_min = -std::numeric_limits<float>::infinity();
667 output_max = +std::numeric_limits<float>::infinity();
668 }
669 if (qmin() == std::numeric_limits<uint8_t>::min()) {
670 output_min = -std::numeric_limits<float>::infinity();
671 }
672 if (qmax() == std::numeric_limits<uint8_t>::max()) {
673 output_max = +std::numeric_limits<float>::infinity();
674 }
675
676 // Clamp reference results.
677 for (float& value : output_ref) {
678 value = std::max(std::min(value, output_max), output_min);
679 }
680
681 // Create, setup, run, and destroy Max Pooling operator.
682 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
683 xnn_operator_t max_pooling_op = nullptr;
684
685 const xnn_status status = xnn_create_max_pooling2d_nhwc_f16(
686 padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
687 padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
688 pooling_height(), pooling_width(),
689 stride_height(), stride_width(),
690 dilation_height(), dilation_width(),
691 channels(), input_pixel_stride(), output_pixel_stride(),
692 output_min, output_max,
693 padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0,
694 &max_pooling_op);
695 if (status == xnn_status_unsupported_hardware) {
696 GTEST_SKIP();
697 }
698 ASSERT_EQ(xnn_status_success, status);
699 ASSERT_NE(nullptr, max_pooling_op);
700
701 // Smart pointer to automatically delete max_pooling_op.
702 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
703
704 ASSERT_EQ(xnn_status_success,
705 xnn_setup_max_pooling2d_nhwc_f16(
706 max_pooling_op,
707 batch_size(), input_height(), input_width(),
708 input.data(), output.data(),
709 nullptr /* thread pool */));
710
711 ASSERT_EQ(xnn_status_success,
712 xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
713
714 // Verify results.
715 for (size_t i = 0; i < batch_size(); i++) {
716 for (size_t y = 0; y < output_height(); y++) {
717 for (size_t x = 0; x < output_width(); x++) {
718 for (size_t c = 0; c < channels(); c++) {
719 ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), output_max);
720 ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), output_min);
721 ASSERT_EQ(
722 fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]),
723 output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]) <<
724 "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c
725 << ", min = " << output_min << ", max = " << output_max;
726 }
727 }
728 }
729 }
730 }
731 }
732
XNNPACK Teamb455b122019-09-27 18:10:33 -0700733 void TestF32() const {
734 std::random_device random_device;
735 auto rng = std::mt19937(random_device());
736 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
737
738 std::vector<float> input((batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
739 std::vector<float> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
740 std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
741 for (size_t iteration = 0; iteration < iterations(); iteration++) {
742 std::generate(input.begin(), input.end(), std::ref(f32rng));
743 std::fill(output.begin(), output.end(), nanf(""));
744
745 // Compute reference results, without clamping.
746 for (size_t i = 0; i < batch_size(); i++) {
747 for (size_t oy = 0; oy < output_height(); oy++) {
748 for (size_t ox = 0; ox < output_width(); ox++) {
749 for (size_t c = 0; c < channels(); c++) {
750 float max_value = -std::numeric_limits<float>::infinity();
751 for (size_t py = 0; py < pooling_height(); py++) {
752 const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
753 for (size_t px = 0; px < pooling_width(); px++) {
754 const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
755 if (ix < input_width() && iy < input_height()) {
756 max_value = std::max(max_value,
757 input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
758 }
759 }
760 }
761 output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
762 }
763 }
764 }
765 }
766
767 // Compute clamping parameters.
768 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
769 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
770 const float accumulated_range = accumulated_max - accumulated_min;
771 const float output_min = accumulated_range == 0.0f ?
772 -std::numeric_limits<float>::infinity() :
773 accumulated_min + accumulated_range / 255.0f * float(qmin());
774 const float output_max = accumulated_range == 0.0f ?
775 +std::numeric_limits<float>::infinity() :
776 accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
777
778 // Clamp reference results.
779 for (float& value : output_ref) {
780 value = std::max(std::min(value, output_max), output_min);
781 }
782
783 // Create, setup, run, and destroy Max Pooling operator.
Marat Dukhan04f03be2019-11-19 12:36:47 -0800784 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700785 xnn_operator_t max_pooling_op = nullptr;
786
787 ASSERT_EQ(xnn_status_success,
788 xnn_create_max_pooling2d_nhwc_f32(
Marat Dukhanbee78252020-02-27 23:52:08 -0800789 padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
790 padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700791 pooling_height(), pooling_width(),
792 stride_height(), stride_width(),
793 dilation_height(), dilation_width(),
794 channels(), input_pixel_stride(), output_pixel_stride(),
795 output_min, output_max,
Marat Dukhanbee78252020-02-27 23:52:08 -0800796 padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0,
797 &max_pooling_op));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700798 ASSERT_NE(nullptr, max_pooling_op);
799
800 // Smart pointer to automatically delete max_pooling_op.
801 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
802
803 ASSERT_EQ(xnn_status_success,
804 xnn_setup_max_pooling2d_nhwc_f32(
805 max_pooling_op,
806 batch_size(), input_height(), input_width(),
807 input.data(), output.data(),
808 nullptr /* thread pool */));
809
810 ASSERT_EQ(xnn_status_success,
811 xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
812
813 // Verify results.
814 for (size_t i = 0; i < batch_size(); i++) {
815 for (size_t y = 0; y < output_height(); y++) {
816 for (size_t x = 0; x < output_width(); x++) {
817 for (size_t c = 0; c < channels(); c++) {
818 ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_max);
819 ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_min);
820 ASSERT_EQ(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c],
821 output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]) <<
822 "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c
823 << ", min = " << output_min << ", max = " << output_max;
824 }
825 }
826 }
827 }
828 }
829 }
830
Marat Dukhandc5c1482021-08-16 09:03:15 -0700831 void TestSetupS8() const {
832 std::random_device random_device;
833 auto rng = std::mt19937(random_device());
834 auto i8rng = std::bind(
835 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
836 std::ref(rng));
837
838 std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + std::max(
839 (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels(),
840 (next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + channels()));
841 std::vector<int8_t> output(XNN_EXTRA_BYTES / sizeof(int8_t) + std::max(
842 (batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels(),
843 (next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + channels()));
844 std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
845 std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * channels());
846 for (size_t iteration = 0; iteration < iterations(); iteration++) {
847 std::generate(input.begin(), input.end(), std::ref(i8rng));
848 std::fill(output.begin(), output.end(), 0xA5);
849
850 // Compute reference results.
851 for (size_t i = 0; i < batch_size(); i++) {
852 for (size_t oy = 0; oy < output_height(); oy++) {
853 for (size_t ox = 0; ox < output_width(); ox++) {
854 for (size_t c = 0; c < channels(); c++) {
855 int8_t max_value = std::numeric_limits<int8_t>::min();
856 for (size_t py = 0; py < pooling_height(); py++) {
857 const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
858 for (size_t px = 0; px < pooling_width(); px++) {
859 const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
860 if (ix < input_width() && iy < input_height()) {
861 max_value = std::max(max_value,
862 input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
863 }
864 }
865 }
866 max_value = std::min(max_value, int8_t(qmax() - 0x80));
867 max_value = std::max(max_value, int8_t(qmin() - 0x80));
868 output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
869 }
870 }
871 }
872 }
873
874 // Create, setup, and run Max Pooling operator once.
875 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
876 xnn_operator_t max_pooling_op = nullptr;
877
878 ASSERT_EQ(xnn_status_success,
879 xnn_create_max_pooling2d_nhwc_s8(
880 padding_top(), padding_right(), padding_bottom(), padding_left(),
881 pooling_height(), pooling_width(),
882 stride_height(), stride_width(),
883 dilation_height(), dilation_width(),
884 channels(), input_pixel_stride(), output_pixel_stride(),
885 int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
886 0, &max_pooling_op));
887 ASSERT_NE(nullptr, max_pooling_op);
888
889 // Smart pointer to automatically delete max_pooling_op.
890 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
891
892 ASSERT_EQ(xnn_status_success,
893 xnn_setup_max_pooling2d_nhwc_s8(
894 max_pooling_op,
895 batch_size(), input_height(), input_width(),
896 input.data(), output.data(),
897 nullptr /* thread pool */));
898
899 ASSERT_EQ(xnn_status_success,
900 xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
901
902 // Verify results of the first run.
903 for (size_t i = 0; i < batch_size(); i++) {
904 for (size_t y = 0; y < output_height(); y++) {
905 for (size_t x = 0; x < output_width(); x++) {
906 for (size_t c = 0; c < channels(); c++) {
907 ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), int32_t(qmax() - 0x80));
908 ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), int32_t(qmin() - 0x80));
909 ASSERT_EQ(int32_t(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]),
910 int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c])) <<
911 "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
912 }
913 }
914 }
915 }
916
917 // Re-generate data for the second run.
918 std::generate(input.begin(), input.end(), std::ref(i8rng));
919 std::fill(output.begin(), output.end(), 0xA5);
920
921 // Compute reference results for the second run.
922 for (size_t i = 0; i < next_batch_size(); i++) {
923 for (size_t oy = 0; oy < next_output_height(); oy++) {
924 for (size_t ox = 0; ox < next_output_width(); ox++) {
925 for (size_t c = 0; c < channels(); c++) {
926 int8_t max_value = std::numeric_limits<int8_t>::min();
927 for (size_t py = 0; py < pooling_height(); py++) {
928 const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
929 for (size_t px = 0; px < pooling_width(); px++) {
930 const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
931 if (ix < next_input_width() && iy < next_input_height()) {
932 max_value = std::max(max_value,
933 input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + c]);
934 }
935 }
936 }
937 max_value = std::min(max_value, int8_t(qmax() - 0x80));
938 max_value = std::max(max_value, int8_t(qmin() - 0x80));
939 next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c] = max_value;
940 }
941 }
942 }
943 }
944
945 // Setup and run Max Pooling operator the second time, and destroy the operator.
946 ASSERT_EQ(xnn_status_success,
947 xnn_setup_max_pooling2d_nhwc_s8(
948 max_pooling_op,
949 next_batch_size(), next_input_height(), next_input_width(),
950 input.data(), output.data(),
951 nullptr /* thread pool */));
952
953 ASSERT_EQ(xnn_status_success,
954 xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
955
956 // Verify results of the second run.
957 for (size_t i = 0; i < next_batch_size(); i++) {
958 for (size_t y = 0; y < next_output_height(); y++) {
959 for (size_t x = 0; x < next_output_width(); x++) {
960 for (size_t c = 0; c < channels(); c++) {
961 ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), int32_t(qmax() - 0x80));
962 ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), int32_t(qmin() - 0x80));
963 ASSERT_EQ(int32_t(next_output_ref[((i * next_output_height() + y) * next_output_width() + x) * channels() + c]),
964 int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c])) <<
965 "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
966 }
967 }
968 }
969 }
970 }
971 }
972
XNNPACK Teamb455b122019-09-27 18:10:33 -0700973 void TestSetupU8() const {
974 std::random_device random_device;
975 auto rng = std::mt19937(random_device());
Marat Dukhan5ce30d92020-04-14 03:31:26 -0700976 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700977
978 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
979 (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels(),
980 (next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + channels()));
981 std::vector<uint8_t> output(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
982 (batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels(),
983 (next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + channels()));
984 std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
985 std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * channels());
986 for (size_t iteration = 0; iteration < iterations(); iteration++) {
987 std::generate(input.begin(), input.end(), std::ref(u8rng));
988 std::fill(output.begin(), output.end(), 0xA5);
989
990 // Compute reference results.
991 for (size_t i = 0; i < batch_size(); i++) {
992 for (size_t oy = 0; oy < output_height(); oy++) {
993 for (size_t ox = 0; ox < output_width(); ox++) {
994 for (size_t c = 0; c < channels(); c++) {
995 uint8_t max_value = 0;
996 for (size_t py = 0; py < pooling_height(); py++) {
997 const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
998 for (size_t px = 0; px < pooling_width(); px++) {
999 const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
1000 if (ix < input_width() && iy < input_height()) {
1001 max_value = std::max(max_value,
1002 input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
1003 }
1004 }
1005 }
1006 max_value = std::min(max_value, qmax());
1007 max_value = std::max(max_value, qmin());
1008 output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
1009 }
1010 }
1011 }
1012 }
1013
1014 // Create, setup, and run Max Pooling operator once.
Marat Dukhan04f03be2019-11-19 12:36:47 -08001015 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
XNNPACK Teamb455b122019-09-27 18:10:33 -07001016 xnn_operator_t max_pooling_op = nullptr;
1017
1018 ASSERT_EQ(xnn_status_success,
1019 xnn_create_max_pooling2d_nhwc_u8(
1020 padding_top(), padding_right(), padding_bottom(), padding_left(),
1021 pooling_height(), pooling_width(),
1022 stride_height(), stride_width(),
1023 dilation_height(), dilation_width(),
1024 channels(), input_pixel_stride(), output_pixel_stride(),
1025 qmin(), qmax(),
1026 0, &max_pooling_op));
1027 ASSERT_NE(nullptr, max_pooling_op);
1028
1029 // Smart pointer to automatically delete max_pooling_op.
1030 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
1031
1032 ASSERT_EQ(xnn_status_success,
1033 xnn_setup_max_pooling2d_nhwc_u8(
1034 max_pooling_op,
1035 batch_size(), input_height(), input_width(),
1036 input.data(), output.data(),
1037 nullptr /* thread pool */));
1038
1039 ASSERT_EQ(xnn_status_success,
1040 xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
1041
1042 // Verify results of the first run.
1043 for (size_t i = 0; i < batch_size(); i++) {
1044 for (size_t y = 0; y < output_height(); y++) {
1045 for (size_t x = 0; x < output_width(); x++) {
1046 for (size_t c = 0; c < channels(); c++) {
1047 ASSERT_LE(uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), uint32_t(qmax()));
1048 ASSERT_GE(uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), uint32_t(qmin()));
1049 ASSERT_EQ(uint32_t(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]),
1050 uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c])) <<
1051 "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
1052 }
1053 }
1054 }
1055 }
1056
1057 // Re-generate data for the second run.
1058 std::generate(input.begin(), input.end(), std::ref(u8rng));
1059 std::fill(output.begin(), output.end(), 0xA5);
1060
1061 // Compute reference results for the second run.
1062 for (size_t i = 0; i < next_batch_size(); i++) {
1063 for (size_t oy = 0; oy < next_output_height(); oy++) {
1064 for (size_t ox = 0; ox < next_output_width(); ox++) {
1065 for (size_t c = 0; c < channels(); c++) {
1066 uint8_t max_value = 0;
1067 for (size_t py = 0; py < pooling_height(); py++) {
1068 const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
1069 for (size_t px = 0; px < pooling_width(); px++) {
1070 const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
1071 if (ix < next_input_width() && iy < next_input_height()) {
1072 max_value = std::max(max_value,
1073 input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + c]);
1074 }
1075 }
1076 }
1077 max_value = std::min(max_value, qmax());
1078 max_value = std::max(max_value, qmin());
1079 next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c] = max_value;
1080 }
1081 }
1082 }
1083 }
1084
1085 // Setup and run Max Pooling operator the second time, and destroy the operator.
1086 ASSERT_EQ(xnn_status_success,
1087 xnn_setup_max_pooling2d_nhwc_u8(
1088 max_pooling_op,
1089 next_batch_size(), next_input_height(), next_input_width(),
1090 input.data(), output.data(),
1091 nullptr /* thread pool */));
1092
1093 ASSERT_EQ(xnn_status_success,
1094 xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
1095
1096 // Verify results of the second run.
1097 for (size_t i = 0; i < next_batch_size(); i++) {
1098 for (size_t y = 0; y < next_output_height(); y++) {
1099 for (size_t x = 0; x < next_output_width(); x++) {
1100 for (size_t c = 0; c < channels(); c++) {
1101 ASSERT_LE(uint32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), uint32_t(qmax()));
1102 ASSERT_GE(uint32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), uint32_t(qmin()));
1103 ASSERT_EQ(uint32_t(next_output_ref[((i * next_output_height() + y) * next_output_width() + x) * channels() + c]),
1104 uint32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c])) <<
1105 "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
1106 }
1107 }
1108 }
1109 }
1110 }
1111 }
1112
Marat Dukhan5756a922022-02-04 01:55:53 -08001113 void TestSetupF16() const {
1114 std::random_device random_device;
1115 auto rng = std::mt19937(random_device());
1116 // Note: we need to avoid FP16 denormals in the generated tensor because they might be processed differently in
1117 // native vs emulated arithmetics, and we use exact comparison to verify the results against reference.
1118 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.001f, 1.0f), rng);
1119 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
1120
1121 std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + std::max(
1122 (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels(),
1123 (next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + channels()));
1124 std::vector<uint16_t> output(XNN_EXTRA_BYTES / sizeof(uint16_t) + std::max(
1125 (batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels(),
1126 (next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + channels()));
1127 std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
1128 std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * channels());
1129 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1130 std::generate(input.begin(), input.end(), std::ref(f16rng));
1131 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
1132
1133 // Compute reference results, without clamping.
1134 for (size_t i = 0; i < batch_size(); i++) {
1135 for (size_t oy = 0; oy < output_height(); oy++) {
1136 for (size_t ox = 0; ox < output_width(); ox++) {
1137 for (size_t c = 0; c < channels(); c++) {
1138 float max_value = -std::numeric_limits<float>::infinity();
1139 for (size_t py = 0; py < pooling_height(); py++) {
1140 const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
1141 for (size_t px = 0; px < pooling_width(); px++) {
1142 const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
1143 if (ix < input_width() && iy < input_height()) {
1144 max_value = std::max(max_value,
1145 fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]));
1146 }
1147 }
1148 }
1149 output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
1150 }
1151 }
1152 }
1153 }
1154
1155 // Compute clamping parameters.
1156 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
1157 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
1158 const float accumulated_range = accumulated_max - accumulated_min;
1159 float output_min = accumulated_min + accumulated_range / 255.0f * float(qmin());
1160 float output_max = accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
1161 output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min));
1162 output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max));
1163 if (accumulated_range == 0.0f) {
1164 output_min = -std::numeric_limits<float>::infinity();
1165 output_max = +std::numeric_limits<float>::infinity();
1166 }
1167 if (qmin() == std::numeric_limits<uint8_t>::min()) {
1168 output_min = -std::numeric_limits<float>::infinity();
1169 }
1170 if (qmax() == std::numeric_limits<uint8_t>::max()) {
1171 output_max = +std::numeric_limits<float>::infinity();
1172 }
1173
1174 // Clamp reference results.
1175 for (float& value : output_ref) {
1176 value = std::max(std::min(value, output_max), output_min);
1177 }
1178
1179 // Create, setup, and run Max Pooling operator once.
1180 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1181 xnn_operator_t max_pooling_op = nullptr;
1182
1183 const xnn_status status = xnn_create_max_pooling2d_nhwc_f16(
1184 padding_top(), padding_right(), padding_bottom(), padding_left(),
1185 pooling_height(), pooling_width(),
1186 stride_height(), stride_width(),
1187 dilation_height(), dilation_width(),
1188 channels(), input_pixel_stride(), output_pixel_stride(),
1189 output_min, output_max,
1190 0, &max_pooling_op);
1191 if (status == xnn_status_unsupported_hardware) {
1192 GTEST_SKIP();
1193 }
1194 ASSERT_EQ(xnn_status_success, status);
1195 ASSERT_NE(nullptr, max_pooling_op);
1196
1197 // Smart pointer to automatically delete max_pooling_op.
1198 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
1199
1200 ASSERT_EQ(xnn_status_success,
1201 xnn_setup_max_pooling2d_nhwc_f16(
1202 max_pooling_op,
1203 batch_size(), input_height(), input_width(),
1204 input.data(), output.data(),
1205 nullptr /* thread pool */));
1206
1207 ASSERT_EQ(xnn_status_success,
1208 xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
1209
1210 // Verify results of the first run.
1211 for (size_t i = 0; i < batch_size(); i++) {
1212 for (size_t y = 0; y < output_height(); y++) {
1213 for (size_t x = 0; x < output_width(); x++) {
1214 for (size_t c = 0; c < channels(); c++) {
1215 ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), output_max);
1216 ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), output_min);
1217 ASSERT_EQ(
1218 fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]),
1219 output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]) <<
1220 "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c
1221 << ", min = " << output_min << ", max = " << output_max;
1222 }
1223 }
1224 }
1225 }
1226
1227 // Re-generate data for the second run.
1228 std::generate(input.begin(), input.end(), std::ref(f16rng));
1229 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
1230
1231 // Compute reference results for the second run, including clamping.
1232 for (size_t i = 0; i < next_batch_size(); i++) {
1233 for (size_t oy = 0; oy < next_output_height(); oy++) {
1234 for (size_t ox = 0; ox < next_output_width(); ox++) {
1235 for (size_t c = 0; c < channels(); c++) {
1236 float max_value = -std::numeric_limits<float>::infinity();
1237 for (size_t py = 0; py < pooling_height(); py++) {
1238 const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
1239 for (size_t px = 0; px < pooling_width(); px++) {
1240 const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
1241 if (ix < next_input_width() && iy < next_input_height()) {
1242 max_value = std::max(max_value,
1243 fp16_ieee_to_fp32_value(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + c]));
1244 }
1245 }
1246 }
1247 max_value = std::min(max_value, output_max);
1248 max_value = std::max(max_value, output_min);
1249 next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c] = max_value;
1250 }
1251 }
1252 }
1253 }
1254
1255 // Setup and run Max Pooling operator the second time, and destroy the operator.
1256 ASSERT_EQ(xnn_status_success,
1257 xnn_setup_max_pooling2d_nhwc_f16(
1258 max_pooling_op,
1259 next_batch_size(), next_input_height(), next_input_width(),
1260 input.data(), output.data(),
1261 nullptr /* thread pool */));
1262
1263 ASSERT_EQ(xnn_status_success,
1264 xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
1265
1266 // Verify results of the second run.
1267 for (size_t i = 0; i < next_batch_size(); i++) {
1268 for (size_t y = 0; y < next_output_height(); y++) {
1269 for (size_t x = 0; x < next_output_width(); x++) {
1270 for (size_t c = 0; c < channels(); c++) {
1271 ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), output_max);
1272 ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), output_min);
1273 ASSERT_EQ(
1274 fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]),
1275 next_output_ref[((i * next_output_height() + y) * next_output_width() + x) * channels() + c]) <<
1276 "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c
1277 << ", min = " << output_min << ", max = " << output_max;
1278 }
1279 }
1280 }
1281 }
1282 }
1283 }
1284
XNNPACK Teamb455b122019-09-27 18:10:33 -07001285 void TestSetupF32() const {
1286 std::random_device random_device;
1287 auto rng = std::mt19937(random_device());
1288 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
1289
1290 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max(
1291 (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels(),
1292 (next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + channels()));
1293 std::vector<float> output(XNN_EXTRA_BYTES / sizeof(float) + std::max(
1294 (batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels(),
1295 (next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + channels()));
1296 std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
1297 std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * channels());
1298 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1299 std::generate(input.begin(), input.end(), std::ref(f32rng));
1300 std::fill(output.begin(), output.end(), nanf(""));
1301
1302 // Compute reference results, without clamping.
1303 for (size_t i = 0; i < batch_size(); i++) {
1304 for (size_t oy = 0; oy < output_height(); oy++) {
1305 for (size_t ox = 0; ox < output_width(); ox++) {
1306 for (size_t c = 0; c < channels(); c++) {
1307 float max_value = -std::numeric_limits<float>::infinity();
1308 for (size_t py = 0; py < pooling_height(); py++) {
1309 const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
1310 for (size_t px = 0; px < pooling_width(); px++) {
1311 const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
1312 if (ix < input_width() && iy < input_height()) {
1313 max_value = std::max(max_value,
1314 input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
1315 }
1316 }
1317 }
1318 output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
1319 }
1320 }
1321 }
1322 }
1323
1324 // Compute clamping parameters.
1325 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
1326 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
1327 const float accumulated_range = accumulated_max - accumulated_min;
1328 const float output_min = accumulated_range == 0.0f ?
1329 -std::numeric_limits<float>::infinity() :
1330 accumulated_min + accumulated_range / 255.0f * float(qmin());
1331 const float output_max = accumulated_range == 0.0f ?
1332 +std::numeric_limits<float>::infinity() :
1333 accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
1334
1335 // Clamp reference results.
1336 for (float& value : output_ref) {
1337 value = std::max(std::min(value, output_max), output_min);
1338 }
1339
1340 // Create, setup, and run Max Pooling operator once.
Marat Dukhan04f03be2019-11-19 12:36:47 -08001341 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
XNNPACK Teamb455b122019-09-27 18:10:33 -07001342 xnn_operator_t max_pooling_op = nullptr;
1343
1344 ASSERT_EQ(xnn_status_success,
1345 xnn_create_max_pooling2d_nhwc_f32(
1346 padding_top(), padding_right(), padding_bottom(), padding_left(),
1347 pooling_height(), pooling_width(),
1348 stride_height(), stride_width(),
1349 dilation_height(), dilation_width(),
1350 channels(), input_pixel_stride(), output_pixel_stride(),
1351 output_min, output_max,
1352 0, &max_pooling_op));
1353 ASSERT_NE(nullptr, max_pooling_op);
1354
1355 // Smart pointer to automatically delete max_pooling_op.
1356 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
1357
1358 ASSERT_EQ(xnn_status_success,
1359 xnn_setup_max_pooling2d_nhwc_f32(
1360 max_pooling_op,
1361 batch_size(), input_height(), input_width(),
1362 input.data(), output.data(),
1363 nullptr /* thread pool */));
1364
1365 ASSERT_EQ(xnn_status_success,
1366 xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
1367
1368 // Verify results of the first run.
1369 for (size_t i = 0; i < batch_size(); i++) {
1370 for (size_t y = 0; y < output_height(); y++) {
1371 for (size_t x = 0; x < output_width(); x++) {
1372 for (size_t c = 0; c < channels(); c++) {
1373 ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_max);
1374 ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_min);
1375 ASSERT_EQ(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c],
1376 output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]) <<
1377 "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
1378 }
1379 }
1380 }
1381 }
1382
1383 // Re-generate data for the second run.
1384 std::generate(input.begin(), input.end(), std::ref(f32rng));
1385 std::fill(output.begin(), output.end(), 0xA5);
1386
1387 // Compute reference results for the second run, including clamping.
1388 for (size_t i = 0; i < next_batch_size(); i++) {
1389 for (size_t oy = 0; oy < next_output_height(); oy++) {
1390 for (size_t ox = 0; ox < next_output_width(); ox++) {
1391 for (size_t c = 0; c < channels(); c++) {
1392 float max_value = -std::numeric_limits<float>::infinity();
1393 for (size_t py = 0; py < pooling_height(); py++) {
1394 const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
1395 for (size_t px = 0; px < pooling_width(); px++) {
1396 const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
1397 if (ix < next_input_width() && iy < next_input_height()) {
1398 max_value = std::max(max_value,
1399 input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + c]);
1400 }
1401 }
1402 }
1403 max_value = std::min(max_value, output_max);
1404 max_value = std::max(max_value, output_min);
1405 next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c] = max_value;
1406 }
1407 }
1408 }
1409 }
1410
1411 // Setup and run Max Pooling operator the second time, and destroy the operator.
1412 ASSERT_EQ(xnn_status_success,
1413 xnn_setup_max_pooling2d_nhwc_f32(
1414 max_pooling_op,
1415 next_batch_size(), next_input_height(), next_input_width(),
1416 input.data(), output.data(),
1417 nullptr /* thread pool */));
1418
1419 ASSERT_EQ(xnn_status_success,
1420 xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
1421
1422 // Verify results of the second run.
1423 for (size_t i = 0; i < next_batch_size(); i++) {
1424 for (size_t y = 0; y < next_output_height(); y++) {
1425 for (size_t x = 0; x < next_output_width(); x++) {
1426 for (size_t c = 0; c < channels(); c++) {
1427 ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c], output_max);
1428 ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c], output_min);
1429 ASSERT_EQ(next_output_ref[((i * next_output_height() + y) * next_output_width() + x) * channels() + c],
1430 output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]) <<
1431 "in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
1432 }
1433 }
1434 }
1435 }
1436 }
1437 }
1438
1439 private:
1440 uint32_t padding_top_{0};
1441 uint32_t padding_right_{0};
1442 uint32_t padding_bottom_{0};
1443 uint32_t padding_left_{0};
Marat Dukhanbee78252020-02-27 23:52:08 -08001444 bool padding_tf_same_{false};
XNNPACK Teamb455b122019-09-27 18:10:33 -07001445 size_t input_height_{1};
1446 size_t input_width_{1};
1447 size_t channels_{1};
1448 size_t batch_size_{1};
1449 size_t input_pixel_stride_{0};
1450 size_t output_pixel_stride_{0};
1451 uint32_t pooling_height_{1};
1452 uint32_t pooling_width_{1};
1453 uint32_t stride_height_{1};
1454 uint32_t stride_width_{1};
1455 uint32_t dilation_height_{1};
1456 uint32_t dilation_width_{1};
1457 size_t next_input_height_{0};
1458 size_t next_input_width_{0};
1459 size_t next_batch_size_{0};
1460 uint8_t qmin_{0};
1461 uint8_t qmax_{255};
1462 size_t iterations_{1};
1463};