blob: 25e0f4f758c21f7a6af23ba718e8e82721fadf48 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
9#pragma once
10
11#include <gtest/gtest.h>
12
13#include <algorithm>
14#include <cassert>
15#include <cstddef>
16#include <cstdlib>
17#include <functional>
Marat Dukhan5ce30d92020-04-14 03:31:26 -070018#include <limits>
XNNPACK Teamb455b122019-09-27 18:10:33 -070019#include <random>
20#include <vector>
21
Marat Dukhan16c09122022-02-03 18:43:24 -080022#include <fp16.h>
23
XNNPACK Teamb455b122019-09-27 18:10:33 -070024#include <xnnpack.h>
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -070025#include <xnnpack/params-init.h>
Frank Barcharde0601b52019-10-25 17:43:34 -070026#include <xnnpack/params.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070027
28
29class MaxPoolMicrokernelTester {
30 public:
Marat Dukhan329da642019-11-19 21:44:39 -080031 inline MaxPoolMicrokernelTester& output_pixels(size_t output_pixels) {
32 assert(output_pixels != 0);
33 this->output_pixels_ = output_pixels;
XNNPACK Teamb455b122019-09-27 18:10:33 -070034 return *this;
35 }
36
Marat Dukhan329da642019-11-19 21:44:39 -080037 inline size_t output_pixels() const {
38 return this->output_pixels_;
XNNPACK Teamb455b122019-09-27 18:10:33 -070039 }
40
Marat Dukhan329da642019-11-19 21:44:39 -080041 inline MaxPoolMicrokernelTester& step(size_t step) {
42 assert(step != 0);
43 this->step_ = step;
XNNPACK Teamb455b122019-09-27 18:10:33 -070044 return *this;
45 }
46
Marat Dukhan329da642019-11-19 21:44:39 -080047 inline size_t step() const {
48 return this->step_;
XNNPACK Teamb455b122019-09-27 18:10:33 -070049 }
50
Marat Dukhan329da642019-11-19 21:44:39 -080051 inline MaxPoolMicrokernelTester& input_offset(size_t input_offset) {
52 assert(input_offset != 0);
53 this->input_offset_ = input_offset;
XNNPACK Teamb455b122019-09-27 18:10:33 -070054 return *this;
55 }
56
Marat Dukhan329da642019-11-19 21:44:39 -080057 inline size_t input_offset() const {
58 return this->input_offset_;
XNNPACK Teamb455b122019-09-27 18:10:33 -070059 }
60
Marat Dukhan329da642019-11-19 21:44:39 -080061 inline MaxPoolMicrokernelTester& pooling_elements(size_t pooling_elements) {
62 assert(pooling_elements != 0);
63 this->pooling_elements_ = pooling_elements;
XNNPACK Teamb455b122019-09-27 18:10:33 -070064 return *this;
65 }
66
Marat Dukhan329da642019-11-19 21:44:39 -080067 inline size_t pooling_elements() const {
68 return this->pooling_elements_;
XNNPACK Teamb455b122019-09-27 18:10:33 -070069 }
70
Marat Dukhan329da642019-11-19 21:44:39 -080071 inline size_t packed_pooling_elements() const {
72 if (pooling_elements() <= primary_pooling_tile()) {
73 return primary_pooling_tile();
XNNPACK Teamb455b122019-09-27 18:10:33 -070074 } else {
Marat Dukhan329da642019-11-19 21:44:39 -080075 return (pooling_elements() - primary_pooling_tile()) % incremental_pooling_tile() == 0 ? pooling_elements() : ((pooling_elements() - primary_pooling_tile()) / incremental_pooling_tile() + 1) * incremental_pooling_tile() + primary_pooling_tile();
XNNPACK Teamb455b122019-09-27 18:10:33 -070076 }
77 }
78
Marat Dukhan329da642019-11-19 21:44:39 -080079 inline MaxPoolMicrokernelTester& pooling_tile(size_t primary_tile, size_t incremental_tile) {
80 assert(primary_tile != 0);
81 this->primary_pooling_tile_ = primary_tile;
82 this->incremental_pooling_tile_ = incremental_tile;
XNNPACK Teamb455b122019-09-27 18:10:33 -070083 return *this;
84 }
85
Marat Dukhan329da642019-11-19 21:44:39 -080086 inline MaxPoolMicrokernelTester& primary_pooling_tile(size_t primary_pooling_tile) {
87 assert(primary_pooling_tile != 0);
88 this->primary_pooling_tile_ = primary_pooling_tile;
XNNPACK Teamb455b122019-09-27 18:10:33 -070089 return *this;
90 }
91
Marat Dukhan329da642019-11-19 21:44:39 -080092 inline size_t primary_pooling_tile() const {
93 return this->primary_pooling_tile_;
XNNPACK Teamb455b122019-09-27 18:10:33 -070094 }
95
Marat Dukhan329da642019-11-19 21:44:39 -080096 inline MaxPoolMicrokernelTester& incremental_pooling_tile(size_t incremental_pooling_tile) {
97 assert(incremental_pooling_tile != 0);
98 this->incremental_pooling_tile_ = incremental_pooling_tile;
XNNPACK Teamb455b122019-09-27 18:10:33 -070099 return *this;
100 }
101
Marat Dukhan329da642019-11-19 21:44:39 -0800102 inline size_t incremental_pooling_tile() const {
103 return this->incremental_pooling_tile_;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700104 }
105
Marat Dukhan329da642019-11-19 21:44:39 -0800106 inline MaxPoolMicrokernelTester& channels(size_t channels) {
107 assert(channels != 0);
108 this->channels_ = channels;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700109 return *this;
110 }
111
Marat Dukhan329da642019-11-19 21:44:39 -0800112 inline size_t channels() const {
113 return this->channels_;
114 }
115
116 inline MaxPoolMicrokernelTester& output_stride(size_t output_stride) {
117 assert(output_stride != 0);
118 this->output_stride_ = output_stride;
119 return *this;
120 }
121
122 inline size_t output_stride() const {
123 if (this->output_stride_ == 0) {
124 return channels();
XNNPACK Teamb455b122019-09-27 18:10:33 -0700125 } else {
Marat Dukhan329da642019-11-19 21:44:39 -0800126 assert(this->output_stride_ >= channels());
127 return this->output_stride_;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700128 }
129 }
130
131 inline MaxPoolMicrokernelTester& qmin(uint8_t qmin) {
132 this->qmin_ = qmin;
133 return *this;
134 }
135
136 inline uint8_t qmin() const {
137 return this->qmin_;
138 }
139
140 inline MaxPoolMicrokernelTester& qmax(uint8_t qmax) {
141 this->qmax_ = qmax;
142 return *this;
143 }
144
145 inline uint8_t qmax() const {
146 return this->qmax_;
147 }
148
149 inline MaxPoolMicrokernelTester& iterations(size_t iterations) {
150 this->iterations_ = iterations;
151 return *this;
152 }
153
154 inline size_t iterations() const {
155 return this->iterations_;
156 }
157
Marat Dukhan23147532021-08-16 07:26:56 -0700158 void Test(xnn_s8_maxpool_ukernel_function maxpool, xnn_init_s8_minmax_params_fn init_params) const {
159 std::random_device random_device;
160 auto rng = std::mt19937(random_device());
161 auto i8rng = std::bind(
162 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
163 std::ref(rng));
164
165 std::vector<const int8_t*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
166 std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
167 indirect_input.size() * channels());
168 std::vector<int8_t> output(XNN_EXTRA_BYTES / sizeof(int8_t) +
169 (output_pixels() - 1) * output_stride() + channels());
170 std::vector<int8_t> output_ref(output_pixels() * channels());
171 for (size_t iteration = 0; iteration < iterations(); iteration++) {
172 do {
173 std::generate(input.begin(), input.end(), std::ref(i8rng));
174 } while (input.size() > 1 && *std::max_element(input.cbegin(), input.cend()) == *std::min_element(input.cbegin(), input.cend()));
175 std::fill(output.begin(), output.end(), 0xA5);
176
177 for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
178 indirect_input[i] = input.data() + i * channels() - input_offset();
179 }
180 std::shuffle(indirect_input.begin(),
181 indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);
182
183 // Prepare parameters.
184 xnn_s8_minmax_params params;
185 init_params(&params, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
186
187 // Compute reference results.
188 for (size_t x = 0; x < output_pixels(); x++) {
189 for (size_t c = 0; c < channels(); c++) {
190 int8_t max_value = std::numeric_limits<int8_t>::min();
191 for (size_t p = 0; p < pooling_elements(); p++) {
192 max_value = std::max(max_value, indirect_input[x * step() + p][c + input_offset()]);
193 }
194 max_value = std::min(max_value, int8_t(qmax() - 0x80));
195 max_value = std::max(max_value, int8_t(qmin() - 0x80));
196 output_ref[x * channels() + c] = max_value;
197 }
198 }
199
200 // Call optimized micro-kernel.
201 maxpool(output_pixels(), pooling_elements(), channels(),
202 indirect_input.data(), input_offset() * sizeof(int8_t), output.data(),
203 (step() - packed_pooling_elements()) * sizeof(void*),
204 (output_stride() - channels()) * sizeof(int8_t),
205 &params);
206
207 // Verify results.
208 for (size_t x = 0; x < output_pixels(); x++) {
209 for (size_t c = 0; c < channels(); c++) {
210 ASSERT_GE(int32_t(output[x * output_stride() + c]), int32_t(qmin() - 0x80))
211 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
212 << ", pooling elements = " << pooling_elements() << ", step = " << step()
213 << ", input offset = " << input_offset();
214 ASSERT_LE(int32_t(output[x * output_stride() + c]), int32_t(qmax() - 0x80))
215 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
216 << ", pooling elements = " << pooling_elements() << ", step = " << step()
217 << ", input offset = " << input_offset();
218 ASSERT_EQ(int32_t(output_ref[x * channels() + c]), int32_t(output[x * output_stride() + c]))
219 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
220 << ", pooling elements = " << pooling_elements() << ", step = " << step()
221 << ", input offset = " << input_offset();
222 }
223 }
224 }
225 }
226
Marat Dukhan91ae1652021-08-15 19:19:49 -0700227 void Test(xnn_u8_maxpool_ukernel_function maxpool, xnn_init_u8_minmax_params_fn init_params) const {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700228 std::random_device random_device;
229 auto rng = std::mt19937(random_device());
Marat Dukhan5ce30d92020-04-14 03:31:26 -0700230 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700231
Marat Dukhan329da642019-11-19 21:44:39 -0800232 std::vector<const uint8_t*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
233 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
234 indirect_input.size() * channels());
235 std::vector<uint8_t> output(XNN_EXTRA_BYTES / sizeof(uint8_t) +
236 (output_pixels() - 1) * output_stride() + channels());
237 std::vector<uint8_t> output_ref(output_pixels() * channels());
XNNPACK Teamb455b122019-09-27 18:10:33 -0700238 for (size_t iteration = 0; iteration < iterations(); iteration++) {
Marat Dukhan329da642019-11-19 21:44:39 -0800239 do {
240 std::generate(input.begin(), input.end(), std::ref(u8rng));
241 } while (input.size() > 1 && *std::max_element(input.cbegin(), input.cend()) == *std::min_element(input.cbegin(), input.cend()));
242 std::fill(output.begin(), output.end(), 0xA5);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700243
Marat Dukhan329da642019-11-19 21:44:39 -0800244 for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
245 indirect_input[i] = input.data() + i * channels() - input_offset();
XNNPACK Teamb455b122019-09-27 18:10:33 -0700246 }
Marat Dukhan329da642019-11-19 21:44:39 -0800247 std::shuffle(indirect_input.begin(),
248 indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700249
Frank Barchard9f3a8432020-06-02 13:59:35 -0700250 // Prepare parameters.
Marat Dukhanf56f4c42021-05-17 01:47:20 -0700251 xnn_u8_minmax_params params;
Marat Dukhan91ae1652021-08-15 19:19:49 -0700252 init_params(&params, qmin(), qmax());
XNNPACK Teamb455b122019-09-27 18:10:33 -0700253
254 // Compute reference results.
Marat Dukhan329da642019-11-19 21:44:39 -0800255 for (size_t x = 0; x < output_pixels(); x++) {
256 for (size_t c = 0; c < channels(); c++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700257 uint8_t max_value = 0;
Marat Dukhan329da642019-11-19 21:44:39 -0800258 for (size_t p = 0; p < pooling_elements(); p++) {
259 max_value = std::max(max_value, indirect_input[x * step() + p][c + input_offset()]);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700260 }
261 max_value = std::min(max_value, qmax());
262 max_value = std::max(max_value, qmin());
Marat Dukhan329da642019-11-19 21:44:39 -0800263 output_ref[x * channels() + c] = max_value;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700264 }
265 }
266
267 // Call optimized micro-kernel.
Marat Dukhan329da642019-11-19 21:44:39 -0800268 maxpool(output_pixels(), pooling_elements(), channels(),
269 indirect_input.data(), input_offset() * sizeof(uint8_t), output.data(),
270 (step() - packed_pooling_elements()) * sizeof(void*),
271 (output_stride() - channels()) * sizeof(uint8_t),
Frank Barcharde70dbeb2020-05-01 15:46:41 -0700272 &params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700273
274 // Verify results.
Marat Dukhan329da642019-11-19 21:44:39 -0800275 for (size_t x = 0; x < output_pixels(); x++) {
276 for (size_t c = 0; c < channels(); c++) {
277 ASSERT_GE(uint32_t(output[x * output_stride() + c]), uint32_t(qmin()))
278 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
279 << ", pooling elements = " << pooling_elements() << ", step = " << step()
280 << ", input offset = " << input_offset();
281 ASSERT_LE(uint32_t(output[x * output_stride() + c]), uint32_t(qmax()))
282 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
283 << ", pooling elements = " << pooling_elements() << ", step = " << step()
284 << ", input offset = " << input_offset();
285 ASSERT_EQ(uint32_t(output_ref[x * channels() + c]), uint32_t(output[x * output_stride() + c]))
286 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
287 << ", pooling elements = " << pooling_elements() << ", step = " << step()
288 << ", input offset = " << input_offset();
XNNPACK Teamb455b122019-09-27 18:10:33 -0700289 }
290 }
291 }
292 }
293
Marat Dukhan16c09122022-02-03 18:43:24 -0800294 void Test(xnn_f16_maxpool_ukernel_function maxpool, xnn_init_f16_minmax_params_fn init_params) const {
295 std::random_device random_device;
296 auto rng = std::mt19937(random_device());
297 auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng);
298 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
299
300 std::vector<const uint16_t*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
301 std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) +
302 ((output_pixels() - 1) * step() + pooling_elements()) * channels());
303 std::vector<uint16_t> output(XNN_EXTRA_BYTES / sizeof(uint16_t) +
304 (output_pixels() - 1) * output_stride() + channels());
305 std::vector<float> output_ref(output_pixels() * channels());
306 for (size_t iteration = 0; iteration < iterations(); iteration++) {
307 std::generate(input.begin(), input.end(), std::ref(f16rng));
308 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
309
310 for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
311 indirect_input[i] = input.data() + i * channels() - input_offset();
312 }
313 std::shuffle(indirect_input.begin(),
314 indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);
315
316 // Compute reference results, without clamping.
317 for (size_t x = 0; x < output_pixels(); x++) {
318 for (size_t c = 0; c < channels(); c++) {
319 float max_value = -std::numeric_limits<float>::infinity();
320 for (size_t p = 0; p < pooling_elements(); p++) {
321 max_value = std::max(max_value, fp16_ieee_to_fp32_value(indirect_input[x * step() + p][c + input_offset()]));
322 }
323 output_ref[x * channels() + c] = max_value;
324 }
325 }
326
327 // Compute clamping parameters.
328 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
329 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
330 const float accumulated_range = accumulated_max - accumulated_min;
331 float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range;
332 if (qmin() == std::numeric_limits<uint8_t>::min()) {
333 output_min = -std::numeric_limits<float>::infinity();
334 }
335 float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range;
336 if (qmax() == std::numeric_limits<uint8_t>::max()) {
337 output_max = +std::numeric_limits<float>::infinity();
338 }
339 output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min));
340 output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max));
341
342 // Prepare parameters.
343 xnn_f16_minmax_params params;
344 init_params(&params, fp16_ieee_from_fp32_value(output_min), fp16_ieee_from_fp32_value(output_max));
345
346 // Clamp reference results.
347 for (float& output_value : output_ref) {
348 output_value = std::max(std::min(output_value, output_max), output_min);
349 }
350
351 // Call optimized micro-kernel.
352 maxpool(output_pixels(), pooling_elements(), channels(),
353 reinterpret_cast<const void**>(indirect_input.data()), input_offset() * sizeof(uint16_t), output.data(),
354 (step() - packed_pooling_elements()) * sizeof(void*),
355 (output_stride() - channels()) * sizeof(uint16_t),
356 &params);
357
358 // Verify results.
359 for (size_t x = 0; x < output_pixels(); x++) {
360 for (size_t c = 0; c < channels(); c++) {
361 ASSERT_GE(fp16_ieee_to_fp32_value(output[x * output_stride() + c]), output_min)
362 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
363 << ", pooling elements = " << pooling_elements() << ", step = " << step()
364 << ", input offset = " << input_offset();
365 ASSERT_LE(fp16_ieee_to_fp32_value(output[x * output_stride() + c]), output_max)
366 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
367 << ", pooling elements = " << pooling_elements() << ", step = " << step()
368 << ", input offset = " << input_offset();
369 ASSERT_EQ(fp16_ieee_to_fp32_value(output[x * output_stride() + c]), output_ref[x * channels() + c])
370 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
371 << ", pooling elements = " << pooling_elements() << ", step = " << step()
372 << ", input offset = " << input_offset();
373 }
374 }
375 }
376 }
377
Marat Dukhan91ae1652021-08-15 19:19:49 -0700378 void Test(xnn_f32_maxpool_ukernel_function maxpool, xnn_init_f32_minmax_params_fn init_params) const {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700379 std::random_device random_device;
380 auto rng = std::mt19937(random_device());
Marat Dukhan16c09122022-02-03 18:43:24 -0800381 auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700382
Marat Dukhan329da642019-11-19 21:44:39 -0800383 std::vector<const float*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
384 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
385 ((output_pixels() - 1) * step() + pooling_elements()) * channels());
386 std::vector<float> output(XNN_EXTRA_BYTES / sizeof(float) +
387 (output_pixels() - 1) * output_stride() + channels());
388 std::vector<float> output_ref(output_pixels() * channels());
XNNPACK Teamb455b122019-09-27 18:10:33 -0700389 for (size_t iteration = 0; iteration < iterations(); iteration++) {
Marat Dukhan329da642019-11-19 21:44:39 -0800390 std::generate(input.begin(), input.end(), std::ref(f32rng));
391 std::fill(output.begin(), output.end(), nanf(""));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700392
Marat Dukhan329da642019-11-19 21:44:39 -0800393 for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
394 indirect_input[i] = input.data() + i * channels() - input_offset();
XNNPACK Teamb455b122019-09-27 18:10:33 -0700395 }
Marat Dukhan329da642019-11-19 21:44:39 -0800396 std::shuffle(indirect_input.begin(),
397 indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700398
399 // Compute reference results, without clamping.
Marat Dukhan329da642019-11-19 21:44:39 -0800400 for (size_t x = 0; x < output_pixels(); x++) {
401 for (size_t c = 0; c < channels(); c++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700402 float max_value = -std::numeric_limits<float>::infinity();
Marat Dukhan329da642019-11-19 21:44:39 -0800403 for (size_t p = 0; p < pooling_elements(); p++) {
404 max_value = std::max(max_value, indirect_input[x * step() + p][c + input_offset()]);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700405 }
Marat Dukhan329da642019-11-19 21:44:39 -0800406 output_ref[x * channels() + c] = max_value;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700407 }
408 }
409
410 // Compute clamping parameters.
Marat Dukhan329da642019-11-19 21:44:39 -0800411 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
412 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
XNNPACK Teamb455b122019-09-27 18:10:33 -0700413 const float accumulated_range = accumulated_max - accumulated_min;
Marat Dukhan329da642019-11-19 21:44:39 -0800414 const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range;
415 const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700416
417
Frank Barchard9f3a8432020-06-02 13:59:35 -0700418 // Prepare parameters.
Marat Dukhanf56f4c42021-05-17 01:47:20 -0700419 xnn_f32_minmax_params params;
Marat Dukhan91ae1652021-08-15 19:19:49 -0700420 init_params(&params, output_min, output_max);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700421
422 // Clamp reference results.
Marat Dukhan329da642019-11-19 21:44:39 -0800423 for (float& output_value : output_ref) {
424 output_value = std::max(std::min(output_value, output_max), output_min);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700425 }
426
427 // Call optimized micro-kernel.
Marat Dukhan329da642019-11-19 21:44:39 -0800428 maxpool(output_pixels(), pooling_elements(), channels(),
429 indirect_input.data(), input_offset() * sizeof(float), output.data(),
430 (step() - packed_pooling_elements()) * sizeof(void*),
431 (output_stride() - channels()) * sizeof(float),
Frank Barcharde70dbeb2020-05-01 15:46:41 -0700432 &params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700433
434 // Verify results.
Marat Dukhan329da642019-11-19 21:44:39 -0800435 for (size_t x = 0; x < output_pixels(); x++) {
436 for (size_t c = 0; c < channels(); c++) {
437 ASSERT_GE(output[x * output_stride() + c], output_min)
438 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
439 << ", pooling elements = " << pooling_elements() << ", step = " << step()
440 << ", input offset = " << input_offset();
441 ASSERT_LE(output[x * output_stride() + c], output_max)
442 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
443 << ", pooling elements = " << pooling_elements() << ", step = " << step()
444 << ", input offset = " << input_offset();
445 ASSERT_EQ(output_ref[x * channels() + c], output[x * output_stride() + c])
446 << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
447 << ", pooling elements = " << pooling_elements() << ", step = " << step()
448 << ", input offset = " << input_offset();
XNNPACK Teamb455b122019-09-27 18:10:33 -0700449 }
450 }
451 }
452 }
453
454 private:
Marat Dukhan329da642019-11-19 21:44:39 -0800455 size_t output_pixels_{1};
456 size_t pooling_elements_{1};
457 size_t channels_{1};
458 size_t input_offset_{0};
459 size_t step_{1};
460 size_t primary_pooling_tile_{1};
461 size_t incremental_pooling_tile_{1};
462 size_t output_stride_{0};
XNNPACK Teamb455b122019-09-27 18:10:33 -0700463 uint8_t qmin_{0};
464 uint8_t qmax_{255};
Marat Dukhan329da642019-11-19 21:44:39 -0800465 size_t iterations_{3};
XNNPACK Teamb455b122019-09-27 18:10:33 -0700466};