Blame - test/max-pooling-operator-tester.h - platform/external/XNNPACK

blob: 106150603294ca459b25de2d6859aa8359f685e8 [file] [log] [blame]

XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	1	// Copyright (c) Facebook, Inc. and its affiliates.
				2	// All rights reserved.
				3	//
				4	// Copyright 2019 Google LLC
				5	//
				6	// This source code is licensed under the BSD-style license found in the
				7	// LICENSE file in the root directory of this source tree.
				8
				9	#pragma once
				10
				11	#include <gtest/gtest.h>
				12
Marat Dukhan	5756a92	2022-02-04 01:55:53 -0800	[diff] [blame]	13	#include <fp16.h>
				14
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	15	#include <algorithm>
				16	#include <cassert>
				17	#include <cstddef>
				18	#include <cstdlib>
				19	#include <functional>
				20	#include <limits>
				21	#include <random>
				22	#include <vector>
				23
				24	#include <xnnpack.h>
				25
				26
				27	class MaxPoolingOperatorTester {
				28	public:
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	29	inline MaxPoolingOperatorTester& padding_tf_same(bool padding_same) {
				30	if (padding_same) {
				31	assert(padding_top() == 0);
				32	assert(padding_left() == 0);
				33	assert(padding_bottom() == 0);
				34	assert(padding_right() == 0);
				35	}
				36	this->padding_tf_same_ = padding_same;
				37	return *this;
				38	}
				39
				40	inline bool padding_tf_same() const {
				41	return this->padding_tf_same_;
				42	}
				43
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	44	inline MaxPoolingOperatorTester& padding(uint32_t padding) {
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	45	assert(!padding_tf_same());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	46	this->padding_top_ = padding;
				47	this->padding_right_ = padding;
				48	this->padding_bottom_ = padding;
				49	this->padding_left_ = padding;
				50	return *this;
				51	}
				52
				53	inline MaxPoolingOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	54	assert(!padding_tf_same());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	55	this->padding_top_ = padding_height;
				56	this->padding_right_ = padding_width;
				57	this->padding_bottom_ = padding_height;
				58	this->padding_left_ = padding_width;
				59	return *this;
				60	}
				61
				62	inline MaxPoolingOperatorTester& padding_height(uint32_t padding_height) {
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	63	assert(!padding_tf_same());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	64	this->padding_top_ = padding_height;
				65	this->padding_bottom_ = padding_height;
				66	return *this;
				67	}
				68
				69	inline MaxPoolingOperatorTester& padding_width(uint32_t padding_width) {
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	70	assert(!padding_tf_same());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	71	this->padding_right_ = padding_width;
				72	this->padding_left_ = padding_width;
				73	return *this;
				74	}
				75
				76	inline MaxPoolingOperatorTester& padding_top(uint32_t padding_top) {
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	77	assert(!padding_tf_same());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	78	this->padding_top_ = padding_top;
				79	return *this;
				80	}
				81
				82	inline uint32_t padding_top() const {
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	83	if (padding_tf_same()) {
				84	const uint32_t total_padding_height =
				85	(output_height() - 1) * stride_height() + dilated_pooling_height() - input_height();
				86	return total_padding_height / 2;
				87	} else {
				88	return this->padding_top_;
				89	}
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	90	}
				91
				92	inline MaxPoolingOperatorTester& padding_left(uint32_t padding_left) {
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	93	assert(!padding_tf_same());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	94	this->padding_left_ = padding_left;
				95	return *this;
				96	}
				97
				98	inline uint32_t padding_left() const {
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	99	if (padding_tf_same()) {
				100	const uint32_t total_padding_width =
				101	(output_width() - 1) * stride_width() + dilated_pooling_width() - input_width();
				102	return total_padding_width / 2;
				103	} else {
				104	return this->padding_left_;
				105	}
				106	}
				107
				108	inline MaxPoolingOperatorTester& padding_bottom(uint32_t padding_bottom) {
				109	assert(!padding_tf_same());
				110	this->padding_bottom_ = padding_bottom;
				111	return *this;
				112	}
				113
				114	inline uint32_t padding_bottom() const {
				115	if (padding_tf_same()) {
				116	const uint32_t total_padding_height =
				117	(output_height() - 1) * stride_height() + dilated_pooling_height() - input_height();
				118	return total_padding_height - total_padding_height / 2;
				119	} else {
				120	return this->padding_bottom_;
				121	}
				122	}
				123
				124	inline MaxPoolingOperatorTester& padding_right(uint32_t padding_right) {
				125	assert(!padding_tf_same());
				126	this->padding_right_ = padding_right;
				127	return *this;
				128	}
				129
				130	inline uint32_t padding_right() const {
				131	if (padding_tf_same()) {
				132	const uint32_t total_padding_width =
				133	(output_width() - 1) * stride_width() + dilated_pooling_width() - input_width();
				134	return total_padding_width - total_padding_width / 2;
				135	} else {
				136	return this->padding_right_;
				137	}
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	138	}
				139
				140	inline MaxPoolingOperatorTester& input_size(size_t input_height, size_t input_width) {
				141	assert(input_height >= 1);
				142	assert(input_width >= 1);
				143	this->input_height_ = input_height;
				144	this->input_width_ = input_width;
				145	return *this;
				146	}
				147
				148	inline MaxPoolingOperatorTester& input_height(size_t input_height) {
				149	assert(input_height >= 1);
				150	this->input_height_ = input_height;
				151	return *this;
				152	}
				153
				154	inline size_t input_height() const {
				155	return this->input_height_;
				156	}
				157
				158	inline MaxPoolingOperatorTester& input_width(size_t input_width) {
				159	assert(input_width >= 1);
				160	this->input_width_ = input_width;
				161	return *this;
				162	}
				163
				164	inline size_t input_width() const {
				165	return this->input_width_;
				166	}
				167
				168	inline MaxPoolingOperatorTester& channels(size_t channels) {
				169	assert(channels != 0);
				170	this->channels_ = channels;
				171	return *this;
				172	}
				173
				174	inline size_t channels() const {
				175	return this->channels_;
				176	}
				177
				178	inline MaxPoolingOperatorTester& batch_size(size_t batch_size) {
				179	assert(batch_size != 0);
				180	this->batch_size_ = batch_size;
				181	return *this;
				182	}
				183
				184	inline size_t batch_size() const {
				185	return this->batch_size_;
				186	}
				187
				188	inline MaxPoolingOperatorTester& pooling_size(uint32_t pooling_size) {
				189	assert(pooling_size >= 1);
				190	this->pooling_height_ = pooling_size;
				191	this->pooling_width_ = pooling_size;
				192	return *this;
				193	}
				194
				195	inline MaxPoolingOperatorTester& pooling_size(uint32_t pooling_height, uint32_t pooling_width) {
				196	assert(pooling_height >= 1);
				197	assert(pooling_width >= 1);
				198	this->pooling_height_ = pooling_height;
				199	this->pooling_width_ = pooling_width;
				200	return *this;
				201	}
				202
				203	inline MaxPoolingOperatorTester& pooling_height(uint32_t pooling_height) {
				204	assert(pooling_height >= 1);
				205	this->pooling_height_ = pooling_height;
				206	return *this;
				207	}
				208
				209	inline uint32_t pooling_height() const {
				210	return this->pooling_height_;
				211	}
				212
				213	inline MaxPoolingOperatorTester& pooling_width(uint32_t pooling_width) {
				214	assert(pooling_width >= 1);
				215	this->pooling_width_ = pooling_width;
				216	return *this;
				217	}
				218
				219	inline uint32_t pooling_width() const {
				220	return this->pooling_width_;
				221	}
				222
				223	inline MaxPoolingOperatorTester& stride(uint32_t stride) {
				224	assert(stride >= 1);
				225	this->stride_height_ = stride;
				226	this->stride_width_ = stride;
				227	return *this;
				228	}
				229
				230	inline MaxPoolingOperatorTester& stride(uint32_t stride_height, uint32_t stride_width) {
				231	assert(stride_height >= 1);
				232	assert(stride_width >= 1);
				233	this->stride_height_ = stride_height;
				234	this->stride_width_ = stride_width;
				235	return *this;
				236	}
				237
				238	inline MaxPoolingOperatorTester& stride_height(uint32_t stride_height) {
				239	assert(stride_height >= 1);
				240	this->stride_height_ = stride_height;
				241	return *this;
				242	}
				243
				244	inline uint32_t stride_height() const {
				245	return this->stride_height_;
				246	}
				247
				248	inline MaxPoolingOperatorTester& stride_width(uint32_t stride_width) {
				249	assert(stride_width >= 1);
				250	this->stride_width_ = stride_width;
				251	return *this;
				252	}
				253
				254	inline uint32_t stride_width() const {
				255	return this->stride_width_;
				256	}
				257
				258	inline MaxPoolingOperatorTester& dilation(uint32_t dilation) {
				259	assert(dilation >= 1);
				260	this->dilation_height_ = dilation;
				261	this->dilation_width_ = dilation;
				262	return *this;
				263	}
				264
				265	inline MaxPoolingOperatorTester& dilation(uint32_t dilation_height, uint32_t dilation_width) {
				266	assert(dilation_height >= 1);
				267	assert(dilation_width >= 1);
				268	this->dilation_height_ = dilation_height;
				269	this->dilation_width_ = dilation_width;
				270	return *this;
				271	}
				272
				273	inline MaxPoolingOperatorTester& dilation_height(uint32_t dilation_height) {
				274	assert(dilation_height >= 1);
				275	this->dilation_height_ = dilation_height;
				276	return *this;
				277	}
				278
				279	inline uint32_t dilation_height() const {
				280	return this->dilation_height_;
				281	}
				282
				283	inline MaxPoolingOperatorTester& dilation_width(uint32_t dilation_width) {
				284	assert(dilation_width >= 1);
				285	this->dilation_width_ = dilation_width;
				286	return *this;
				287	}
				288
				289	inline uint32_t dilation_width() const {
				290	return this->dilation_width_;
				291	}
				292
				293	inline uint32_t dilated_pooling_height() const {
				294	return (pooling_height() - 1) * dilation_height() + 1;
				295	}
				296
				297	inline uint32_t dilated_pooling_width() const {
				298	return (pooling_width() - 1) * dilation_width() + 1;
				299	}
				300
				301	inline size_t output_height() const {
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	302	if (padding_tf_same()) {
				303	return (input_height() + stride_height() - 1) / stride_height();
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	304	} else {
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	305	const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
				306	if (padded_input_height <= dilated_pooling_height()) {
				307	return 1;
				308	} else {
				309	return (padded_input_height - dilated_pooling_height()) / stride_height() + 1;
				310	}
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	311	}
				312	}
				313
				314	inline size_t output_width() const {
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	315	if (padding_tf_same()) {
				316	return (input_width() + stride_width() - 1) / stride_width();
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	317	} else {
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	318	const size_t padded_input_width = padding_left() + input_width() + padding_right();
				319	if (padded_input_width <= dilated_pooling_width()) {
				320	return 1;
				321	} else {
				322	return (padded_input_width - dilated_pooling_width()) / stride_width() + 1;
				323	}
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	324	}
				325	}
				326
				327	inline MaxPoolingOperatorTester& input_pixel_stride(size_t input_pixel_stride) {
				328	assert(input_pixel_stride != 0);
				329	this->input_pixel_stride_ = input_pixel_stride;
				330	return *this;
				331	}
				332
				333	inline size_t input_pixel_stride() const {
				334	if (this->input_pixel_stride_ == 0) {
				335	return channels();
				336	} else {
				337	assert(this->input_pixel_stride_ >= channels());
				338	return this->input_pixel_stride_;
				339	}
				340	}
				341
				342	inline MaxPoolingOperatorTester& output_pixel_stride(size_t output_pixel_stride) {
				343	assert(output_pixel_stride != 0);
				344	this->output_pixel_stride_ = output_pixel_stride;
				345	return *this;
				346	}
				347
				348	inline size_t output_pixel_stride() const {
				349	if (this->output_pixel_stride_ == 0) {
				350	return channels();
				351	} else {
				352	assert(this->output_pixel_stride_ >= channels());
				353	return this->output_pixel_stride_;
				354	}
				355	}
				356
				357	inline MaxPoolingOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
				358	assert(next_input_height >= 1);
				359	assert(next_input_width >= 1);
				360	this->next_input_height_ = next_input_height;
				361	this->next_input_width_ = next_input_width;
				362	return *this;
				363	}
				364
				365	inline MaxPoolingOperatorTester& next_input_height(uint32_t next_input_height) {
				366	assert(next_input_height >= 1);
				367	this->next_input_height_ = next_input_height;
				368	return *this;
				369	}
				370
				371	inline uint32_t next_input_height() const {
				372	if (this->next_input_height_ == 0) {
				373	return input_height();
				374	} else {
				375	return this->next_input_height_;
				376	}
				377	}
				378
				379	inline MaxPoolingOperatorTester& next_input_width(uint32_t next_input_width) {
				380	assert(next_input_width >= 1);
				381	this->next_input_width_ = next_input_width;
				382	return *this;
				383	}
				384
				385	inline uint32_t next_input_width() const {
				386	if (this->next_input_width_ == 0) {
				387	return input_width();
				388	} else {
				389	return this->next_input_width_;
				390	}
				391	}
				392
				393	inline size_t next_output_height() const {
				394	const size_t padded_next_input_height = padding_top() + next_input_height() + padding_bottom();
				395	if (padded_next_input_height <= dilated_pooling_height()) {
				396	return 1;
				397	} else {
				398	return (padded_next_input_height - dilated_pooling_height()) / stride_height() + 1;
				399	}
				400	}
				401
				402	inline size_t next_output_width() const {
				403	const size_t padded_next_input_width = padding_left() + next_input_width() + padding_right();
				404	if (padded_next_input_width <= dilated_pooling_width()) {
				405	return 1;
				406	} else {
				407	return (padded_next_input_width - dilated_pooling_width()) / stride_width() + 1;
				408	}
				409	}
				410
				411	inline MaxPoolingOperatorTester& next_batch_size(size_t next_batch_size) {
				412	assert(next_batch_size >= 1);
				413	this->next_batch_size_ = next_batch_size;
				414	return *this;
				415	}
				416
				417	inline size_t next_batch_size() const {
				418	if (this->next_batch_size_ == 0) {
				419	return batch_size();
				420	} else {
				421	return this->next_batch_size_;
				422	}
				423	}
				424
				425	inline MaxPoolingOperatorTester& qmin(uint8_t qmin) {
				426	this->qmin_ = qmin;
				427	return *this;
				428	}
				429
				430	inline uint8_t qmin() const {
				431	return this->qmin_;
				432	}
				433
				434	inline MaxPoolingOperatorTester& qmax(uint8_t qmax) {
				435	this->qmax_ = qmax;
				436	return *this;
				437	}
				438
				439	inline uint8_t qmax() const {
				440	return this->qmax_;
				441	}
				442
				443	inline MaxPoolingOperatorTester& iterations(size_t iterations) {
				444	this->iterations_ = iterations;
				445	return *this;
				446	}
				447
				448	inline size_t iterations() const {
				449	return this->iterations_;
				450	}
				451
Marat Dukhan	dc5c148	2021-08-16 09:03:15 -0700	[diff] [blame]	452	void TestS8() const {
				453	std::random_device random_device;
				454	auto rng = std::mt19937(random_device());
				455	auto i8rng = std::bind(
				456	std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
				457	std::ref(rng));
				458
				459	std::vector<int8_t> input((batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
				460	std::vector<int8_t> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
				461	std::vector<int8_t> output_ref(batch_size() * output_height() * output_width() * channels());
				462	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				463	std::generate(input.begin(), input.end(), std::ref(i8rng));
				464	std::fill(output.begin(), output.end(), 0xA5);
				465
				466	// Compute reference results.
				467	for (size_t i = 0; i < batch_size(); i++) {
				468	for (size_t oy = 0; oy < output_height(); oy++) {
				469	for (size_t ox = 0; ox < output_width(); ox++) {
				470	for (size_t c = 0; c < channels(); c++) {
				471	int8_t max_value = std::numeric_limits<int8_t>::min();
				472	for (size_t py = 0; py < pooling_height(); py++) {
				473	const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
				474	for (size_t px = 0; px < pooling_width(); px++) {
				475	const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
				476	if (ix < input_width() && iy < input_height()) {
				477	max_value = std::max(max_value,
				478	input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
				479	}
				480	}
				481	}
				482	max_value = std::min(max_value, int8_t(qmax() - 0x80));
				483	max_value = std::max(max_value, int8_t(qmin() - 0x80));
				484	output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
				485	}
				486	}
				487	}
				488	}
				489
				490	// Create, setup, run, and destroy Max Pooling operator.
				491	ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
				492	xnn_operator_t max_pooling_op = nullptr;
				493
				494	ASSERT_EQ(xnn_status_success,
				495	xnn_create_max_pooling2d_nhwc_s8(
				496	padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
				497	padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
				498	pooling_height(), pooling_width(),
				499	stride_height(), stride_width(),
				500	dilation_height(), dilation_width(),
				501	channels(), input_pixel_stride(), output_pixel_stride(),
				502	int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
				503	padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0,
				504	&max_pooling_op));
				505	ASSERT_NE(nullptr, max_pooling_op);
				506
				507	// Smart pointer to automatically delete max_pooling_op.
				508	std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
				509
				510	ASSERT_EQ(xnn_status_success,
				511	xnn_setup_max_pooling2d_nhwc_s8(
				512	max_pooling_op,
				513	batch_size(), input_height(), input_width(),
				514	input.data(), output.data(),
				515	nullptr /* thread pool */));
				516
				517	ASSERT_EQ(xnn_status_success,
				518	xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
				519
				520	// Verify results.
				521	for (size_t i = 0; i < batch_size(); i++) {
				522	for (size_t y = 0; y < output_height(); y++) {
				523	for (size_t x = 0; x < output_width(); x++) {
				524	for (size_t c = 0; c < channels(); c++) {
				525	ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), int32_t(qmax() - 0x80));
				526	ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), int32_t(qmin() - 0x80));
				527	ASSERT_EQ(int32_t(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]),
				528	int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c])) <<
				529	"in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
				530	}
				531	}
				532	}
				533	}
				534	}
				535	}
				536
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	537	void TestU8() const {
				538	std::random_device random_device;
				539	auto rng = std::mt19937(random_device());
Marat Dukhan	5ce30d9	2020-04-14 03:31:26 -0700	[diff] [blame]	540	auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	541
				542	std::vector<uint8_t> input((batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
				543	std::vector<uint8_t> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
				544	std::vector<uint8_t> output_ref(batch_size() * output_height() * output_width() * channels());
				545	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				546	std::generate(input.begin(), input.end(), std::ref(u8rng));
				547	std::fill(output.begin(), output.end(), 0xA5);
				548
				549	// Compute reference results.
				550	for (size_t i = 0; i < batch_size(); i++) {
				551	for (size_t oy = 0; oy < output_height(); oy++) {
				552	for (size_t ox = 0; ox < output_width(); ox++) {
				553	for (size_t c = 0; c < channels(); c++) {
				554	uint8_t max_value = 0;
				555	for (size_t py = 0; py < pooling_height(); py++) {
				556	const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
				557	for (size_t px = 0; px < pooling_width(); px++) {
				558	const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
Marat Dukhan	e0df831	2019-10-22 18:16:56 -0700	[diff] [blame]	559	if (ix < input_width() && iy < input_height()) {
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	560	max_value = std::max(max_value,
				561	input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
				562	}
				563	}
				564	}
				565	max_value = std::min(max_value, qmax());
				566	max_value = std::max(max_value, qmin());
				567	output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
				568	}
				569	}
				570	}
				571	}
				572
				573	// Create, setup, run, and destroy Max Pooling operator.
Marat Dukhan	04f03be	2019-11-19 12:36:47 -0800	[diff] [blame]	574	ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	575	xnn_operator_t max_pooling_op = nullptr;
				576
				577	ASSERT_EQ(xnn_status_success,
				578	xnn_create_max_pooling2d_nhwc_u8(
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	579	padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
				580	padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	581	pooling_height(), pooling_width(),
				582	stride_height(), stride_width(),
				583	dilation_height(), dilation_width(),
				584	channels(), input_pixel_stride(), output_pixel_stride(),
				585	qmin(), qmax(),
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	586	padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0,
				587	&max_pooling_op));
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	588	ASSERT_NE(nullptr, max_pooling_op);
				589
				590	// Smart pointer to automatically delete max_pooling_op.
				591	std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
				592
				593	ASSERT_EQ(xnn_status_success,
				594	xnn_setup_max_pooling2d_nhwc_u8(
				595	max_pooling_op,
				596	batch_size(), input_height(), input_width(),
				597	input.data(), output.data(),
				598	nullptr /* thread pool */));
				599
				600	ASSERT_EQ(xnn_status_success,
				601	xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
				602
				603	// Verify results.
				604	for (size_t i = 0; i < batch_size(); i++) {
				605	for (size_t y = 0; y < output_height(); y++) {
				606	for (size_t x = 0; x < output_width(); x++) {
				607	for (size_t c = 0; c < channels(); c++) {
				608	ASSERT_LE(uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), uint32_t(qmax()));
				609	ASSERT_GE(uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), uint32_t(qmin()));
				610	ASSERT_EQ(uint32_t(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]),
				611	uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c])) <<
				612	"in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
				613	}
				614	}
				615	}
				616	}
				617	}
				618	}
				619
Marat Dukhan	5756a92	2022-02-04 01:55:53 -0800	[diff] [blame]	620	void TestF16() const {
				621	std::random_device random_device;
				622	auto rng = std::mt19937(random_device());
				623	// Note: we need to avoid FP16 denormals in the generated tensor because they might be processed differently in
				624	// native vs emulated arithmetics, and we use exact comparison to verify the results against reference.
				625	auto f32rng = std::bind(std::uniform_real_distribution<float>(0.001f, 1.0f), rng);
				626	auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
				627
				628	std::vector<uint16_t> input((batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
				629	std::vector<uint16_t> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
				630	std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
				631	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				632	std::generate(input.begin(), input.end(), std::ref(f16rng));
				633	std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
				634
				635	// Compute reference results, without clamping.
				636	for (size_t i = 0; i < batch_size(); i++) {
				637	for (size_t oy = 0; oy < output_height(); oy++) {
				638	for (size_t ox = 0; ox < output_width(); ox++) {
				639	for (size_t c = 0; c < channels(); c++) {
				640	float max_value = -std::numeric_limits<float>::infinity();
				641	for (size_t py = 0; py < pooling_height(); py++) {
				642	const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
				643	for (size_t px = 0; px < pooling_width(); px++) {
				644	const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
				645	if (ix < input_width() && iy < input_height()) {
				646	max_value = std::max(max_value,
				647	fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]));
				648	}
				649	}
				650	}
				651	output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
				652	}
				653	}
				654	}
				655	}
				656
				657	// Compute clamping parameters.
				658	const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
				659	const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
				660	const float accumulated_range = accumulated_max - accumulated_min;
				661	float output_min = accumulated_min + accumulated_range / 255.0f * float(qmin());
				662	float output_max = accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
				663	output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min));
				664	output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max));
				665	if (accumulated_range == 0.0f) {
				666	output_min = -std::numeric_limits<float>::infinity();
				667	output_max = +std::numeric_limits<float>::infinity();
				668	}
				669	if (qmin() == std::numeric_limits<uint8_t>::min()) {
				670	output_min = -std::numeric_limits<float>::infinity();
				671	}
				672	if (qmax() == std::numeric_limits<uint8_t>::max()) {
				673	output_max = +std::numeric_limits<float>::infinity();
				674	}
				675
				676	// Clamp reference results.
				677	for (float& value : output_ref) {
				678	value = std::max(std::min(value, output_max), output_min);
				679	}
				680
				681	// Create, setup, run, and destroy Max Pooling operator.
				682	ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
				683	xnn_operator_t max_pooling_op = nullptr;
				684
				685	const xnn_status status = xnn_create_max_pooling2d_nhwc_f16(
				686	padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
				687	padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
				688	pooling_height(), pooling_width(),
				689	stride_height(), stride_width(),
				690	dilation_height(), dilation_width(),
				691	channels(), input_pixel_stride(), output_pixel_stride(),
				692	output_min, output_max,
				693	padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0,
				694	&max_pooling_op);
				695	if (status == xnn_status_unsupported_hardware) {
				696	GTEST_SKIP();
				697	}
				698	ASSERT_EQ(xnn_status_success, status);
				699	ASSERT_NE(nullptr, max_pooling_op);
				700
				701	// Smart pointer to automatically delete max_pooling_op.
				702	std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
				703
				704	ASSERT_EQ(xnn_status_success,
				705	xnn_setup_max_pooling2d_nhwc_f16(
				706	max_pooling_op,
				707	batch_size(), input_height(), input_width(),
				708	input.data(), output.data(),
				709	nullptr /* thread pool */));
				710
				711	ASSERT_EQ(xnn_status_success,
				712	xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
				713
				714	// Verify results.
				715	for (size_t i = 0; i < batch_size(); i++) {
				716	for (size_t y = 0; y < output_height(); y++) {
				717	for (size_t x = 0; x < output_width(); x++) {
				718	for (size_t c = 0; c < channels(); c++) {
				719	ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), output_max);
				720	ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), output_min);
				721	ASSERT_EQ(
				722	fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]),
				723	output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]) <<
				724	"in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c
				725	<< ", min = " << output_min << ", max = " << output_max;
				726	}
				727	}
				728	}
				729	}
				730	}
				731	}
				732
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	733	void TestF32() const {
				734	std::random_device random_device;
				735	auto rng = std::mt19937(random_device());
				736	auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
				737
				738	std::vector<float> input((batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
				739	std::vector<float> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
				740	std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
				741	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				742	std::generate(input.begin(), input.end(), std::ref(f32rng));
				743	std::fill(output.begin(), output.end(), nanf(""));
				744
				745	// Compute reference results, without clamping.
				746	for (size_t i = 0; i < batch_size(); i++) {
				747	for (size_t oy = 0; oy < output_height(); oy++) {
				748	for (size_t ox = 0; ox < output_width(); ox++) {
				749	for (size_t c = 0; c < channels(); c++) {
				750	float max_value = -std::numeric_limits<float>::infinity();
				751	for (size_t py = 0; py < pooling_height(); py++) {
				752	const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
				753	for (size_t px = 0; px < pooling_width(); px++) {
				754	const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
				755	if (ix < input_width() && iy < input_height()) {
				756	max_value = std::max(max_value,
				757	input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
				758	}
				759	}
				760	}
				761	output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
				762	}
				763	}
				764	}
				765	}
				766
				767	// Compute clamping parameters.
				768	const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
				769	const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
				770	const float accumulated_range = accumulated_max - accumulated_min;
				771	const float output_min = accumulated_range == 0.0f ?
				772	-std::numeric_limits<float>::infinity() :
				773	accumulated_min + accumulated_range / 255.0f * float(qmin());
				774	const float output_max = accumulated_range == 0.0f ?
				775	+std::numeric_limits<float>::infinity() :
				776	accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
				777
				778	// Clamp reference results.
				779	for (float& value : output_ref) {
				780	value = std::max(std::min(value, output_max), output_min);
				781	}
				782
				783	// Create, setup, run, and destroy Max Pooling operator.
Marat Dukhan	04f03be	2019-11-19 12:36:47 -0800	[diff] [blame]	784	ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	785	xnn_operator_t max_pooling_op = nullptr;
				786
				787	ASSERT_EQ(xnn_status_success,
				788	xnn_create_max_pooling2d_nhwc_f32(
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	789	padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
				790	padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	791	pooling_height(), pooling_width(),
				792	stride_height(), stride_width(),
				793	dilation_height(), dilation_width(),
				794	channels(), input_pixel_stride(), output_pixel_stride(),
				795	output_min, output_max,
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	796	padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0,
				797	&max_pooling_op));
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	798	ASSERT_NE(nullptr, max_pooling_op);
				799
				800	// Smart pointer to automatically delete max_pooling_op.
				801	std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
				802
				803	ASSERT_EQ(xnn_status_success,
				804	xnn_setup_max_pooling2d_nhwc_f32(
				805	max_pooling_op,
				806	batch_size(), input_height(), input_width(),
				807	input.data(), output.data(),
				808	nullptr /* thread pool */));
				809
				810	ASSERT_EQ(xnn_status_success,
				811	xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
				812
				813	// Verify results.
				814	for (size_t i = 0; i < batch_size(); i++) {
				815	for (size_t y = 0; y < output_height(); y++) {
				816	for (size_t x = 0; x < output_width(); x++) {
				817	for (size_t c = 0; c < channels(); c++) {
				818	ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_max);
				819	ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_min);
				820	ASSERT_EQ(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c],
				821	output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]) <<
				822	"in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c
				823	<< ", min = " << output_min << ", max = " << output_max;
				824	}
				825	}
				826	}
				827	}
				828	}
				829	}
				830
Marat Dukhan	dc5c148	2021-08-16 09:03:15 -0700	[diff] [blame]	831	void TestSetupS8() const {
				832	std::random_device random_device;
				833	auto rng = std::mt19937(random_device());
				834	auto i8rng = std::bind(
				835	std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
				836	std::ref(rng));
				837
				838	std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + std::max(
				839	(batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels(),
				840	(next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + channels()));
				841	std::vector<int8_t> output(XNN_EXTRA_BYTES / sizeof(int8_t) + std::max(
				842	(batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels(),
				843	(next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + channels()));
				844	std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
				845	std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * channels());
				846	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				847	std::generate(input.begin(), input.end(), std::ref(i8rng));
				848	std::fill(output.begin(), output.end(), 0xA5);
				849
				850	// Compute reference results.
				851	for (size_t i = 0; i < batch_size(); i++) {
				852	for (size_t oy = 0; oy < output_height(); oy++) {
				853	for (size_t ox = 0; ox < output_width(); ox++) {
				854	for (size_t c = 0; c < channels(); c++) {
				855	int8_t max_value = std::numeric_limits<int8_t>::min();
				856	for (size_t py = 0; py < pooling_height(); py++) {
				857	const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
				858	for (size_t px = 0; px < pooling_width(); px++) {
				859	const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
				860	if (ix < input_width() && iy < input_height()) {
				861	max_value = std::max(max_value,
				862	input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
				863	}
				864	}
				865	}
				866	max_value = std::min(max_value, int8_t(qmax() - 0x80));
				867	max_value = std::max(max_value, int8_t(qmin() - 0x80));
				868	output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
				869	}
				870	}
				871	}
				872	}
				873
				874	// Create, setup, and run Max Pooling operator once.
				875	ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
				876	xnn_operator_t max_pooling_op = nullptr;
				877
				878	ASSERT_EQ(xnn_status_success,
				879	xnn_create_max_pooling2d_nhwc_s8(
				880	padding_top(), padding_right(), padding_bottom(), padding_left(),
				881	pooling_height(), pooling_width(),
				882	stride_height(), stride_width(),
				883	dilation_height(), dilation_width(),
				884	channels(), input_pixel_stride(), output_pixel_stride(),
				885	int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
				886	0, &max_pooling_op));
				887	ASSERT_NE(nullptr, max_pooling_op);
				888
				889	// Smart pointer to automatically delete max_pooling_op.
				890	std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
				891
				892	ASSERT_EQ(xnn_status_success,
				893	xnn_setup_max_pooling2d_nhwc_s8(
				894	max_pooling_op,
				895	batch_size(), input_height(), input_width(),
				896	input.data(), output.data(),
				897	nullptr /* thread pool */));
				898
				899	ASSERT_EQ(xnn_status_success,
				900	xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
				901
				902	// Verify results of the first run.
				903	for (size_t i = 0; i < batch_size(); i++) {
				904	for (size_t y = 0; y < output_height(); y++) {
				905	for (size_t x = 0; x < output_width(); x++) {
				906	for (size_t c = 0; c < channels(); c++) {
				907	ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), int32_t(qmax() - 0x80));
				908	ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), int32_t(qmin() - 0x80));
				909	ASSERT_EQ(int32_t(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]),
				910	int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c])) <<
				911	"in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
				912	}
				913	}
				914	}
				915	}
				916
				917	// Re-generate data for the second run.
				918	std::generate(input.begin(), input.end(), std::ref(i8rng));
				919	std::fill(output.begin(), output.end(), 0xA5);
				920
				921	// Compute reference results for the second run.
				922	for (size_t i = 0; i < next_batch_size(); i++) {
				923	for (size_t oy = 0; oy < next_output_height(); oy++) {
				924	for (size_t ox = 0; ox < next_output_width(); ox++) {
				925	for (size_t c = 0; c < channels(); c++) {
				926	int8_t max_value = std::numeric_limits<int8_t>::min();
				927	for (size_t py = 0; py < pooling_height(); py++) {
				928	const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
				929	for (size_t px = 0; px < pooling_width(); px++) {
				930	const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
				931	if (ix < next_input_width() && iy < next_input_height()) {
				932	max_value = std::max(max_value,
				933	input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + c]);
				934	}
				935	}
				936	}
				937	max_value = std::min(max_value, int8_t(qmax() - 0x80));
				938	max_value = std::max(max_value, int8_t(qmin() - 0x80));
				939	next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c] = max_value;
				940	}
				941	}
				942	}
				943	}
				944
				945	// Setup and run Max Pooling operator the second time, and destroy the operator.
				946	ASSERT_EQ(xnn_status_success,
				947	xnn_setup_max_pooling2d_nhwc_s8(
				948	max_pooling_op,
				949	next_batch_size(), next_input_height(), next_input_width(),
				950	input.data(), output.data(),
				951	nullptr /* thread pool */));
				952
				953	ASSERT_EQ(xnn_status_success,
				954	xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
				955
				956	// Verify results of the second run.
				957	for (size_t i = 0; i < next_batch_size(); i++) {
				958	for (size_t y = 0; y < next_output_height(); y++) {
				959	for (size_t x = 0; x < next_output_width(); x++) {
				960	for (size_t c = 0; c < channels(); c++) {
				961	ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), int32_t(qmax() - 0x80));
				962	ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), int32_t(qmin() - 0x80));
				963	ASSERT_EQ(int32_t(next_output_ref[((i * next_output_height() + y) * next_output_width() + x) * channels() + c]),
				964	int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c])) <<
				965	"in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
				966	}
				967	}
				968	}
				969	}
				970	}
				971	}
				972
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	973	void TestSetupU8() const {
				974	std::random_device random_device;
				975	auto rng = std::mt19937(random_device());
Marat Dukhan	5ce30d9	2020-04-14 03:31:26 -0700	[diff] [blame]	976	auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	977
				978	std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
				979	(batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels(),
				980	(next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + channels()));
				981	std::vector<uint8_t> output(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
				982	(batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels(),
				983	(next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + channels()));
				984	std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
				985	std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * channels());
				986	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				987	std::generate(input.begin(), input.end(), std::ref(u8rng));
				988	std::fill(output.begin(), output.end(), 0xA5);
				989
				990	// Compute reference results.
				991	for (size_t i = 0; i < batch_size(); i++) {
				992	for (size_t oy = 0; oy < output_height(); oy++) {
				993	for (size_t ox = 0; ox < output_width(); ox++) {
				994	for (size_t c = 0; c < channels(); c++) {
				995	uint8_t max_value = 0;
				996	for (size_t py = 0; py < pooling_height(); py++) {
				997	const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
				998	for (size_t px = 0; px < pooling_width(); px++) {
				999	const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
				1000	if (ix < input_width() && iy < input_height()) {
				1001	max_value = std::max(max_value,
				1002	input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
				1003	}
				1004	}
				1005	}
				1006	max_value = std::min(max_value, qmax());
				1007	max_value = std::max(max_value, qmin());
				1008	output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
				1009	}
				1010	}
				1011	}
				1012	}
				1013
				1014	// Create, setup, and run Max Pooling operator once.
Marat Dukhan	04f03be	2019-11-19 12:36:47 -0800	[diff] [blame]	1015	ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	1016	xnn_operator_t max_pooling_op = nullptr;
				1017
				1018	ASSERT_EQ(xnn_status_success,
				1019	xnn_create_max_pooling2d_nhwc_u8(
				1020	padding_top(), padding_right(), padding_bottom(), padding_left(),
				1021	pooling_height(), pooling_width(),
				1022	stride_height(), stride_width(),
				1023	dilation_height(), dilation_width(),
				1024	channels(), input_pixel_stride(), output_pixel_stride(),
				1025	qmin(), qmax(),
				1026	0, &max_pooling_op));
				1027	ASSERT_NE(nullptr, max_pooling_op);
				1028
				1029	// Smart pointer to automatically delete max_pooling_op.
				1030	std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
				1031
				1032	ASSERT_EQ(xnn_status_success,
				1033	xnn_setup_max_pooling2d_nhwc_u8(
				1034	max_pooling_op,
				1035	batch_size(), input_height(), input_width(),
				1036	input.data(), output.data(),
				1037	nullptr /* thread pool */));
				1038
				1039	ASSERT_EQ(xnn_status_success,
				1040	xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
				1041
				1042	// Verify results of the first run.
				1043	for (size_t i = 0; i < batch_size(); i++) {
				1044	for (size_t y = 0; y < output_height(); y++) {
				1045	for (size_t x = 0; x < output_width(); x++) {
				1046	for (size_t c = 0; c < channels(); c++) {
				1047	ASSERT_LE(uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), uint32_t(qmax()));
				1048	ASSERT_GE(uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), uint32_t(qmin()));
				1049	ASSERT_EQ(uint32_t(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]),
				1050	uint32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c])) <<
				1051	"in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
				1052	}
				1053	}
				1054	}
				1055	}
				1056
				1057	// Re-generate data for the second run.
				1058	std::generate(input.begin(), input.end(), std::ref(u8rng));
				1059	std::fill(output.begin(), output.end(), 0xA5);
				1060
				1061	// Compute reference results for the second run.
				1062	for (size_t i = 0; i < next_batch_size(); i++) {
				1063	for (size_t oy = 0; oy < next_output_height(); oy++) {
				1064	for (size_t ox = 0; ox < next_output_width(); ox++) {
				1065	for (size_t c = 0; c < channels(); c++) {
				1066	uint8_t max_value = 0;
				1067	for (size_t py = 0; py < pooling_height(); py++) {
				1068	const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
				1069	for (size_t px = 0; px < pooling_width(); px++) {
				1070	const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
				1071	if (ix < next_input_width() && iy < next_input_height()) {
				1072	max_value = std::max(max_value,
				1073	input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + c]);
				1074	}
				1075	}
				1076	}
				1077	max_value = std::min(max_value, qmax());
				1078	max_value = std::max(max_value, qmin());
				1079	next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c] = max_value;
				1080	}
				1081	}
				1082	}
				1083	}
				1084
				1085	// Setup and run Max Pooling operator the second time, and destroy the operator.
				1086	ASSERT_EQ(xnn_status_success,
				1087	xnn_setup_max_pooling2d_nhwc_u8(
				1088	max_pooling_op,
				1089	next_batch_size(), next_input_height(), next_input_width(),
				1090	input.data(), output.data(),
				1091	nullptr /* thread pool */));
				1092
				1093	ASSERT_EQ(xnn_status_success,
				1094	xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
				1095
				1096	// Verify results of the second run.
				1097	for (size_t i = 0; i < next_batch_size(); i++) {
				1098	for (size_t y = 0; y < next_output_height(); y++) {
				1099	for (size_t x = 0; x < next_output_width(); x++) {
				1100	for (size_t c = 0; c < channels(); c++) {
				1101	ASSERT_LE(uint32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), uint32_t(qmax()));
				1102	ASSERT_GE(uint32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), uint32_t(qmin()));
				1103	ASSERT_EQ(uint32_t(next_output_ref[((i * next_output_height() + y) * next_output_width() + x) * channels() + c]),
				1104	uint32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c])) <<
				1105	"in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
				1106	}
				1107	}
				1108	}
				1109	}
				1110	}
				1111	}
				1112
Marat Dukhan	5756a92	2022-02-04 01:55:53 -0800	[diff] [blame]	1113	void TestSetupF16() const {
				1114	std::random_device random_device;
				1115	auto rng = std::mt19937(random_device());
				1116	// Note: we need to avoid FP16 denormals in the generated tensor because they might be processed differently in
				1117	// native vs emulated arithmetics, and we use exact comparison to verify the results against reference.
				1118	auto f32rng = std::bind(std::uniform_real_distribution<float>(0.001f, 1.0f), rng);
				1119	auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
				1120
				1121	std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + std::max(
				1122	(batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels(),
				1123	(next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + channels()));
				1124	std::vector<uint16_t> output(XNN_EXTRA_BYTES / sizeof(uint16_t) + std::max(
				1125	(batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels(),
				1126	(next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + channels()));
				1127	std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
				1128	std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * channels());
				1129	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				1130	std::generate(input.begin(), input.end(), std::ref(f16rng));
				1131	std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
				1132
				1133	// Compute reference results, without clamping.
				1134	for (size_t i = 0; i < batch_size(); i++) {
				1135	for (size_t oy = 0; oy < output_height(); oy++) {
				1136	for (size_t ox = 0; ox < output_width(); ox++) {
				1137	for (size_t c = 0; c < channels(); c++) {
				1138	float max_value = -std::numeric_limits<float>::infinity();
				1139	for (size_t py = 0; py < pooling_height(); py++) {
				1140	const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
				1141	for (size_t px = 0; px < pooling_width(); px++) {
				1142	const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
				1143	if (ix < input_width() && iy < input_height()) {
				1144	max_value = std::max(max_value,
				1145	fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]));
				1146	}
				1147	}
				1148	}
				1149	output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
				1150	}
				1151	}
				1152	}
				1153	}
				1154
				1155	// Compute clamping parameters.
				1156	const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
				1157	const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
				1158	const float accumulated_range = accumulated_max - accumulated_min;
				1159	float output_min = accumulated_min + accumulated_range / 255.0f * float(qmin());
				1160	float output_max = accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
				1161	output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min));
				1162	output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max));
				1163	if (accumulated_range == 0.0f) {
				1164	output_min = -std::numeric_limits<float>::infinity();
				1165	output_max = +std::numeric_limits<float>::infinity();
				1166	}
				1167	if (qmin() == std::numeric_limits<uint8_t>::min()) {
				1168	output_min = -std::numeric_limits<float>::infinity();
				1169	}
				1170	if (qmax() == std::numeric_limits<uint8_t>::max()) {
				1171	output_max = +std::numeric_limits<float>::infinity();
				1172	}
				1173
				1174	// Clamp reference results.
				1175	for (float& value : output_ref) {
				1176	value = std::max(std::min(value, output_max), output_min);
				1177	}
				1178
				1179	// Create, setup, and run Max Pooling operator once.
				1180	ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
				1181	xnn_operator_t max_pooling_op = nullptr;
				1182
				1183	const xnn_status status = xnn_create_max_pooling2d_nhwc_f16(
				1184	padding_top(), padding_right(), padding_bottom(), padding_left(),
				1185	pooling_height(), pooling_width(),
				1186	stride_height(), stride_width(),
				1187	dilation_height(), dilation_width(),
				1188	channels(), input_pixel_stride(), output_pixel_stride(),
				1189	output_min, output_max,
				1190	0, &max_pooling_op);
				1191	if (status == xnn_status_unsupported_hardware) {
				1192	GTEST_SKIP();
				1193	}
				1194	ASSERT_EQ(xnn_status_success, status);
				1195	ASSERT_NE(nullptr, max_pooling_op);
				1196
				1197	// Smart pointer to automatically delete max_pooling_op.
				1198	std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
				1199
				1200	ASSERT_EQ(xnn_status_success,
				1201	xnn_setup_max_pooling2d_nhwc_f16(
				1202	max_pooling_op,
				1203	batch_size(), input_height(), input_width(),
				1204	input.data(), output.data(),
				1205	nullptr /* thread pool */));
				1206
				1207	ASSERT_EQ(xnn_status_success,
				1208	xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
				1209
				1210	// Verify results of the first run.
				1211	for (size_t i = 0; i < batch_size(); i++) {
				1212	for (size_t y = 0; y < output_height(); y++) {
				1213	for (size_t x = 0; x < output_width(); x++) {
				1214	for (size_t c = 0; c < channels(); c++) {
				1215	ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), output_max);
				1216	ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]), output_min);
				1217	ASSERT_EQ(
				1218	fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]),
				1219	output_ref[((i * output_height() + y) * output_width() + x) * channels() + c]) <<
				1220	"in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c
				1221	<< ", min = " << output_min << ", max = " << output_max;
				1222	}
				1223	}
				1224	}
				1225	}
				1226
				1227	// Re-generate data for the second run.
				1228	std::generate(input.begin(), input.end(), std::ref(f16rng));
				1229	std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
				1230
				1231	// Compute reference results for the second run, including clamping.
				1232	for (size_t i = 0; i < next_batch_size(); i++) {
				1233	for (size_t oy = 0; oy < next_output_height(); oy++) {
				1234	for (size_t ox = 0; ox < next_output_width(); ox++) {
				1235	for (size_t c = 0; c < channels(); c++) {
				1236	float max_value = -std::numeric_limits<float>::infinity();
				1237	for (size_t py = 0; py < pooling_height(); py++) {
				1238	const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
				1239	for (size_t px = 0; px < pooling_width(); px++) {
				1240	const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
				1241	if (ix < next_input_width() && iy < next_input_height()) {
				1242	max_value = std::max(max_value,
				1243	fp16_ieee_to_fp32_value(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + c]));
				1244	}
				1245	}
				1246	}
				1247	max_value = std::min(max_value, output_max);
				1248	max_value = std::max(max_value, output_min);
				1249	next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c] = max_value;
				1250	}
				1251	}
				1252	}
				1253	}
				1254
				1255	// Setup and run Max Pooling operator the second time, and destroy the operator.
				1256	ASSERT_EQ(xnn_status_success,
				1257	xnn_setup_max_pooling2d_nhwc_f16(
				1258	max_pooling_op,
				1259	next_batch_size(), next_input_height(), next_input_width(),
				1260	input.data(), output.data(),
				1261	nullptr /* thread pool */));
				1262
				1263	ASSERT_EQ(xnn_status_success,
				1264	xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
				1265
				1266	// Verify results of the second run.
				1267	for (size_t i = 0; i < next_batch_size(); i++) {
				1268	for (size_t y = 0; y < next_output_height(); y++) {
				1269	for (size_t x = 0; x < next_output_width(); x++) {
				1270	for (size_t c = 0; c < channels(); c++) {
				1271	ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), output_max);
				1272	ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]), output_min);
				1273	ASSERT_EQ(
				1274	fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]),
				1275	next_output_ref[((i * next_output_height() + y) * next_output_width() + x) * channels() + c]) <<
				1276	"in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c
				1277	<< ", min = " << output_min << ", max = " << output_max;
				1278	}
				1279	}
				1280	}
				1281	}
				1282	}
				1283	}
				1284
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	1285	void TestSetupF32() const {
				1286	std::random_device random_device;
				1287	auto rng = std::mt19937(random_device());
				1288	auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
				1289
				1290	std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max(
				1291	(batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + channels(),
				1292	(next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + channels()));
				1293	std::vector<float> output(XNN_EXTRA_BYTES / sizeof(float) + std::max(
				1294	(batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + channels(),
				1295	(next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + channels()));
				1296	std::vector<float> output_ref(batch_size() * output_height() * output_width() * channels());
				1297	std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * channels());
				1298	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				1299	std::generate(input.begin(), input.end(), std::ref(f32rng));
				1300	std::fill(output.begin(), output.end(), nanf(""));
				1301
				1302	// Compute reference results, without clamping.
				1303	for (size_t i = 0; i < batch_size(); i++) {
				1304	for (size_t oy = 0; oy < output_height(); oy++) {
				1305	for (size_t ox = 0; ox < output_width(); ox++) {
				1306	for (size_t c = 0; c < channels(); c++) {
				1307	float max_value = -std::numeric_limits<float>::infinity();
				1308	for (size_t py = 0; py < pooling_height(); py++) {
				1309	const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
				1310	for (size_t px = 0; px < pooling_width(); px++) {
				1311	const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
				1312	if (ix < input_width() && iy < input_height()) {
				1313	max_value = std::max(max_value,
				1314	input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + c]);
				1315	}
				1316	}
				1317	}
				1318	output_ref[((i * output_height() + oy) * output_width() + ox) * channels() + c] = max_value;
				1319	}
				1320	}
				1321	}
				1322	}
				1323
				1324	// Compute clamping parameters.
				1325	const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
				1326	const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
				1327	const float accumulated_range = accumulated_max - accumulated_min;
				1328	const float output_min = accumulated_range == 0.0f ?
				1329	-std::numeric_limits<float>::infinity() :
				1330	accumulated_min + accumulated_range / 255.0f * float(qmin());
				1331	const float output_max = accumulated_range == 0.0f ?
				1332	+std::numeric_limits<float>::infinity() :
				1333	accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
				1334
				1335	// Clamp reference results.
				1336	for (float& value : output_ref) {
				1337	value = std::max(std::min(value, output_max), output_min);
				1338	}
				1339
				1340	// Create, setup, and run Max Pooling operator once.
Marat Dukhan	04f03be	2019-11-19 12:36:47 -0800	[diff] [blame]	1341	ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	1342	xnn_operator_t max_pooling_op = nullptr;
				1343
				1344	ASSERT_EQ(xnn_status_success,
				1345	xnn_create_max_pooling2d_nhwc_f32(
				1346	padding_top(), padding_right(), padding_bottom(), padding_left(),
				1347	pooling_height(), pooling_width(),
				1348	stride_height(), stride_width(),
				1349	dilation_height(), dilation_width(),
				1350	channels(), input_pixel_stride(), output_pixel_stride(),
				1351	output_min, output_max,
				1352	0, &max_pooling_op));
				1353	ASSERT_NE(nullptr, max_pooling_op);
				1354
				1355	// Smart pointer to automatically delete max_pooling_op.
				1356	std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_max_pooling_op(max_pooling_op, xnn_delete_operator);
				1357
				1358	ASSERT_EQ(xnn_status_success,
				1359	xnn_setup_max_pooling2d_nhwc_f32(
				1360	max_pooling_op,
				1361	batch_size(), input_height(), input_width(),
				1362	input.data(), output.data(),
				1363	nullptr /* thread pool */));
				1364
				1365	ASSERT_EQ(xnn_status_success,
				1366	xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
				1367
				1368	// Verify results of the first run.
				1369	for (size_t i = 0; i < batch_size(); i++) {
				1370	for (size_t y = 0; y < output_height(); y++) {
				1371	for (size_t x = 0; x < output_width(); x++) {
				1372	for (size_t c = 0; c < channels(); c++) {
				1373	ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_max);
				1374	ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_min);
				1375	ASSERT_EQ(output_ref[((i * output_height() + y) * output_width() + x) * channels() + c],
				1376	output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c]) <<
				1377	"in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
				1378	}
				1379	}
				1380	}
				1381	}
				1382
				1383	// Re-generate data for the second run.
				1384	std::generate(input.begin(), input.end(), std::ref(f32rng));
				1385	std::fill(output.begin(), output.end(), 0xA5);
				1386
				1387	// Compute reference results for the second run, including clamping.
				1388	for (size_t i = 0; i < next_batch_size(); i++) {
				1389	for (size_t oy = 0; oy < next_output_height(); oy++) {
				1390	for (size_t ox = 0; ox < next_output_width(); ox++) {
				1391	for (size_t c = 0; c < channels(); c++) {
				1392	float max_value = -std::numeric_limits<float>::infinity();
				1393	for (size_t py = 0; py < pooling_height(); py++) {
				1394	const size_t iy = oy * stride_height() + py * dilation_height() - padding_top();
				1395	for (size_t px = 0; px < pooling_width(); px++) {
				1396	const size_t ix = ox * stride_width() + px * dilation_width() - padding_left();
				1397	if (ix < next_input_width() && iy < next_input_height()) {
				1398	max_value = std::max(max_value,
				1399	input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + c]);
				1400	}
				1401	}
				1402	}
				1403	max_value = std::min(max_value, output_max);
				1404	max_value = std::max(max_value, output_min);
				1405	next_output_ref[((i * next_output_height() + oy) * next_output_width() + ox) * channels() + c] = max_value;
				1406	}
				1407	}
				1408	}
				1409	}
				1410
				1411	// Setup and run Max Pooling operator the second time, and destroy the operator.
				1412	ASSERT_EQ(xnn_status_success,
				1413	xnn_setup_max_pooling2d_nhwc_f32(
				1414	max_pooling_op,
				1415	next_batch_size(), next_input_height(), next_input_width(),
				1416	input.data(), output.data(),
				1417	nullptr /* thread pool */));
				1418
				1419	ASSERT_EQ(xnn_status_success,
				1420	xnn_run_operator(max_pooling_op, nullptr /* thread pool */));
				1421
				1422	// Verify results of the second run.
				1423	for (size_t i = 0; i < next_batch_size(); i++) {
				1424	for (size_t y = 0; y < next_output_height(); y++) {
				1425	for (size_t x = 0; x < next_output_width(); x++) {
				1426	for (size_t c = 0; c < channels(); c++) {
				1427	ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c], output_max);
				1428	ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c], output_min);
				1429	ASSERT_EQ(next_output_ref[((i * next_output_height() + y) * next_output_width() + x) * channels() + c],
				1430	output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + c]) <<
				1431	"in batch index " << i << ", pixel (" << y << ", " << x << "), channel " << c;
				1432	}
				1433	}
				1434	}
				1435	}
				1436	}
				1437	}
				1438
				1439	private:
				1440	uint32_t padding_top_{0};  // Explicit top padding; written by the padding(...) setters, defaults to 0.
				1441	uint32_t padding_right_{0};  // Explicit right padding; written by the padding(...) setters, defaults to 0.
				1442	uint32_t padding_bottom_{0};  // Explicit bottom padding; written by the padding(...) setters, defaults to 0.
				1443	uint32_t padding_left_{0};  // Explicit left padding; written by the padding(...) setters, defaults to 0.
Marat Dukhan	bee7825	2020-02-27 23:52:08 -0800	[diff] [blame]	1444	bool padding_tf_same_{false};  // Written by padding_tf_same(bool), which asserts the four paddings are 0 when enabling it; padding(uint32_t) asserts this flag is off.
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	1445	size_t input_height_{1};  // Presumably backs input_height(): height passed to the first operator setup.
				1446	size_t input_width_{1};  // Presumably backs input_width(): width passed to the first operator setup.
				1447	size_t channels_{1};  // Presumably backs channels(): channel count passed at operator creation.
				1448	size_t batch_size_{1};  // Presumably backs batch_size(): batch passed to the first operator setup.
				1449	size_t input_pixel_stride_{0};  // NOTE(review): 0 presumably means "not set, derive from channels()" - confirm in input_pixel_stride().
				1450	size_t output_pixel_stride_{0};  // NOTE(review): 0 presumably means "not set, derive from channels()" - confirm in output_pixel_stride().
				1451	uint32_t pooling_height_{1};  // Presumably backs pooling_height(): pooling window rows.
				1452	uint32_t pooling_width_{1};  // Presumably backs pooling_width(): pooling window columns.
				1453	uint32_t stride_height_{1};  // Presumably backs stride_height(): vertical step between pooling windows.
				1454	uint32_t stride_width_{1};  // Presumably backs stride_width(): horizontal step between pooling windows.
				1455	uint32_t dilation_height_{1};  // Presumably backs dilation_height(): vertical spacing between window taps.
				1456	uint32_t dilation_width_{1};  // Presumably backs dilation_width(): horizontal spacing between window taps.
				1457	size_t next_input_height_{0};  // Height for the second setup in the setup tests; NOTE(review): 0 presumably falls back to input_height() - confirm in accessor.
				1458	size_t next_input_width_{0};  // Width for the second setup in the setup tests; NOTE(review): 0 presumably falls back to input_width() - confirm in accessor.
				1459	size_t next_batch_size_{0};  // Batch for the second setup in the setup tests; NOTE(review): 0 presumably falls back to batch_size() - confirm in accessor.
				1460	uint8_t qmin_{0};  // Presumably backs qmin(); the F32 setup test raises output_min by range / 255 * qmin().
				1461	uint8_t qmax_{255};  // Presumably backs qmax(); the F32 setup test lowers output_max by range / 255 * (255 - qmax()).
				1462	size_t iterations_{1};  // Presumably backs iterations(): how many times each test regenerates random input and re-runs.
				1463	};