Blame - test/convolution-operator-tester.h - platform/external/XNNPACK

blob: e637fac1703df60c58b1a9dce0660cfa083ef5b2 [file] [log] [blame]

XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	1	// Copyright (c) Facebook, Inc. and its affiliates.
				2	// All rights reserved.
				3	//
				4	// Copyright 2019 Google LLC
				5	//
				6	// This source code is licensed under the BSD-style license found in the
				7	// LICENSE file in the root directory of this source tree.
				8
				9	#pragma once
				10
#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <xnnpack.h>
				23
				24
				25	class ConvolutionOperatorTester {
				26	public:
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	27	inline ConvolutionOperatorTester& padding_tf_same(bool padding_same) {
				28	if (padding_same) {
				29	assert(padding_top() == 0);
				30	assert(padding_left() == 0);
				31	assert(padding_bottom() == 0);
				32	assert(padding_right() == 0);
				33	}
				34	this->padding_tf_same_ = padding_same;
				35	return *this;
				36	}
				37
				38	inline bool padding_tf_same() const {
				39	return this->padding_tf_same_;
				40	}
				41
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	42	inline ConvolutionOperatorTester& padding(uint32_t padding) {
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	43	assert(!padding_tf_same());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	44	this->padding_top_ = padding;
				45	this->padding_right_ = padding;
				46	this->padding_bottom_ = padding;
				47	this->padding_left_ = padding;
				48	return *this;
				49	}
				50
				51	inline ConvolutionOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	52	assert(!padding_tf_same());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	53	this->padding_top_ = padding_height;
				54	this->padding_right_ = padding_width;
				55	this->padding_bottom_ = padding_height;
				56	this->padding_left_ = padding_width;
				57	return *this;
				58	}
				59
				60	inline ConvolutionOperatorTester& padding_height(uint32_t padding_height) {
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	61	assert(!padding_tf_same());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	62	this->padding_top_ = padding_height;
				63	this->padding_bottom_ = padding_height;
				64	return *this;
				65	}
				66
				67	inline ConvolutionOperatorTester& padding_width(uint32_t padding_width) {
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	68	assert(!padding_tf_same());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	69	this->padding_right_ = padding_width;
				70	this->padding_left_ = padding_width;
				71	return *this;
				72	}
				73
				74	inline ConvolutionOperatorTester& padding_top(uint32_t padding_top) {
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	75	assert(!padding_tf_same());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	76	this->padding_top_ = padding_top;
				77	return *this;
				78	}
				79
				80	inline uint32_t padding_top() const {
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	81	if (padding_tf_same()) {
				82	const uint32_t total_padding_height =
				83	(output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
				84	return total_padding_height / 2;
				85	} else {
				86	return this->padding_top_;
				87	}
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	88	}
				89
				90	inline ConvolutionOperatorTester& padding_left(uint32_t padding_left) {
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	91	assert(!padding_tf_same());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	92	this->padding_left_ = padding_left;
				93	return *this;
				94	}
				95
				96	inline uint32_t padding_left() const {
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	97	if (padding_tf_same()) {
				98	const uint32_t total_padding_width =
				99	(output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
				100	return total_padding_width / 2;
				101	} else {
				102	return this->padding_left_;
				103	}
				104	}
				105
				106	inline ConvolutionOperatorTester& padding_bottom(uint32_t padding_bottom) {
				107	assert(!padding_tf_same());
				108	this->padding_bottom_ = padding_bottom;
				109	return *this;
				110	}
				111
				112	inline uint32_t padding_bottom() const {
				113	if (padding_tf_same()) {
				114	const uint32_t total_padding_height =
				115	(output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
				116	return total_padding_height - total_padding_height / 2;
				117	} else {
				118	return this->padding_bottom_;
				119	}
				120	}
				121
				122	inline ConvolutionOperatorTester& padding_right(uint32_t padding_right) {
				123	assert(!padding_tf_same());
				124	this->padding_right_ = padding_right;
				125	return *this;
				126	}
				127
				128	inline uint32_t padding_right() const {
				129	if (padding_tf_same()) {
				130	const uint32_t total_padding_width =
				131	(output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
				132	return total_padding_width - total_padding_width / 2;
				133	} else {
				134	return this->padding_right_;
				135	}
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	136	}
				137
				138	inline ConvolutionOperatorTester& input_size(uint32_t input_height, uint32_t input_width) {
				139	assert(input_height >= 1);
				140	assert(input_width >= 1);
				141	this->input_height_ = input_height;
				142	this->input_width_ = input_width;
				143	return *this;
				144	}
				145
				146	inline ConvolutionOperatorTester& input_height(uint32_t input_height) {
				147	assert(input_height >= 1);
				148	this->input_height_ = input_height;
				149	return *this;
				150	}
				151
				152	inline uint32_t input_height() const {
				153	return this->input_height_;
				154	}
				155
				156	inline ConvolutionOperatorTester& input_width(uint32_t input_width) {
				157	assert(input_width >= 1);
				158	this->input_width_ = input_width;
				159	return *this;
				160	}
				161
				162	inline uint32_t input_width() const {
				163	return this->input_width_;
				164	}
				165
				166	inline ConvolutionOperatorTester& groups(uint32_t groups) {
				167	assert(groups >= 1);
				168	this->groups_ = groups;
				169	return *this;
				170	}
				171
				172	inline uint32_t groups() const {
				173	return this->groups_;
				174	}
				175
				176	inline ConvolutionOperatorTester& group_input_channels(size_t group_input_channels) {
				177	assert(group_input_channels >= 1);
				178	this->group_input_channels_ = group_input_channels;
				179	return *this;
				180	}
				181
				182	inline size_t group_input_channels() const {
				183	return this->group_input_channels_;
				184	}
				185
				186	inline ConvolutionOperatorTester& group_output_channels(size_t group_output_channels) {
				187	assert(group_output_channels >= 1);
				188	this->group_output_channels_ = group_output_channels;
				189	return *this;
				190	}
				191
				192	inline size_t group_output_channels() const {
				193	return this->group_output_channels_;
				194	}
				195
				196	inline ConvolutionOperatorTester& batch_size(size_t batch_size) {
				197	assert(batch_size >= 1);
				198	this->batch_size_ = batch_size;
				199	return *this;
				200	}
				201
				202	inline size_t batch_size() const {
				203	return this->batch_size_;
				204	}
				205
				206	inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_size) {
				207	assert(kernel_size >= 1);
				208	this->kernel_height_ = kernel_size;
				209	this->kernel_width_ = kernel_size;
				210	return *this;
				211	}
				212
				213	inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_height, uint32_t kernel_width) {
				214	assert(kernel_height >= 1);
				215	assert(kernel_width >= 1);
				216	this->kernel_height_ = kernel_height;
				217	this->kernel_width_ = kernel_width;
				218	return *this;
				219	}
				220
				221	inline ConvolutionOperatorTester& kernel_height(uint32_t kernel_height) {
				222	assert(kernel_height >= 1);
				223	this->kernel_height_ = kernel_height;
				224	return *this;
				225	}
				226
				227	inline uint32_t kernel_height() const {
				228	return this->kernel_height_;
				229	}
				230
				231	inline ConvolutionOperatorTester& kernel_width(uint32_t kernel_width) {
				232	assert(kernel_width >= 1);
				233	this->kernel_width_ = kernel_width;
				234	return *this;
				235	}
				236
				237	inline uint32_t kernel_width() const {
				238	return this->kernel_width_;
				239	}
				240
				241	inline ConvolutionOperatorTester& dilation(uint32_t dilation) {
				242	assert(dilation >= 1);
				243	this->dilation_height_ = dilation;
				244	this->dilation_width_ = dilation;
				245	return *this;
				246	}
				247
				248	inline ConvolutionOperatorTester& dilation(uint32_t dilation_height, uint32_t dilation_width) {
				249	assert(dilation_height >= 1);
				250	assert(dilation_width >= 1);
				251	this->dilation_height_ = dilation_height;
				252	this->dilation_width_ = dilation_width;
				253	return *this;
				254	}
				255
				256	inline ConvolutionOperatorTester& dilation_height(uint32_t dilation_height) {
				257	assert(dilation_height >= 1);
				258	this->dilation_height_ = dilation_height;
				259	return *this;
				260	}
				261
				262	inline uint32_t dilation_height() const {
				263	return this->dilation_height_;
				264	}
				265
				266	inline ConvolutionOperatorTester& dilation_width(uint32_t dilation_width) {
				267	assert(dilation_width >= 1);
				268	this->dilation_width_ = dilation_width;
				269	return *this;
				270	}
				271
				272	inline uint32_t dilation_width() const {
				273	return this->dilation_width_;
				274	}
				275
				276	inline ConvolutionOperatorTester& subsampling(uint32_t subsampling) {
				277	assert(subsampling >= 1);
				278	this->subsampling_height_ = subsampling;
				279	this->subsampling_width_ = subsampling;
				280	return *this;
				281	}
				282
				283	inline ConvolutionOperatorTester& subsampling(uint32_t subsampling_height, uint32_t subsampling_width) {
				284	assert(subsampling_height >= 1);
				285	assert(subsampling_width >= 1);
				286	this->subsampling_height_ = subsampling_height;
				287	this->subsampling_width_ = subsampling_width;
				288	return *this;
				289	}
				290
				291	inline ConvolutionOperatorTester& subsampling_height(uint32_t subsampling_height) {
				292	assert(subsampling_height >= 1);
				293	this->subsampling_height_ = subsampling_height;
				294	return *this;
				295	}
				296
				297	inline uint32_t subsampling_height() const {
				298	return this->subsampling_height_;
				299	}
				300
				301	inline ConvolutionOperatorTester& subsampling_width(uint32_t subsampling_width) {
				302	assert(subsampling_width >= 1);
				303	this->subsampling_width_ = subsampling_width;
				304	return *this;
				305	}
				306
				307	inline uint32_t subsampling_width() const {
				308	return this->subsampling_width_;
				309	}
				310
				311	inline ConvolutionOperatorTester& input_pixel_stride(size_t input_pixel_stride) {
				312	assert(input_pixel_stride >= 1);
				313	this->input_pixel_stride_ = input_pixel_stride;
				314	return *this;
				315	}
				316
				317	inline size_t input_pixel_stride() const {
				318	if (this->input_pixel_stride_ == 0) {
				319	return group_input_channels() * groups();
				320	} else {
				321	assert(this->input_pixel_stride_ >= group_input_channels() * groups());
				322	return this->input_pixel_stride_;
				323	}
				324	}
				325
				326	inline ConvolutionOperatorTester& output_pixel_stride(size_t output_pixel_stride) {
				327	assert(output_pixel_stride >= 1);
				328	this->output_pixel_stride_ = output_pixel_stride;
				329	return *this;
				330	}
				331
				332	inline size_t output_pixel_stride() const {
				333	if (this->output_pixel_stride_ == 0) {
				334	return group_output_channels() * groups();
				335	} else {
				336	assert(this->output_pixel_stride_ >= group_output_channels() * groups());
				337	return this->output_pixel_stride_;
				338	}
				339	}
				340
				341	inline uint32_t dilated_kernel_height() const {
				342	return (kernel_height() - 1) * dilation_height() + 1;
				343	}
				344
				345	inline uint32_t dilated_kernel_width() const {
				346	return (kernel_width() - 1) * dilation_width() + 1;
				347	}
				348
				349	inline size_t output_height() const {
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	350	if (padding_tf_same()) {
				351	return (input_height() + subsampling_height() - 1) / subsampling_height();
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	352	} else {
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	353	const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
				354	if (padded_input_height <= dilated_kernel_height()) {
				355	return 1;
				356	} else {
				357	return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
				358	}
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	359	}
				360	}
				361
				362	inline size_t output_width() const {
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	363	if (padding_tf_same()) {
				364	return (input_width() + subsampling_width() - 1) / subsampling_width();
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	365	} else {
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	366	const size_t padded_input_width = padding_left() + input_width() + padding_right();
				367	if (padded_input_width <= dilated_kernel_width()) {
				368	return 1;
				369	} else {
				370	return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
				371	}
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	372	}
				373	}
				374
				375	inline ConvolutionOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
				376	assert(next_input_height >= 1);
				377	assert(next_input_width >= 1);
				378	this->next_input_height_ = next_input_height;
				379	this->next_input_width_ = next_input_width;
				380	return *this;
				381	}
				382
				383	inline ConvolutionOperatorTester& next_input_height(uint32_t next_input_height) {
				384	assert(next_input_height >= 1);
				385	this->next_input_height_ = next_input_height;
				386	return *this;
				387	}
				388
				389	inline uint32_t next_input_height() const {
				390	if (this->next_input_height_ == 0) {
				391	return input_height();
				392	} else {
				393	return this->next_input_height_;
				394	}
				395	}
				396
				397	inline ConvolutionOperatorTester& next_input_width(uint32_t next_input_width) {
				398	assert(next_input_width >= 1);
				399	this->next_input_width_ = next_input_width;
				400	return *this;
				401	}
				402
				403	inline uint32_t next_input_width() const {
				404	if (this->next_input_width_ == 0) {
				405	return input_width();
				406	} else {
				407	return this->next_input_width_;
				408	}
				409	}
				410
				411	inline size_t next_output_height() const {
				412	const size_t padded_input_height = padding_top() + next_input_height() + padding_bottom();
				413	if (padded_input_height <= dilated_kernel_height()) {
				414	return 1;
				415	} else {
				416	return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
				417	}
				418	}
				419
				420	inline size_t next_output_width() const {
				421	const size_t padded_input_width = padding_left() + next_input_width() + padding_right();
				422	if (padded_input_width <= dilated_kernel_width()) {
				423	return 1;
				424	} else {
				425	return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
				426	}
				427	}
				428
				429	inline ConvolutionOperatorTester& next_batch_size(size_t next_batch_size) {
				430	assert(next_batch_size >= 1);
				431	this->next_batch_size_ = next_batch_size;
				432	return *this;
				433	}
				434
				435	inline size_t next_batch_size() const {
				436	if (this->next_batch_size_ == 0) {
				437	return batch_size();
				438	} else {
				439	return this->next_batch_size_;
				440	}
				441	}
				442
				443	inline ConvolutionOperatorTester& qmin(uint8_t qmin) {
				444	this->qmin_ = qmin;
				445	return *this;
				446	}
				447
				448	inline uint8_t qmin() const {
				449	return this->qmin_;
				450	}
				451
				452	inline ConvolutionOperatorTester& qmax(uint8_t qmax) {
				453	this->qmax_ = qmax;
				454	return *this;
				455	}
				456
				457	inline uint8_t qmax() const {
				458	return this->qmax_;
				459	}
				460
				461	inline ConvolutionOperatorTester& depthwise_layout(bool depthwise_layout) {
				462	this->depthwise_layout_ = depthwise_layout;
				463	return *this;
				464	}
				465
				466	inline bool depthwise_layout() const {
				467	return this->depthwise_layout_;
				468	}
				469
				470	inline ConvolutionOperatorTester& iterations(size_t iterations) {
				471	this->iterations_ = iterations;
				472	return *this;
				473	}
				474
				475	inline size_t iterations() const {
				476	return this->iterations_;
				477	}
				478
				479	void TestQ8() const {
				480	std::random_device random_device;
				481	auto rng = std::mt19937(random_device());
				482	auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
				483	auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);
				484
				485	std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
				486	batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()) + 8);
				487	std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
				488	std::vector<int32_t> bias(groups() * group_output_channels());
				489	std::vector<uint8_t> output(batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()));
				490	std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
				491	std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
				492
				493	const uint8_t input_zero_point = 127;
				494	const uint8_t kernel_zero_point = 127;
				495
				496	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				497	std::generate(input.begin(), input.end(), std::ref(u8rng));
				498	std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
				499	std::generate(bias.begin(), bias.end(), std::ref(s32rng));
				500	std::fill(output.begin(), output.end(), 0xA5);
				501
				502	// Compute reference results, without renormalization.
				503	for (size_t i = 0; i < batch_size(); i++) {
				504	for (size_t oy = 0; oy < output_height(); oy++) {
				505	for (size_t ox = 0; ox < output_width(); ox++) {
				506	for (size_t g = 0; g < groups(); g++) {
				507	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				508	accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
				509	bias[g * group_output_channels() + oc];
				510	}
				511	}
				512	}
				513	}
				514	}
				515	if (depthwise_layout()) {
				516	ASSERT_EQ(group_input_channels(), 1);
				517
				518	for (size_t i = 0; i < batch_size(); i++) {
				519	for (size_t oy = 0; oy < output_height(); oy++) {
				520	for (size_t ox = 0; ox < output_width(); ox++) {
				521	for (size_t ky = 0; ky < kernel_height(); ky++) {
				522	const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
				523	if (iy < input_height()) {
				524	for (size_t kx = 0; kx < kernel_width(); kx++) {
				525	const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
				526	if (ix < input_width()) {
				527	for (size_t g = 0; g < groups(); g++) {
				528	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				529	accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
				530	(int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g]) - int32_t(input_zero_point)) *
				531	(int32_t(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]) - int32_t(kernel_zero_point));
				532	}
				533	}
				534	}
				535	}
				536	}
				537	}
				538	}
				539	}
				540	}
				541	} else {
				542	for (size_t i = 0; i < batch_size(); i++) {
				543	for (size_t oy = 0; oy < output_height(); oy++) {
				544	for (size_t ox = 0; ox < output_width(); ox++) {
				545	for (size_t ky = 0; ky < kernel_height(); ky++) {
				546	const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
				547	if (iy < input_height()) {
				548	for (size_t kx = 0; kx < kernel_width(); kx++) {
				549	const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
				550	if (ix < input_width()) {
				551	for (size_t g = 0; g < groups(); g++) {
				552	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				553	for (size_t ic = 0; ic < group_input_channels(); ic++) {
				554	accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
				555	(int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
				556	(int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
				557	}
				558	}
				559	}
				560	}
				561	}
				562	}
				563	}
				564	}
				565	}
				566	}
				567	}
				568
				569	// Compute renormalization parameters.
				570	const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
				571	const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
				572
				573	const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
				574	const uint8_t output_zero_point = uint8_t(std::max(std::min(
				575	lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
				576	long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));
				577
				578	// Renormalize reference results.
				579	std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
				580	[this, output_scale, output_zero_point](int32_t x) -> double {
				581	return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
				582	});
				583
				584	// Create, setup, run, and destroy Convolution operator.
				585	ASSERT_EQ(xnn_status_success, xnn_initialize());
				586	xnn_operator_t convolution_op = nullptr;
				587
				588	ASSERT_EQ(xnn_status_success,
				589	xnn_create_convolution2d_nhwc_q8(
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	590	padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
				591	padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	592	kernel_height(), kernel_width(),
				593	subsampling_height(), subsampling_width(),
				594	dilation_height(), dilation_width(),
				595	groups(), group_input_channels(), group_output_channels(),
				596	input_pixel_stride(), output_pixel_stride(),
				597	input_zero_point, 1.0f /* input scale */,
				598	kernel_zero_point, 1.0f /* kernel scale */,
				599	kernel.data(), bias.data(),
				600	output_zero_point, output_scale, qmin(), qmax(),
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	601	(depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) \| (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	602	&convolution_op));
				603
				604	// Smart pointer to automatically delete convolution_op.
				605	std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
				606
				607	ASSERT_EQ(xnn_status_success,
				608	xnn_setup_convolution2d_nhwc_q8(
				609	convolution_op,
				610	batch_size(), input_height(), input_width(),
				611	input.data(), output.data(),
				612	nullptr /* thread pool */));
				613
				614	ASSERT_EQ(xnn_status_success,
				615	xnn_run_operator(convolution_op, nullptr /* thread pool */));
				616
				617	// Verify results.
				618	for (size_t i = 0; i < batch_size(); i++) {
				619	for (size_t y = 0; y < output_height(); y++) {
				620	for (size_t x = 0; x < output_width(); x++) {
				621	for (size_t g = 0; g < groups(); g++) {
				622	for (size_t c = 0; c < group_output_channels(); c++) {
				623	ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
				624	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				625	ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
				626	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				627	ASSERT_NEAR(
				628	output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
				629	double(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
				630	0.9)
				631	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				632	}
				633	}
				634	}
				635	}
				636	}
				637	}
				638	}
				639
				640	void TestF32() const {
				641	std::random_device random_device;
				642	auto rng = std::mt19937(random_device());
				643	auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
				644
				645	std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
				646	batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()));
				647	std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
				648	std::vector<float> bias(groups() * group_output_channels());
				649	std::vector<float> output(batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()));
				650	std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
				651
				652	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				653	std::generate(input.begin(), input.end(), std::ref(f32rng));
				654	std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
				655	std::generate(bias.begin(), bias.end(), std::ref(f32rng));
				656	std::fill(output.begin(), output.end(), nanf(""));
				657
				658	// Compute reference results, without clamping.
				659	for (size_t i = 0; i < batch_size(); i++) {
				660	for (size_t oy = 0; oy < output_height(); oy++) {
				661	for (size_t ox = 0; ox < output_width(); ox++) {
				662	for (size_t g = 0; g < groups(); g++) {
				663	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				664	output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
				665	bias[g * group_output_channels() + oc];
				666	}
				667	}
				668	}
				669	}
				670	}
				671	if (depthwise_layout()) {
				672	ASSERT_EQ(group_input_channels(), 1);
				673
				674	for (size_t i = 0; i < batch_size(); i++) {
				675	for (size_t oy = 0; oy < output_height(); oy++) {
				676	for (size_t ox = 0; ox < output_width(); ox++) {
				677	for (size_t ky = 0; ky < kernel_height(); ky++) {
				678	const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
				679	if (iy < input_height()) {
				680	for (size_t kx = 0; kx < kernel_width(); kx++) {
				681	const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
				682	if (ix < input_width()) {
				683	for (size_t g = 0; g < groups(); g++) {
				684	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				685	output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
				686	input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g] *
				687	kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
				688	}
				689	}
				690	}
				691	}
				692	}
				693	}
				694	}
				695	}
				696	}
				697	} else {
				698	for (size_t i = 0; i < batch_size(); i++) {
				699	for (size_t oy = 0; oy < output_height(); oy++) {
				700	for (size_t ox = 0; ox < output_width(); ox++) {
				701	for (size_t ky = 0; ky < kernel_height(); ky++) {
				702	const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
				703	if (iy < input_height()) {
				704	for (size_t kx = 0; kx < kernel_width(); kx++) {
				705	const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
				706	if (ix < input_width()) {
				707	for (size_t g = 0; g < groups(); g++) {
				708	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				709	for (size_t ic = 0; ic < group_input_channels(); ic++) {
				710	output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
				711	input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
				712	kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
				713	}
				714	}
				715	}
				716	}
				717	}
				718	}
				719	}
				720	}
				721	}
				722	}
				723	}
				724
				725	// Compute clamping parameters.
				726	const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
				727	const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
				728
				729	const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
				730	const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
				731
				732	// Clamp reference results.
				733	for (float& value : output_ref) {
				734	value = std::max(std::min(value, output_max), output_min);
				735	}
				736
				737	// Create, setup, run, and destroy Convolution operator.
				738	ASSERT_EQ(xnn_status_success, xnn_initialize());
				739	xnn_operator_t convolution_op = nullptr;
				740
				741	ASSERT_EQ(xnn_status_success,
				742	xnn_create_convolution2d_nhwc_f32(
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	743	padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
				744	padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	745	kernel_height(), kernel_width(),
				746	subsampling_height(), subsampling_width(),
				747	dilation_height(), dilation_width(),
				748	groups(), group_input_channels(), group_output_channels(),
				749	input_pixel_stride(), output_pixel_stride(),
				750	kernel.data(), bias.data(),
				751	output_min, output_max,
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	752	(depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) \| (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	753	&convolution_op));
				754
				755	// Smart pointer to automatically delete convolution_op.
				756	std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
				757
				758	ASSERT_EQ(xnn_status_success,
				759	xnn_setup_convolution2d_nhwc_f32(
				760	convolution_op,
				761	batch_size(), input_height(), input_width(),
				762	input.data(), output.data(),
				763	nullptr /* thread pool */));
				764
				765	ASSERT_EQ(xnn_status_success,
				766	xnn_run_operator(convolution_op, nullptr /* thread pool */));
				767
				768	// Verify results.
				769	for (size_t i = 0; i < batch_size(); i++) {
				770	for (size_t y = 0; y < output_height(); y++) {
				771	for (size_t x = 0; x < output_width(); x++) {
				772	for (size_t g = 0; g < groups(); g++) {
				773	for (size_t c = 0; c < group_output_channels(); c++) {
				774	ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
				775	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				776	ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
				777	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				778	ASSERT_NEAR(
				779	output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
				780	output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
				781	1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
				782	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				783	}
				784	}
				785	}
				786	}
				787	}
				788	}
				789	}
				790
				791	void TestSetupQ8() const {
				792	ASSERT_FALSE(depthwise_layout());
				793
				794	std::random_device random_device;
				795	auto rng = std::mt19937(random_device());
				796	auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
				797	auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);
				798
				799	std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
				800	batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()),
				801	next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_pixel_stride() + groups() * group_input_channels())) + 8);
				802	std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
				803	std::vector<int32_t> bias(groups() * group_output_channels());
				804	std::vector<uint8_t> output(std::max(
				805	batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()),
				806	next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_pixel_stride() + groups() * group_output_channels())));
				807	std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
				808	std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
				809	std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
				810	std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
				811
				812	const uint8_t input_zero_point = 127;
				813	const uint8_t kernel_zero_point = 127;
				814
				815	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				816	std::generate(input.begin(), input.end(), std::ref(u8rng));
				817	std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
				818	std::generate(bias.begin(), bias.end(), std::ref(s32rng));
				819	std::fill(output.begin(), output.end(), 0xA5);
				820
				821	// Compute reference results, without renormalization.
				822	for (size_t i = 0; i < batch_size(); i++) {
				823	for (size_t oy = 0; oy < output_height(); oy++) {
				824	for (size_t ox = 0; ox < output_width(); ox++) {
				825	for (size_t g = 0; g < groups(); g++) {
				826	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				827	accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
				828	bias[g * group_output_channels() + oc];
				829	}
				830	}
				831	}
				832	}
				833	}
				834	for (size_t i = 0; i < batch_size(); i++) {
				835	for (size_t oy = 0; oy < output_height(); oy++) {
				836	for (size_t ox = 0; ox < output_width(); ox++) {
				837	for (size_t ky = 0; ky < kernel_height(); ky++) {
				838	const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
				839	if (iy < input_height()) {
				840	for (size_t kx = 0; kx < kernel_width(); kx++) {
				841	const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
				842	if (ix < input_width()) {
				843	for (size_t g = 0; g < groups(); g++) {
				844	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				845	for (size_t ic = 0; ic < group_input_channels(); ic++) {
				846	accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
				847	(int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
				848	(int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
				849	}
				850	}
				851	}
				852	}
				853	}
				854	}
				855	}
				856	}
				857	}
				858	}
				859
				860	// Compute renormalization parameters.
				861	const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
				862	const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
				863
				864	const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
				865	const uint8_t output_zero_point = uint8_t(std::max(std::min(
				866	lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
				867	long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));
				868
				869	// Renormalize reference results.
				870	std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
				871	[this, output_scale, output_zero_point](int32_t x) -> double {
				872	return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
				873	});
				874
				875	// Create, setup, and run Convolution operator once.
				876	ASSERT_EQ(xnn_status_success, xnn_initialize());
				877	xnn_operator_t convolution_op = nullptr;
				878
				879	ASSERT_EQ(xnn_status_success,
				880	xnn_create_convolution2d_nhwc_q8(
				881	padding_top(), padding_right(), padding_bottom(), padding_left(),
				882	kernel_height(), kernel_width(),
				883	subsampling_height(), subsampling_width(),
				884	dilation_height(), dilation_width(),
				885	groups(), group_input_channels(), group_output_channels(),
				886	input_pixel_stride(), output_pixel_stride(),
				887	input_zero_point, 1.0f /* input scale */,
				888	kernel_zero_point, 1.0f /* kernel scale */,
				889	kernel.data(), bias.data(),
				890	output_zero_point, output_scale, qmin(), qmax(),
				891	0, &convolution_op));
				892
				893	// Smart pointer to automatically delete convolution_op.
				894	std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
				895
				896	ASSERT_EQ(xnn_status_success,
				897	xnn_setup_convolution2d_nhwc_q8(
				898	convolution_op,
				899	batch_size(), input_height(), input_width(),
				900	input.data(), output.data(),
				901	nullptr /* thread pool */));
				902
				903	ASSERT_EQ(xnn_status_success,
				904	xnn_run_operator(convolution_op, nullptr /* thread pool */));
				905
				906	// Verify results of the first run.
				907	for (size_t i = 0; i < batch_size(); i++) {
				908	for (size_t y = 0; y < output_height(); y++) {
				909	for (size_t x = 0; x < output_width(); x++) {
				910	for (size_t g = 0; g < groups(); g++) {
				911	for (size_t c = 0; c < group_output_channels(); c++) {
				912	ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
				913	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				914	ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
				915	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				916	ASSERT_NEAR(
				917	output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
				918	double(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
				919	0.9)
				920	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				921	}
				922	}
				923	}
				924	}
				925	}
				926
				927	// Re-generate data for the second run.
				928	std::generate(input.begin(), input.end(), std::ref(u8rng));
				929	std::fill(output.begin(), output.end(), 0xA5);
				930
				931	// Compute reference results for the second run, including renormalization.
				932	for (size_t i = 0; i < next_batch_size(); i++) {
				933	for (size_t oy = 0; oy < next_output_height(); oy++) {
				934	for (size_t ox = 0; ox < next_output_width(); ox++) {
				935	for (size_t g = 0; g < groups(); g++) {
				936	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				937	next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
				938	bias[g * group_output_channels() + oc];
				939	}
				940	}
				941	}
				942	}
				943	}
				944	for (size_t i = 0; i < next_batch_size(); i++) {
				945	for (size_t oy = 0; oy < next_output_height(); oy++) {
				946	for (size_t ox = 0; ox < next_output_width(); ox++) {
				947	for (size_t ky = 0; ky < kernel_height(); ky++) {
				948	const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
				949	if (iy < next_input_height()) {
				950	for (size_t kx = 0; kx < kernel_width(); kx++) {
				951	const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
				952	if (ix < next_input_width()) {
				953	for (size_t g = 0; g < groups(); g++) {
				954	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				955	for (size_t ic = 0; ic < group_input_channels(); ic++) {
				956	next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
				957	(int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
				958	(int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
				959	}
				960	}
				961	}
				962	}
				963	}
				964	}
				965	}
				966	}
				967	}
				968	}
				969	std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
				970	[this, output_scale, output_zero_point](int32_t x) -> double {
				971	return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
				972	});
				973
				974	// Setup and run Convolution operator the second time, and destroy the operator.
				975	ASSERT_EQ(xnn_status_success,
				976	xnn_setup_convolution2d_nhwc_q8(
				977	convolution_op,
				978	next_batch_size(), next_input_height(), next_input_width(),
				979	input.data(), output.data(),
				980	nullptr /* thread pool */));
				981
				982	ASSERT_EQ(xnn_status_success,
				983	xnn_run_operator(convolution_op, nullptr /* thread pool */));
				984
				985	// Verify results of the second run.
				986	for (size_t i = 0; i < next_batch_size(); i++) {
				987	for (size_t y = 0; y < next_output_height(); y++) {
				988	for (size_t x = 0; x < next_output_width(); x++) {
				989	for (size_t g = 0; g < groups(); g++) {
				990	for (size_t c = 0; c < group_output_channels(); c++) {
				991	ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
				992	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				993	ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
				994	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				995	ASSERT_NEAR(
				996	next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
				997	double(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
				998	0.9)
				999	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				1000	}
				1001	}
				1002	}
				1003	}
				1004	}
				1005	}
				1006	}
				1007
				1008	void TestSetupF32() const {
				1009	ASSERT_FALSE(depthwise_layout());
				1010
				1011	std::random_device random_device;
				1012	auto rng = std::mt19937(random_device());
				1013	auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
				1014
				1015	std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max(
				1016	batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()),
				1017	next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_pixel_stride() + groups() * group_input_channels())));
				1018	std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
				1019	std::vector<float> bias(groups() * group_output_channels());
				1020	std::vector<float> output(std::max(
				1021	batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()),
				1022	next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_pixel_stride() + groups() * group_output_channels())));
				1023	std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
				1024	std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
				1025
				1026	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				1027	std::generate(input.begin(), input.end(), std::ref(f32rng));
				1028	std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
				1029	std::generate(bias.begin(), bias.end(), std::ref(f32rng));
				1030	std::fill(output.begin(), output.end(), nanf(""));
				1031
				1032	// Compute reference results, without clamping.
				1033	for (size_t i = 0; i < batch_size(); i++) {
				1034	for (size_t oy = 0; oy < output_height(); oy++) {
				1035	for (size_t ox = 0; ox < output_width(); ox++) {
				1036	for (size_t g = 0; g < groups(); g++) {
				1037	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				1038	output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
				1039	bias[g * group_output_channels() + oc];
				1040	}
				1041	}
				1042	}
				1043	}
				1044	}
				1045	for (size_t i = 0; i < batch_size(); i++) {
				1046	for (size_t oy = 0; oy < output_height(); oy++) {
				1047	for (size_t ox = 0; ox < output_width(); ox++) {
				1048	for (size_t ky = 0; ky < kernel_height(); ky++) {
				1049	const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
				1050	if (iy < input_height()) {
				1051	for (size_t kx = 0; kx < kernel_width(); kx++) {
				1052	const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
				1053	if (ix < input_width()) {
				1054	for (size_t g = 0; g < groups(); g++) {
				1055	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				1056	for (size_t ic = 0; ic < group_input_channels(); ic++) {
				1057	output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
				1058	input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
				1059	kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
				1060	}
				1061	}
				1062	}
				1063	}
				1064	}
				1065	}
				1066	}
				1067	}
				1068	}
				1069	}
				1070
				1071	// Compute clamping parameters.
				1072	const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
				1073	const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
				1074
				1075	const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
				1076	const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
				1077
				1078	// Clamp reference results.
				1079	for (float& value : output_ref) {
				1080	value = std::max(std::min(value, output_max), output_min);
				1081	}
				1082
				1083	// Create, setup, and run Convolution operator once.
				1084	ASSERT_EQ(xnn_status_success, xnn_initialize());
				1085	xnn_operator_t convolution_op = nullptr;
				1086
				1087	ASSERT_EQ(xnn_status_success,
				1088	xnn_create_convolution2d_nhwc_f32(
				1089	padding_top(), padding_right(), padding_bottom(), padding_left(),
				1090	kernel_height(), kernel_width(),
				1091	subsampling_height(), subsampling_width(),
				1092	dilation_height(), dilation_width(),
				1093	groups(), group_input_channels(), group_output_channels(),
				1094	input_pixel_stride(), output_pixel_stride(),
				1095	kernel.data(), bias.data(),
				1096	output_min, output_max,
				1097	0, &convolution_op));
				1098
				1099	// Smart pointer to automatically delete convolution_op.
				1100	std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
				1101
				1102	ASSERT_EQ(xnn_status_success,
				1103	xnn_setup_convolution2d_nhwc_f32(
				1104	convolution_op,
				1105	batch_size(), input_height(), input_width(),
				1106	input.data(), output.data(),
				1107	nullptr /* thread pool */));
				1108
				1109	ASSERT_EQ(xnn_status_success,
				1110	xnn_run_operator(convolution_op, nullptr /* thread pool */));
				1111
				1112	// Verify results of the first run.
				1113	for (size_t i = 0; i < batch_size(); i++) {
				1114	for (size_t y = 0; y < output_height(); y++) {
				1115	for (size_t x = 0; x < output_width(); x++) {
				1116	for (size_t g = 0; g < groups(); g++) {
				1117	for (size_t c = 0; c < group_output_channels(); c++) {
				1118	ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
				1119	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				1120	ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
				1121	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				1122	ASSERT_NEAR(
				1123	output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
				1124	output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
				1125	1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
				1126	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				1127	}
				1128	}
				1129	}
				1130	}
				1131	}
				1132
				1133	// Re-generate data for the second run.
				1134	std::generate(input.begin(), input.end(), std::ref(f32rng));
				1135	std::fill(output.begin(), output.end(), nanf(""));
				1136
				1137	// Compute reference results for the second run, including clamping.
				1138	for (size_t i = 0; i < next_batch_size(); i++) {
				1139	for (size_t oy = 0; oy < next_output_height(); oy++) {
				1140	for (size_t ox = 0; ox < next_output_width(); ox++) {
				1141	for (size_t g = 0; g < groups(); g++) {
				1142	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				1143	next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
				1144	bias[g * group_output_channels() + oc];
				1145	}
				1146	}
				1147	}
				1148	}
				1149	}
				1150	for (size_t i = 0; i < next_batch_size(); i++) {
				1151	for (size_t oy = 0; oy < next_output_height(); oy++) {
				1152	for (size_t ox = 0; ox < next_output_width(); ox++) {
				1153	for (size_t ky = 0; ky < kernel_height(); ky++) {
				1154	const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
				1155	if (iy < next_input_height()) {
				1156	for (size_t kx = 0; kx < kernel_width(); kx++) {
				1157	const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
				1158	if (ix < next_input_width()) {
				1159	for (size_t g = 0; g < groups(); g++) {
				1160	for (size_t oc = 0; oc < group_output_channels(); oc++) {
				1161	for (size_t ic = 0; ic < group_input_channels(); ic++) {
				1162	next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
				1163	input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
				1164	kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
				1165	}
				1166	}
				1167	}
				1168	}
				1169	}
				1170	}
				1171	}
				1172	}
				1173	}
				1174	}
				1175	for (float& value : next_output_ref) {
				1176	value = std::max(std::min(value, output_max), output_min);
				1177	}
				1178
				1179	// Setup and run Convolution operator the second time, and destroy the operator.
				1180	ASSERT_EQ(xnn_status_success,
				1181	xnn_setup_convolution2d_nhwc_f32(
				1182	convolution_op,
				1183	next_batch_size(), next_input_height(), next_input_width(),
				1184	input.data(), output.data(),
				1185	nullptr /* thread pool */));
				1186
				1187	ASSERT_EQ(xnn_status_success,
				1188	xnn_run_operator(convolution_op, nullptr /* thread pool */));
				1189
				1190	// Verify results of the second run.
				1191	for (size_t i = 0; i < next_batch_size(); i++) {
				1192	for (size_t y = 0; y < next_output_height(); y++) {
				1193	for (size_t x = 0; x < next_output_width(); x++) {
				1194	for (size_t g = 0; g < groups(); g++) {
				1195	for (size_t c = 0; c < group_output_channels(); c++) {
				1196	ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
				1197	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				1198	ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
				1199	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				1200	ASSERT_NEAR(
				1201	next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
				1202	output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
				1203	1.0e-4 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
				1204	<< "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
				1205	}
				1206	}
				1207	}
				1208	}
				1209	}
				1210	}
				1211	}
				1212
				1213	private:
				1214	uint32_t padding_top_{0};
				1215	uint32_t padding_right_{0};
				1216	uint32_t padding_bottom_{0};
				1217	uint32_t padding_left_{0};
Marat Dukhan	8440fde	2019-10-24 12:46:13 -0700	[diff] [blame^]	1218	bool padding_tf_same_{false};
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	1219	size_t input_height_{1};
				1220	size_t input_width_{1};
				1221	uint32_t groups_{1};
				1222	size_t group_input_channels_{1};
				1223	size_t input_pixel_stride_{0};
				1224	size_t group_output_channels_{1};
				1225	size_t output_pixel_stride_{0};
				1226	size_t batch_size_{1};
				1227	uint32_t kernel_height_{1};
				1228	uint32_t kernel_width_{1};
				1229	uint32_t dilation_height_{1};
				1230	uint32_t dilation_width_{1};
				1231	uint32_t subsampling_height_{1};
				1232	uint32_t subsampling_width_{1};
				1233	size_t next_input_height_{0};
				1234	size_t next_input_width_{0};
				1235	size_t next_batch_size_{0};
				1236	uint8_t qmin_{0};
				1237	uint8_t qmax_{255};
				1238	bool depthwise_layout_{false};
				1239	size_t iterations_{1};
				1240	};