Blame - test/conv-hwc2spchw-microkernel-tester.h - platform/external/XNNPACK

blob: 91666ceee311d5d45cd80dc4e1ad7bfa429122f2 [file] [log] [blame]

XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	1	// Copyright 2019 Google LLC
				2	//
				3	// This source code is licensed under the BSD-style license found in the
				4	// LICENSE file in the root directory of this source tree.
				5
				6	#pragma once
				7
				8	#include <gtest/gtest.h>
				9
				10	#include <algorithm>
				11	#include <cassert>
				12	#include <cmath>
				13	#include <cstddef>
				14	#include <cstdlib>
				15	#include <functional>
				16	#include <limits>
				17	#include <random>
				18	#include <vector>
				19
				20	#include <xnnpack/AlignedAllocator.h>
				21	#include <xnnpack/pack.h>
Marat Dukhan	eeaa7bd	2019-10-25 17:31:25 -0700	[diff] [blame]	22	#include <xnnpack/params-init.h>
Frank Barchard	e0601b5	2019-10-25 17:43:34 -0700	[diff] [blame]	23	#include <xnnpack/params.h>
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	24	#include <xnnpack.h>
				25
				26
				27	class ConvHWC2SpCHWMicrokernelTester {
				28	public:
				29	enum class Variant {
				30	Native,
				31	Scalar,
				32	};
				33
				34	inline ConvHWC2SpCHWMicrokernelTester& output_channels_tile(uint32_t output_channels_tile) {
				35	this->output_channels_tile_ = output_channels_tile;
				36	return *this;
				37	}
				38
				39	inline uint32_t output_channels_tile() const {
				40	return this->output_channels_tile_;
				41	}
				42
				43	inline ConvHWC2SpCHWMicrokernelTester& padding(uint32_t padding) {
				44	this->padding_top_ = padding;
				45	this->padding_right_ = padding;
				46	this->padding_bottom_ = padding;
				47	this->padding_left_ = padding;
				48	return *this;
				49	}
				50
				51	inline ConvHWC2SpCHWMicrokernelTester& padding_height(uint32_t padding_height) {
				52	this->padding_top_ = padding_height;
				53	this->padding_bottom_ = padding_height;
				54	return *this;
				55	}
				56
				57	inline ConvHWC2SpCHWMicrokernelTester& padding_width(uint32_t padding_width) {
				58	this->padding_right_ = padding_width;
				59	this->padding_left_ = padding_width;
				60	return *this;
				61	}
				62
				63	inline ConvHWC2SpCHWMicrokernelTester& padding_top(uint32_t padding_top) {
				64	this->padding_top_ = padding_top;
				65	return *this;
				66	}
				67
				68	inline uint32_t padding_top() const {
				69	return this->padding_top_;
				70	}
				71
				72	inline ConvHWC2SpCHWMicrokernelTester& padding_right(uint32_t padding_right) {
				73	this->padding_right_ = padding_right;
				74	return *this;
				75	}
				76
				77	inline uint32_t padding_right() const {
				78	return this->padding_right_;
				79	}
				80
				81	inline ConvHWC2SpCHWMicrokernelTester& padding_bottom(uint32_t padding_bottom) {
				82	this->padding_bottom_ = padding_bottom;
				83	return *this;
				84	}
				85
				86	inline uint32_t padding_bottom() const {
				87	return this->padding_bottom_;
				88	}
				89
				90	inline ConvHWC2SpCHWMicrokernelTester& padding_left(uint32_t padding_left) {
				91	this->padding_left_ = padding_left;
				92	return *this;
				93	}
				94
				95	inline uint32_t padding_left() const {
				96	return this->padding_left_;
				97	}
				98
				99	inline ConvHWC2SpCHWMicrokernelTester& input_size(uint32_t input_height, uint32_t input_width) {
				100	assert(input_height >= 1);
				101	assert(input_width >= 1);
				102	this->input_height_ = input_height;
				103	this->input_width_ = input_width;
				104	return *this;
				105	}
				106
				107	inline ConvHWC2SpCHWMicrokernelTester& input_height(uint32_t input_height) {
				108	assert(input_height >= 1);
				109	this->input_height_ = input_height;
				110	return *this;
				111	}
				112
				113	inline uint32_t input_height() const {
				114	return this->input_height_;
				115	}
				116
				117	inline ConvHWC2SpCHWMicrokernelTester& input_width(uint32_t input_width) {
				118	assert(input_width >= 1);
				119	this->input_width_ = input_width;
				120	return *this;
				121	}
				122
				123	inline uint32_t input_width() const {
				124	return this->input_width_;
				125	}
				126
				127	inline ConvHWC2SpCHWMicrokernelTester& input_channels(size_t input_channels) {
				128	assert(input_channels >= 1);
				129	this->input_channels_ = input_channels;
				130	return *this;
				131	}
				132
				133	inline size_t input_channels() const {
				134	return this->input_channels_;
				135	}
				136
				137	inline ConvHWC2SpCHWMicrokernelTester& output_channels(size_t output_channels) {
				138	assert(output_channels >= 1);
				139	this->output_channels_ = output_channels;
				140	return *this;
				141	}
				142
				143	inline size_t output_channels() const {
				144	return this->output_channels_;
				145	}
				146
				147	inline size_t packed_output_channels() const {
				148	return output_channels() % output_channels_tile() == 0 ? output_channels() : output_channels() / output_channels_tile() * output_channels_tile() + output_channels_tile();
				149	}
				150
				151	inline ConvHWC2SpCHWMicrokernelTester& batch_size(size_t batch_size) {
				152	assert(batch_size >= 1);
				153	this->batch_size_ = batch_size;
				154	return *this;
				155	}
				156
				157	inline size_t batch_size() const {
				158	return this->batch_size_;
				159	}
				160
				161	inline ConvHWC2SpCHWMicrokernelTester& kernel_size(uint32_t kernel_size) {
				162	assert(kernel_size >= 1);
				163	this->kernel_height_ = kernel_size;
				164	this->kernel_width_ = kernel_size;
				165	return *this;
				166	}
				167
				168	inline ConvHWC2SpCHWMicrokernelTester& kernel_height(uint32_t kernel_height) {
				169	assert(kernel_height >= 1);
				170	this->kernel_height_ = kernel_height;
				171	return *this;
				172	}
				173
				174	inline uint32_t kernel_height() const {
				175	return this->kernel_height_;
				176	}
				177
				178	inline ConvHWC2SpCHWMicrokernelTester& kernel_width(uint32_t kernel_width) {
				179	assert(kernel_width >= 1);
				180	this->kernel_width_ = kernel_width;
				181	return *this;
				182	}
				183
				184	inline uint32_t kernel_width() const {
				185	return this->kernel_width_;
				186	}
				187
				188	inline ConvHWC2SpCHWMicrokernelTester& subsampling(uint32_t subsampling) {
				189	assert(subsampling >= 1);
				190	this->subsampling_height_ = subsampling;
				191	this->subsampling_width_ = subsampling;
				192	return *this;
				193	}
				194
				195	inline ConvHWC2SpCHWMicrokernelTester& subsampling_height(uint32_t subsampling_height) {
				196	assert(subsampling_height >= 1);
				197	this->subsampling_height_ = subsampling_height;
				198	return *this;
				199	}
				200
				201	inline uint32_t subsampling_height() const {
				202	return this->subsampling_height_;
				203	}
				204
				205	inline ConvHWC2SpCHWMicrokernelTester& subsampling_width(uint32_t subsampling_width) {
				206	assert(subsampling_width >= 1);
				207	this->subsampling_width_ = subsampling_width;
				208	return *this;
				209	}
				210
				211	inline uint32_t subsampling_width() const {
				212	return this->subsampling_width_;
				213	}
				214
				215	inline ConvHWC2SpCHWMicrokernelTester& output_y_start(uint32_t output_y_start) {
				216	this->output_y_start_ = output_y_start;
				217	return *this;
				218	}
				219
				220	inline uint32_t output_y_start() const {
				221	return this->output_y_start_;
				222	}
				223
				224	inline ConvHWC2SpCHWMicrokernelTester& output_y_end(uint32_t output_y_end) {
				225	this->output_y_end_ = output_y_end;
				226	return *this;
				227	}
				228
				229	inline uint32_t output_y_end() const {
				230	if (this->output_y_end_ == std::numeric_limits<uint32_t>::max()) {
				231	return output_height();
				232	} else {
				233	return this->output_y_end_;
				234	}
				235	}
				236
				237	inline size_t input_pixel_stride() const {
				238	return input_channels();
				239	}
				240
				241	inline size_t output_pixel_stride() const {
				242	return output_channels();
				243	}
				244
				245	inline size_t output_height() const {
				246	const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
Marat Dukhan	441e221	2019-12-04 18:30:49 -0800	[diff] [blame]	247	if (padded_input_height < kernel_height()) {
				248	return 0;
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	249	} else {
				250	return (padded_input_height - kernel_height()) / subsampling_height() + 1;
				251	}
				252	}
				253
				254	inline size_t output_width() const {
				255	const size_t padded_input_width = padding_left() + input_width() + padding_right();
Marat Dukhan	441e221	2019-12-04 18:30:49 -0800	[diff] [blame]	256	if (padded_input_width < kernel_width()) {
				257	return 0;
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	258	} else {
				259	return (padded_input_width - kernel_width()) / subsampling_width() + 1;
				260	}
				261	}
				262
				263	inline ConvHWC2SpCHWMicrokernelTester& qmin(uint8_t qmin) {
				264	this->qmin_ = qmin;
				265	return *this;
				266	}
				267
				268	inline uint8_t qmin() const {
				269	return this->qmin_;
				270	}
				271
				272	inline ConvHWC2SpCHWMicrokernelTester& qmax(uint8_t qmax) {
				273	this->qmax_ = qmax;
				274	return *this;
				275	}
				276
				277	inline uint8_t qmax() const {
				278	return this->qmax_;
				279	}
				280
				281	inline ConvHWC2SpCHWMicrokernelTester& iterations(size_t iterations) {
				282	this->iterations_ = iterations;
				283	return *this;
				284	}
				285
				286	inline size_t iterations() const {
				287	return this->iterations_;
				288	}
				289
				290	void Test(xnn_f32_conv_hwc2spchw_ukernel_function conv, Variant variant = Variant::Native) const {
				291	ASSERT_LT(output_y_start(), output_height());
				292	ASSERT_LE(output_y_end(), output_height());
				293	ASSERT_GT(output_y_end(), output_y_start());
Marat Dukhan	441e221	2019-12-04 18:30:49 -0800	[diff] [blame]	294	ASSERT_GE(output_width(), 1);
				295	ASSERT_GE(output_height(), 1);
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	296
				297	std::random_device random_device;
				298	auto rng = std::mt19937(random_device());
				299	auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
				300
				301	std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
				302	batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + input_channels()));
				303	std::vector<float> zero(XNN_EXTRA_BYTES / sizeof(float) + input_width() * input_channels());
				304	std::vector<float> kernel(output_channels() * kernel_height() * kernel_width() * input_channels());
				305	std::vector<float> bias(output_channels());
				306	std::vector<float> output(batch_size() * output_channels() * output_height() * output_width());
				307	std::vector<float> output_ref(batch_size() * output_channels() * output_height() * output_width());
Marat Dukhan	9594db0	2019-12-05 14:32:37 -0800	[diff] [blame]	308	std::vector<float, AlignedAllocator<float, 64>> packed_weights((input_channels() * kernel_height() * kernel_width() + 1) * packed_output_channels());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	309
				310	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				311	std::generate(input.begin(), input.end(), std::ref(f32rng));
				312	std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
				313	std::generate(bias.begin(), bias.end(), std::ref(f32rng));
				314	std::fill(output.begin(), output.end(), nanf(""));
				315	std::fill(packed_weights.begin(), packed_weights.end(), 0.0f);
				316
				317	xnn_pack_f32_dconv_oki_w(
				318	output_channels(),
				319	input_channels(),
				320	output_channels_tile(),
				321	kernel_height(), kernel_width(),
				322	kernel.data(), bias.data(), packed_weights.data());
				323
				324	// Compute reference results, without clamping.
				325	for (size_t i = 0; i < batch_size(); i++) {
				326	for (size_t oy = 0; oy < output_height(); oy++) {
				327	for (size_t ox = 0; ox < output_width(); ox++) {
				328	for (size_t oc = 0; oc < output_channels(); oc++) {
				329	float acc = bias[oc];
				330	for (size_t ky = 0; ky < kernel_height(); ky++) {
				331	const size_t iy = oy * subsampling_height() + ky - padding_top();
				332	if (iy < input_height()) {
				333	for (size_t kx = 0; kx < kernel_width(); kx++) {
				334	const size_t ix = ox * subsampling_width() + kx - padding_left();
				335	if (ix < input_width()) {
				336	for (size_t ic = 0; ic < input_channels(); ic++) {
				337	acc +=
				338	input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + ic] *
				339	kernel[((oc * kernel_height() + ky) * kernel_width() + kx) * input_channels() + ic];
				340	}
				341	}
				342	}
				343	}
				344	}
				345	output_ref[((i * output_channels() + oc) * output_height() + oy) * output_width() + ox] = acc;
				346	}
				347	}
				348	}
				349	}
				350
				351	// Compute clamping parameters.
				352	const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
				353	const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
				354
				355	const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
				356	const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
				357
				358	// Clamp reference results.
				359	for (float& value : output_ref) {
				360	value = std::max(std::min(value, output_max), output_min);
				361	}
				362
				363	// Prepare output parameters.
				364	xnn_f32_output_params output_params = { };
				365	switch (variant) {
				366	case Variant::Native:
Marat Dukhan	eeaa7bd	2019-10-25 17:31:25 -0700	[diff] [blame]	367	output_params = xnn_init_f32_output_params(output_min, output_max);
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	368	break;
				369	case Variant::Scalar:
Marat Dukhan	eeaa7bd	2019-10-25 17:31:25 -0700	[diff] [blame]	370	output_params = xnn_init_scalar_f32_output_params(output_min, output_max);
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	371	break;
				372	}
				373
				374	// Call optimized micro-kernel.
				375	conv(
				376	input_height(), input_width(),
				377	output_y_start(), output_y_end(),
				378	input.data(), zero.data(), packed_weights.data(), output.data(),
				379	padding_top(), output_channels(),
				380	output_width() * sizeof(float),
				381	output_height() * output_width() * sizeof(float),
				382	&output_params);
				383
				384	// Verify results.
				385	for (size_t i = 0; i < batch_size(); i++) {
				386	for (size_t y = output_y_start(); y < output_y_end(); y++) {
				387	for (size_t x = 0; x < output_width(); x++) {
				388	for (size_t c = 0; c < output_channels(); c++) {
				389	ASSERT_GE(output[((i * output_channels() + c) * output_height() + y) * output_width() + x], output_min)
				390	<< "(x, y) = (" << x << ", " << y << "), channel = " << c;
				391	ASSERT_LE(output[((i * output_channels() + c) * output_height() + y) * output_width() + x], output_max)
				392	<< "(x, y) = (" << x << ", " << y << "), channel = " << c;
				393	ASSERT_NEAR(
				394	output_ref[((i * output_channels() + c) * output_height() + y) * output_width() + x],
				395	output[((i * output_channels() + c) * output_height() + y) * output_width() + x],
				396	1.0e-4 * std::abs(output_ref[((i * output_channels() + c) * output_height() + y) * output_width() + x]))
				397	<< "(x, y) = (" << x << ", " << y << "), channel = " << c;
				398	}
				399	}
				400	}
				401	}
				402	}
				403	}
				404
				405	private:
				406	uint32_t padding_top_{0};
				407	uint32_t padding_right_{0};
				408	uint32_t padding_bottom_{0};
				409	uint32_t padding_left_{0};
				410	size_t input_height_{1};
				411	size_t input_width_{1};
				412	size_t input_channels_{1};
				413	size_t output_channels_{1};
				414	uint32_t output_channels_tile_{1};
				415	size_t batch_size_{1};
				416	uint32_t kernel_height_{1};
				417	uint32_t kernel_width_{1};
				418	uint32_t subsampling_height_{1};
				419	uint32_t subsampling_width_{1};
				420	uint32_t output_y_start_{0};
				421	uint32_t output_y_end_{std::numeric_limits<uint32_t>::max()};
				422	uint8_t qmin_{0};
				423	uint8_t qmax_{255};
				424	size_t iterations_{1};
				425	};