Blame - test/conv-hwc-microkernel-tester.h - platform/external/XNNPACK

blob: e8235ff418f9b91fa52b1083ccb1426ddc53ebb9 [file] [log] [blame]

XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	1	// Copyright 2019 Google LLC
				2	//
				3	// This source code is licensed under the BSD-style license found in the
				4	// LICENSE file in the root directory of this source tree.
				5
				6	#pragma once
				7
				8	#include <gtest/gtest.h>
				9
				10	#include <algorithm>
				11	#include <cassert>
				12	#include <cmath>
				13	#include <cstddef>
				14	#include <cstdlib>
				15	#include <functional>
				16	#include <limits>
				17	#include <random>
				18	#include <vector>
				19
				20	#include <xnnpack/AlignedAllocator.h>
				21	#include <xnnpack/pack.h>
Marat Dukhan	eeaa7bd	2019-10-25 17:31:25 -0700	[diff] [blame]	22	#include <xnnpack/params-init.h>
Frank Barchard	e0601b5	2019-10-25 17:43:34 -0700	[diff] [blame]	23	#include <xnnpack/params.h>
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	24	#include <xnnpack.h>
				25
				26
				27	class ConvHWCMicrokernelTester {
				28	public:
				29	enum class Variant {
				30	Native,
				31	Scalar,
				32	};
				33
				34	inline ConvHWCMicrokernelTester& output_channels_tile(uint32_t output_channels_tile) {
				35	this->output_channels_tile_ = output_channels_tile;
				36	return *this;
				37	}
				38
				39	inline uint32_t output_channels_tile() const {
				40	return this->output_channels_tile_;
				41	}
				42
				43	inline ConvHWCMicrokernelTester& padding(uint32_t padding) {
				44	this->padding_top_ = padding;
				45	this->padding_right_ = padding;
				46	this->padding_bottom_ = padding;
				47	this->padding_left_ = padding;
				48	return *this;
				49	}
				50
				51	inline ConvHWCMicrokernelTester& padding_height(uint32_t padding_height) {
				52	this->padding_top_ = padding_height;
				53	this->padding_bottom_ = padding_height;
				54	return *this;
				55	}
				56
				57	inline ConvHWCMicrokernelTester& padding_width(uint32_t padding_width) {
				58	this->padding_right_ = padding_width;
				59	this->padding_left_ = padding_width;
				60	return *this;
				61	}
				62
				63	inline ConvHWCMicrokernelTester& padding_top(uint32_t padding_top) {
				64	this->padding_top_ = padding_top;
				65	return *this;
				66	}
				67
				68	inline uint32_t padding_top() const {
				69	return this->padding_top_;
				70	}
				71
				72	inline ConvHWCMicrokernelTester& padding_right(uint32_t padding_right) {
				73	this->padding_right_ = padding_right;
				74	return *this;
				75	}
				76
				77	inline uint32_t padding_right() const {
				78	return this->padding_right_;
				79	}
				80
				81	inline ConvHWCMicrokernelTester& padding_bottom(uint32_t padding_bottom) {
				82	this->padding_bottom_ = padding_bottom;
				83	return *this;
				84	}
				85
				86	inline uint32_t padding_bottom() const {
				87	return this->padding_bottom_;
				88	}
				89
				90	inline ConvHWCMicrokernelTester& padding_left(uint32_t padding_left) {
				91	this->padding_left_ = padding_left;
				92	return *this;
				93	}
				94
				95	inline uint32_t padding_left() const {
				96	return this->padding_left_;
				97	}
				98
				99	inline ConvHWCMicrokernelTester& input_size(uint32_t input_height, uint32_t input_width) {
				100	assert(input_height >= 1);
				101	assert(input_width >= 1);
				102	this->input_height_ = input_height;
				103	this->input_width_ = input_width;
				104	return *this;
				105	}
				106
				107	inline ConvHWCMicrokernelTester& input_height(uint32_t input_height) {
				108	assert(input_height >= 1);
				109	this->input_height_ = input_height;
				110	return *this;
				111	}
				112
				113	inline uint32_t input_height() const {
				114	return this->input_height_;
				115	}
				116
				117	inline ConvHWCMicrokernelTester& input_width(uint32_t input_width) {
				118	assert(input_width >= 1);
				119	this->input_width_ = input_width;
				120	return *this;
				121	}
				122
				123	inline uint32_t input_width() const {
				124	return this->input_width_;
				125	}
				126
				127	inline ConvHWCMicrokernelTester& input_channels(size_t input_channels) {
				128	assert(input_channels >= 1);
				129	this->input_channels_ = input_channels;
				130	return *this;
				131	}
				132
				133	inline size_t input_channels() const {
				134	return this->input_channels_;
				135	}
				136
				137	inline ConvHWCMicrokernelTester& output_channels(size_t output_channels) {
				138	assert(output_channels >= 1);
				139	this->output_channels_ = output_channels;
				140	return *this;
				141	}
				142
				143	inline size_t output_channels() const {
				144	return this->output_channels_;
				145	}
				146
				147	inline size_t packed_output_channels() const {
				148	return output_channels() % output_channels_tile() == 0 ? output_channels() : output_channels() / output_channels_tile() * output_channels_tile() + output_channels_tile();
				149	}
				150
				151	inline ConvHWCMicrokernelTester& batch_size(size_t batch_size) {
				152	assert(batch_size >= 1);
				153	this->batch_size_ = batch_size;
				154	return *this;
				155	}
				156
				157	inline size_t batch_size() const {
				158	return this->batch_size_;
				159	}
				160
				161	inline ConvHWCMicrokernelTester& kernel_size(uint32_t kernel_size) {
				162	assert(kernel_size >= 1);
				163	this->kernel_height_ = kernel_size;
				164	this->kernel_width_ = kernel_size;
				165	return *this;
				166	}
				167
				168	inline ConvHWCMicrokernelTester& kernel_height(uint32_t kernel_height) {
				169	assert(kernel_height >= 1);
				170	this->kernel_height_ = kernel_height;
				171	return *this;
				172	}
				173
				174	inline uint32_t kernel_height() const {
				175	return this->kernel_height_;
				176	}
				177
				178	inline ConvHWCMicrokernelTester& kernel_width(uint32_t kernel_width) {
				179	assert(kernel_width >= 1);
				180	this->kernel_width_ = kernel_width;
				181	return *this;
				182	}
				183
				184	inline uint32_t kernel_width() const {
				185	return this->kernel_width_;
				186	}
				187
				188	inline ConvHWCMicrokernelTester& subsampling(uint32_t subsampling) {
				189	assert(subsampling >= 1);
				190	this->subsampling_height_ = subsampling;
				191	this->subsampling_width_ = subsampling;
				192	return *this;
				193	}
				194
				195	inline ConvHWCMicrokernelTester& subsampling_height(uint32_t subsampling_height) {
				196	assert(subsampling_height >= 1);
				197	this->subsampling_height_ = subsampling_height;
				198	return *this;
				199	}
				200
				201	inline uint32_t subsampling_height() const {
				202	return this->subsampling_height_;
				203	}
				204
				205	inline ConvHWCMicrokernelTester& subsampling_width(uint32_t subsampling_width) {
				206	assert(subsampling_width >= 1);
				207	this->subsampling_width_ = subsampling_width;
				208	return *this;
				209	}
				210
				211	inline uint32_t subsampling_width() const {
				212	return this->subsampling_width_;
				213	}
				214
				215	inline ConvHWCMicrokernelTester& output_y_start(uint32_t output_y_start) {
				216	this->output_y_start_ = output_y_start;
				217	return *this;
				218	}
				219
				220	inline uint32_t output_y_start() const {
				221	return this->output_y_start_;
				222	}
				223
				224	inline ConvHWCMicrokernelTester& output_y_end(uint32_t output_y_end) {
				225	this->output_y_end_ = output_y_end;
				226	return *this;
				227	}
				228
				229	inline uint32_t output_y_end() const {
				230	if (this->output_y_end_ == std::numeric_limits<uint32_t>::max()) {
				231	return output_height();
				232	} else {
				233	return this->output_y_end_;
				234	}
				235	}
				236
				237	inline size_t input_pixel_stride() const {
				238	return input_channels();
				239	}
				240
				241	inline size_t output_pixel_stride() const {
				242	return output_channels();
				243	}
				244
				245	inline size_t output_height() const {
				246	const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
Marat Dukhan	7e4ca40	2020-05-15 18:50:12 -0700	[diff] [blame]	247	return (std::max<size_t>(padded_input_height + subsampling_height(), kernel_height()) - kernel_height())
				248	/ subsampling_height();
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	249	}
				250
				251	inline size_t output_width() const {
				252	const size_t padded_input_width = padding_left() + input_width() + padding_right();
Marat Dukhan	7e4ca40	2020-05-15 18:50:12 -0700	[diff] [blame]	253	return (std::max<size_t>(padded_input_width + subsampling_width(), kernel_width()) - kernel_width())
				254	/ subsampling_width();
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	255	}
				256
				257	inline ConvHWCMicrokernelTester& qmin(uint8_t qmin) {
				258	this->qmin_ = qmin;
				259	return *this;
				260	}
				261
				262	inline uint8_t qmin() const {
				263	return this->qmin_;
				264	}
				265
				266	inline ConvHWCMicrokernelTester& qmax(uint8_t qmax) {
				267	this->qmax_ = qmax;
				268	return *this;
				269	}
				270
				271	inline uint8_t qmax() const {
				272	return this->qmax_;
				273	}
				274
				275	inline ConvHWCMicrokernelTester& iterations(size_t iterations) {
				276	this->iterations_ = iterations;
				277	return *this;
				278	}
				279
				280	inline size_t iterations() const {
				281	return this->iterations_;
				282	}
				283
				284	void Test(xnn_f32_conv_hwc_ukernel_function conv, Variant variant = Variant::Native) const {
				285	ASSERT_LT(output_y_start(), output_height());
				286	ASSERT_LE(output_y_end(), output_height());
				287	ASSERT_GT(output_y_end(), output_y_start());
Marat Dukhan	441e221	2019-12-04 18:30:49 -0800	[diff] [blame]	288	ASSERT_GE(output_width(), 1);
				289	ASSERT_GE(output_height(), 1);
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	290
				291	std::random_device random_device;
				292	auto rng = std::mt19937(random_device());
				293	auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
				294
				295	std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
				296	batch_size() * ((input_height() * input_width() - 1) * input_pixel_stride() + input_channels()));
				297	std::vector<float> zero(XNN_EXTRA_BYTES / sizeof(float) + input_width() * input_channels());
				298	std::vector<float> kernel(output_channels() * kernel_height() * kernel_width() * input_channels());
				299	std::vector<float> bias(output_channels());
				300	std::vector<float> output(batch_size() * ((output_height() * output_width() - 1) * output_pixel_stride() + output_channels()));
				301	std::vector<float> output_ref(batch_size() * output_height() * output_width() * output_channels());
Marat Dukhan	9594db0	2019-12-05 14:32:37 -0800	[diff] [blame]	302	std::vector<float, AlignedAllocator<float, 64>> packed_weights((input_channels() * kernel_height() * kernel_width() + 1) * packed_output_channels());
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	303
				304	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				305	std::generate(input.begin(), input.end(), std::ref(f32rng));
				306	std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
				307	std::generate(bias.begin(), bias.end(), std::ref(f32rng));
				308	std::fill(output.begin(), output.end(), nanf(""));
				309	std::fill(packed_weights.begin(), packed_weights.end(), 0.0f);
				310
				311	xnn_pack_f32_dconv_oki_w(
				312	output_channels(),
				313	input_channels(),
				314	output_channels_tile(),
				315	kernel_height(), kernel_width(),
Marat Dukhan	b42f866	2020-07-06 20:46:13 -0700	[diff] [blame]	316	kernel.data(), bias.data(), packed_weights.data(), nullptr);
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	317
				318	// Compute reference results, without clamping.
				319	for (size_t i = 0; i < batch_size(); i++) {
				320	for (size_t oy = 0; oy < output_height(); oy++) {
				321	for (size_t ox = 0; ox < output_width(); ox++) {
				322	for (size_t oc = 0; oc < output_channels(); oc++) {
				323	float acc = bias[oc];
				324	for (size_t ky = 0; ky < kernel_height(); ky++) {
				325	const size_t iy = oy * subsampling_height() + ky - padding_top();
				326	if (iy < input_height()) {
				327	for (size_t kx = 0; kx < kernel_width(); kx++) {
				328	const size_t ix = ox * subsampling_width() + kx - padding_left();
				329	if (ix < input_width()) {
				330	for (size_t ic = 0; ic < input_channels(); ic++) {
				331	acc +=
				332	input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + ic] *
				333	kernel[((oc * kernel_height() + ky) * kernel_width() + kx) * input_channels() + ic];
				334	}
				335	}
				336	}
				337	}
				338	}
				339	output_ref[((i * output_height() + oy) * output_width() + ox) * output_channels() + oc] = acc;
				340	}
				341	}
				342	}
				343	}
				344
				345	// Compute clamping parameters.
				346	const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
				347	const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
				348
				349	const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
				350	const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
				351
				352	// Clamp reference results.
				353	for (float& value : output_ref) {
				354	value = std::max(std::min(value, output_max), output_min);
				355	}
				356
Frank Barchard	9f3a843	2020-06-02 13:59:35 -0700	[diff] [blame]	357	// Prepare parameters.
Frank Barchard	e70dbeb	2020-05-01 15:46:41 -0700	[diff] [blame]	358	xnn_f32_minmax_params params = { };
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	359	switch (variant) {
				360	case Variant::Native:
Frank Barchard	e70dbeb	2020-05-01 15:46:41 -0700	[diff] [blame]	361	params = xnn_init_f32_minmax_params(output_min, output_max);
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	362	break;
				363	case Variant::Scalar:
Frank Barchard	e70dbeb	2020-05-01 15:46:41 -0700	[diff] [blame]	364	params = xnn_init_scalar_f32_minmax_params(output_min, output_max);
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	365	break;
				366	}
				367
				368	// Call optimized micro-kernel.
				369	conv(
				370	input_height(), input_width(),
				371	output_y_start(), output_y_end(),
				372	input.data(), zero.data(), packed_weights.data(), output.data(),
				373	padding_top(), output_channels(),
				374	output_pixel_stride() * output_width() * sizeof(float),
				375	output_pixel_stride() * sizeof(float),
Frank Barchard	e70dbeb	2020-05-01 15:46:41 -0700	[diff] [blame]	376	&params);
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	377
				378	// Verify results.
				379	for (size_t i = 0; i < batch_size(); i++) {
				380	for (size_t y = output_y_start(); y < output_y_end(); y++) {
				381	for (size_t x = 0; x < output_width(); x++) {
				382	for (size_t c = 0; c < output_channels(); c++) {
				383	ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_min)
				384	<< "(x, y) = (" << x << ", " << y << "), channel = " << c;
				385	ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c], output_max)
				386	<< "(x, y) = (" << x << ", " << y << "), channel = " << c;
				387	ASSERT_NEAR(
				388	output_ref[((i * output_height() + y) * output_width() + x) * output_channels() + c],
				389	output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + c],
				390	1.0e-4 * std::abs(output_ref[((i * output_height() + y) * output_width() + x) * output_channels() + c]))
				391	<< "(x, y) = (" << x << ", " << y << "), channel = " << c;
				392	}
				393	}
				394	}
				395	}
				396	}
				397	}
				398
				399	private:
				400	uint32_t padding_top_{0};
				401	uint32_t padding_right_{0};
				402	uint32_t padding_bottom_{0};
				403	uint32_t padding_left_{0};
				404	size_t input_height_{1};
				405	size_t input_width_{1};
				406	size_t input_channels_{1};
				407	size_t output_channels_{1};
				408	uint32_t output_channels_tile_{1};
				409	size_t batch_size_{1};
				410	uint32_t kernel_height_{1};
				411	uint32_t kernel_width_{1};
				412	uint32_t subsampling_height_{1};
				413	uint32_t subsampling_width_{1};
				414	uint32_t output_y_start_{0};
				415	uint32_t output_y_end_{std::numeric_limits<uint32_t>::max()};
				416	uint8_t qmin_{0};
				417	uint8_t qmax_{255};
				418	size_t iterations_{1};
				419	};