Blame - test/ibilinear-microkernel-tester.h - platform/external/XNNPACK

blob: 5857285c89c9c55e3f929b716049b64c08601dae [file] [log] [blame]

Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	1	// Copyright 2019 Google LLC
				2	//
				3	// This source code is licensed under the BSD-style license found in the
				4	// LICENSE file in the root directory of this source tree.
				5
				6	#pragma once
				7
				8	#include <gtest/gtest.h>
				9
				10	#include <algorithm>
				11	#include <cassert>
				12	#include <cmath>
				13	#include <cstddef>
Marat Dukhan	9fab3f9	2019-11-08 14:55:19 -0800	[diff] [blame]	14	#include <cstdint>
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	15	#include <functional>
				16	#include <random>
				17	#include <vector>
				18
				19	#include <xnnpack.h>
				20	#include <xnnpack/AlignedAllocator.h>
Marat Dukhan	cdb42a5	2021-11-22 20:09:32 -0800	[diff] [blame]	21	#include <xnnpack/math.h>
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	22	#include <xnnpack/params.h>
				23
				24
Marat Dukhan	660fd19	2020-03-10 04:55:30 -0700	[diff] [blame]	25	class IBilinearMicrokernelTester {
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	26	public:
Marat Dukhan	660fd19	2020-03-10 04:55:30 -0700	[diff] [blame]	27	inline IBilinearMicrokernelTester& pixels(uint32_t pixels) {
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	28	assert(pixels >= 1);
				29	this->pixels_ = pixels;
				30	return *this;
				31	}
				32
				33	inline uint32_t pixels() const {
				34	return this->pixels_;
				35	}
				36
Marat Dukhan	660fd19	2020-03-10 04:55:30 -0700	[diff] [blame]	37	inline IBilinearMicrokernelTester& channels(uint32_t channels) {
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	38	assert(channels >= 1);
				39	this->channels_ = channels;
				40	return *this;
				41	}
				42
				43	inline uint32_t channels() const {
				44	return this->channels_;
				45	}
				46
Marat Dukhan	660fd19	2020-03-10 04:55:30 -0700	[diff] [blame]	47	inline IBilinearMicrokernelTester& input_offset(uint32_t input_offset) {
Marat Dukhan	9fab3f9	2019-11-08 14:55:19 -0800	[diff] [blame]	48	this->input_offset_ = input_offset;
				49	return *this;
				50	}
				51
				52	inline uint32_t input_offset() const {
				53	return this->input_offset_;
				54	}
				55
Marat Dukhan	660fd19	2020-03-10 04:55:30 -0700	[diff] [blame]	56	inline IBilinearMicrokernelTester& output_stride(uint32_t output_stride) {
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	57	assert(output_stride != 0);
				58	this->output_stride_ = output_stride;
				59	return *this;
				60	}
				61
				62	inline uint32_t output_stride() const {
				63	if (this->output_stride_ == 0) {
				64	return channels();
				65	} else {
				66	assert(this->output_stride_ >= channels());
				67	return this->output_stride_;
				68	}
				69	}
				70
Marat Dukhan	660fd19	2020-03-10 04:55:30 -0700	[diff] [blame]	71	inline IBilinearMicrokernelTester& iterations(size_t iterations) {
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	72	this->iterations_ = iterations;
				73	return *this;
				74	}
				75
				76	inline size_t iterations() const {
				77	return this->iterations_;
				78	}
				79
XNNPACK Team	6be46b2	2020-10-22 23:34:54 -0700	[diff] [blame]	80	inline IBilinearMicrokernelTester& input_stride(uint32_t input_stride) {
				81	assert(input_stride != 0);
				82	this->input_stride_ = input_stride;
				83	return *this;
				84	}
				85
				86	inline uint32_t input_stride() const {
				87	if (this->input_stride_ == 0) {
				88	return 4 * pixels();
				89	} else {
				90	assert(this->input_stride_ >= 4 * pixels());
				91	return this->input_stride_;
				92	}
				93	}
				94
Marat Dukhan	660fd19	2020-03-10 04:55:30 -0700	[diff] [blame]	95	void Test(xnn_f32_ibilinear_ukernel_function ibilinear) const {
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	96	std::random_device random_device;
				97	auto rng = std::mt19937(random_device());
				98	auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
				99
				100	std::vector<const float> indirection(pixels() 4);
				101	std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + indirection.size() * channels());
Marat Dukhan	9594db0	2019-12-05 14:32:37 -0800	[diff] [blame]	102	std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	103	std::vector<float> output((pixels() - 1) * output_stride() + channels());
				104	std::vector<float> output_ref(pixels() * channels());
				105
				106	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				107	std::generate(input.begin(), input.end(), std::ref(f32rng));
				108	std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng));
				109	std::fill(output.begin(), output.end(), nanf(""));
				110
				111	for (size_t i = 0; i < indirection.size(); i++) {
Marat Dukhan	9fab3f9	2019-11-08 14:55:19 -0800	[diff] [blame]	112	indirection[i] = input.data() + i * channels() - input_offset();
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	113	}
				114	std::shuffle(indirection.begin(), indirection.end(), rng);
				115
				116	// Compute reference results.
				117	for (size_t i = 0; i < pixels(); i++) {
				118	for (size_t c = 0; c < channels(); c++) {
				119	const float alpha_h = packed_weights[i * 2 + 0];
				120	const float alpha_v = packed_weights[i * 2 + 1];
				121	output_ref[i * channels() + c] =
Marat Dukhan	9fab3f9	2019-11-08 14:55:19 -0800	[diff] [blame]	122	indirection[i * 4 + 0][c + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
				123	indirection[i * 4 + 1][c + input_offset()] * alpha_h * (1.0f - alpha_v) +
				124	indirection[i * 4 + 2][c + input_offset()] * (1.0f - alpha_h) * alpha_v +
				125	indirection[i * 4 + 3][c + input_offset()] * alpha_h * alpha_v;
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	126	}
				127	}
				128
				129	// Call optimized micro-kernel.
Marat Dukhan	660fd19	2020-03-10 04:55:30 -0700	[diff] [blame]	130	ibilinear(
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	131	pixels(), channels() * sizeof(float),
Marat Dukhan	9fab3f9	2019-11-08 14:55:19 -0800	[diff] [blame]	132	indirection.data(), input_offset() * sizeof(float),
				133	packed_weights.data(), output.data(),
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	134	(output_stride() - channels()) * sizeof(float));
				135
				136	// Verify results.
				137	for (size_t i = 0; i < pixels(); i++) {
				138	for (size_t c = 0; c < channels(); c++) {
				139	ASSERT_NEAR(
				140	output_ref[i * channels() + c],
				141	output[i * output_stride() + c],
Marat Dukhan	0183625	2020-04-13 19:17:43 -0700	[diff] [blame]	142	std::abs(output_ref[i * channels() + c]) * 1.0e-4)
Marat Dukhan	cdb42a5	2021-11-22 20:09:32 -0800	[diff] [blame]	143	<< "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
				144	}
				145	}
				146	}
				147	}
				148
				149	void Test(xnn_s8_ibilinear_ukernel_function ibilinear) const {
				150	std::random_device random_device;
				151	auto rng = std::mt19937(random_device());
				152	auto i8rng = std::bind(
				153	std::uniform_int_distribution<int16_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
				154	std::ref(rng));
				155	auto w11rng = std::bind(std::uniform_int_distribution<int16_t>(0, 2047), std::ref(rng));
				156
				157	std::vector<const int8_t> indirection(pixels() 4);
				158	std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + indirection.size() * channels());
				159	std::vector<int16_t, AlignedAllocator<int16_t, 64>> packed_weights(pixels() * 2);
				160	std::vector<int8_t> output((pixels() - 1) * output_stride() + channels());
				161	std::vector<int8_t> output_ref(pixels() * channels());
				162
				163	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				164	std::generate(input.begin(), input.end(), std::ref(i8rng));
				165	std::generate(packed_weights.begin(), packed_weights.end(), std::ref(w11rng));
				166	std::fill(output.begin(), output.end(), INT8_C(0xFA));
				167
				168	for (size_t i = 0; i < indirection.size(); i++) {
				169	indirection[i] = input.data() + i * channels() - input_offset();
				170	}
				171	std::shuffle(indirection.begin(), indirection.end(), rng);
				172
				173	// Compute reference results.
				174	for (size_t i = 0; i < pixels(); i++) {
				175	for (size_t c = 0; c < channels(); c++) {
				176	const int32_t alpha_h = packed_weights[i * 2 + 0];
				177	const int32_t alpha_v = packed_weights[i * 2 + 1];
				178	const int32_t acc = asr_s32(
				179	int32_t(indirection[i * 4 + 0][c + input_offset()]) * (2048 - alpha_h) * (2048 - alpha_v) +
				180	int32_t(indirection[i * 4 + 1][c + input_offset()]) * alpha_h * (2048 - alpha_v) +
				181	int32_t(indirection[i * 4 + 2][c + input_offset()]) * (2048 - alpha_h) * alpha_v +
				182	int32_t(indirection[i * 4 + 3][c + input_offset()]) * alpha_h * alpha_v +
				183	2097152, 22);
				184	ASSERT_GE(acc, std::numeric_limits<int8_t>::min());
				185	ASSERT_LE(acc, std::numeric_limits<int8_t>::max());
				186	output_ref[i * channels() + c] = (int8_t) acc;
				187	}
				188	}
				189
				190	// Call optimized micro-kernel.
				191	ibilinear(
				192	pixels(), channels() * sizeof(int8_t),
				193	indirection.data(), input_offset() * sizeof(int8_t),
				194	packed_weights.data(), output.data(),
				195	(output_stride() - channels()) * sizeof(int8_t));
				196
				197	// Verify results.
				198	for (size_t i = 0; i < pixels(); i++) {
				199	for (size_t c = 0; c < channels(); c++) {
				200	ASSERT_EQ(int32_t(output_ref[i * channels() + c]), int32_t(output[i * output_stride() + c]))
				201	<< "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
				202	}
				203	}
				204	}
				205	}
				206
				207	void Test(xnn_u8_ibilinear_ukernel_function ibilinear) const {
				208	std::random_device random_device;
				209	auto rng = std::mt19937(random_device());
				210	auto u8rng = std::bind(
				211	std::uniform_int_distribution<uint16_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
				212	auto w11rng = std::bind(std::uniform_int_distribution<uint16_t>(0, 2047), std::ref(rng));
				213
				214	std::vector<const uint8_t> indirection(pixels() 4);
				215	std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + indirection.size() * channels());
				216	std::vector<int16_t, AlignedAllocator<int16_t, 64>> packed_weights(pixels() * 2);
				217	std::vector<uint8_t> output((pixels() - 1) * output_stride() + channels());
				218	std::vector<uint8_t> output_ref(pixels() * channels());
				219
				220	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				221	std::generate(input.begin(), input.end(), std::ref(u8rng));
				222	std::generate(packed_weights.begin(), packed_weights.end(), std::ref(w11rng));
				223	std::fill(output.begin(), output.end(), UINT8_C(0xFA));
				224
				225	for (size_t i = 0; i < indirection.size(); i++) {
				226	indirection[i] = input.data() + i * channels() - input_offset();
				227	}
				228	std::shuffle(indirection.begin(), indirection.end(), rng);
				229
				230	// Compute reference results.
				231	for (size_t i = 0; i < pixels(); i++) {
				232	for (size_t c = 0; c < channels(); c++) {
				233	const uint32_t alpha_h = uint32_t(int32_t(packed_weights[i * 2 + 0]));
				234	const uint32_t alpha_v = uint32_t(int32_t(packed_weights[i * 2 + 1]));
				235	const uint32_t acc = (2097152 +
				236	int32_t(indirection[i * 4 + 0][c + input_offset()]) * (2048 - alpha_h) * (2048 - alpha_v) +
				237	int32_t(indirection[i * 4 + 1][c + input_offset()]) * alpha_h * (2048 - alpha_v) +
				238	int32_t(indirection[i * 4 + 2][c + input_offset()]) * (2048 - alpha_h) * alpha_v +
				239	int32_t(indirection[i * 4 + 3][c + input_offset()]) * alpha_h * alpha_v) >> 22;
				240	ASSERT_LE(acc, std::numeric_limits<uint8_t>::max());
				241	output_ref[i * channels() + c] = (uint8_t) acc;
				242	}
				243	}
				244
				245	// Call optimized micro-kernel.
				246	ibilinear(
				247	pixels(), channels() * sizeof(uint8_t),
				248	indirection.data(), input_offset() * sizeof(uint8_t),
				249	packed_weights.data(), output.data(),
				250	(output_stride() - channels()) * sizeof(uint8_t));
				251
				252	// Verify results.
				253	for (size_t i = 0; i < pixels(); i++) {
				254	for (size_t c = 0; c < channels(); c++) {
				255	ASSERT_EQ(uint32_t(output_ref[i * channels() + c]), uint32_t(output[i * output_stride() + c]))
				256	<< "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	257	}
				258	}
				259	}
				260	}
				261
XNNPACK Team	cb2b667	2020-10-23 19:30:50 -0700	[diff] [blame]	262	void TestCHW(xnn_f32_ibilinear_chw_ukernel_function ibilinear) const {
XNNPACK Team	6be46b2	2020-10-22 23:34:54 -0700	[diff] [blame]	263	std::random_device random_device;
				264	auto rng = std::mt19937(random_device());
				265	auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
				266
XNNPACK Team	3155c47	2020-10-23 19:36:50 -0700	[diff] [blame]	267	std::vector<const float> indirection(pixels() 2);
XNNPACK Team	6be46b2	2020-10-22 23:34:54 -0700	[diff] [blame]	268	std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + (channels() - 1) * input_stride() + 4 * pixels());
XNNPACK Team	cb2b667	2020-10-23 19:30:50 -0700	[diff] [blame]	269	std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
XNNPACK Team	6be46b2	2020-10-22 23:34:54 -0700	[diff] [blame]	270	std::vector<float> output(pixels() * channels());
				271	std::vector<float> output_ref(pixels() * channels());
				272
				273	for (size_t iteration = 0; iteration < iterations(); iteration++) {
				274	std::generate(input.begin(), input.end(), std::ref(f32rng));
XNNPACK Team	cb2b667	2020-10-23 19:30:50 -0700	[diff] [blame]	275	std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng));
XNNPACK Team	6be46b2	2020-10-22 23:34:54 -0700	[diff] [blame]	276	std::fill(output.begin(), output.end(), nanf(""));
				277
XNNPACK Team	3155c47	2020-10-23 19:36:50 -0700	[diff] [blame]	278	// Indirection will point to the even ("left") pixels of the input.
				279	// The kernels will expect "right" pixels to be placed right next to them.
XNNPACK Team	6be46b2	2020-10-22 23:34:54 -0700	[diff] [blame]	280	for (size_t i = 0; i < indirection.size(); i++) {
XNNPACK Team	3155c47	2020-10-23 19:36:50 -0700	[diff] [blame]	281	const float* left_corner = input.data() + 2 * i - input_offset();
				282	indirection[i] = left_corner;
XNNPACK Team	6be46b2	2020-10-22 23:34:54 -0700	[diff] [blame]	283	}
				284	std::shuffle(indirection.begin(), indirection.end(), rng);
				285
				286	// Compute reference results.
				287	for (size_t i = 0; i < pixels(); i++) {
				288	for (size_t c = 0; c < channels(); c++) {
XNNPACK Team	cb2b667	2020-10-23 19:30:50 -0700	[diff] [blame]	289	const float alpha_h = packed_weights[i * 2 + 0];
				290	const float alpha_v = packed_weights[i * 2 + 1];
XNNPACK Team	6be46b2	2020-10-22 23:34:54 -0700	[diff] [blame]	291	// `c * pixels() + i` because the output is NCHW.
				292	output_ref[c * pixels() + i] =
				293	// `c * indirection.size()` because the input is NCHW.
XNNPACK Team	3155c47	2020-10-23 19:36:50 -0700	[diff] [blame]	294	(indirection[i * 2 + 0] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
				295	(indirection[i * 2 + 0] + 1)[c * input_stride() + input_offset()] * alpha_h * (1.0f - alpha_v) +
				296	(indirection[i * 2 + 1] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * alpha_v +
				297	(indirection[i * 2 + 1] + 1)[c * input_stride() + input_offset()] * alpha_h * alpha_v;
XNNPACK Team	6be46b2	2020-10-22 23:34:54 -0700	[diff] [blame]	298	}
				299	}
				300
				301	// Call optimized micro-kernel.
				302	ibilinear(
				303	pixels(), channels(),
				304	indirection.data(), input_offset() * sizeof(float),
XNNPACK Team	cb2b667	2020-10-23 19:30:50 -0700	[diff] [blame]	305	packed_weights.data(), output.data(), input_stride() * sizeof(float));
XNNPACK Team	6be46b2	2020-10-22 23:34:54 -0700	[diff] [blame]	306
				307	// Verify results.
				308	for (size_t c = 0; c < channels(); c++) {
				309	for (size_t i = 0; i < pixels(); i++) {
				310	ASSERT_NEAR(
				311	output_ref[c * pixels() + i],
				312	output[c * pixels() + i],
				313	std::abs(output_ref[c * pixels() + i]) * 1.0e-4)
				314	<< "i = " << i << ", channel = " << c;
				315	}
				316	}
				317	}
				318	}
				319
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	320	private:
				321	uint32_t channels_{1};
				322	uint32_t pixels_{1};
				323	uint32_t output_stride_{0};
XNNPACK Team	6be46b2	2020-10-22 23:34:54 -0700	[diff] [blame]	324	uint32_t input_stride_{0};
Marat Dukhan	9fab3f9	2019-11-08 14:55:19 -0800	[diff] [blame]	325	uint32_t input_offset_{0};
Marat Dukhan	35dacfb	2019-11-07 19:18:16 -0800	[diff] [blame]	326	size_t iterations_{3};
				327	};