Blame - src/runtime/NEON/functions/NEConvolution.cpp - platform/external/ComputeLibrary

blob: 582c9a4375fc6e563f7da74fcb3f2b86b1f9b7ad [file] [log] [blame]

Anthony Barbier	871448e	2017-03-24 14:54:29 +0000	[diff] [blame^]	1	/*
				2	* Copyright (c) 2016, 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/runtime/NEON/functions/NEConvolution.h"
				25
				26	#include "arm_compute/core/Error.h"
				27	#include "arm_compute/core/Helpers.h"
				28	#include "arm_compute/core/ITensor.h"
				29	#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
				30	#include "arm_compute/core/PixelValue.h"
				31	#include "arm_compute/core/TensorInfo.h"
				32	#include "arm_compute/core/Utils.h"
				33	#include "arm_compute/core/Validate.h"
				34	#include "arm_compute/runtime/NEON/NEScheduler.h"
				35	#include "arm_compute/runtime/TensorAllocator.h"
				36
				37	#include <array>
				38	#include <utility>
				39
				40	using namespace arm_compute;
				41
				42	void NEConvolution3x3::configure(ITensor input, ITensor output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
				43	{
				44	auto k = arm_compute::cpp14::make_unique<NEConvolution3x3Kernel>();
				45	k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
				46	_kernel = std::move(k);
				47	_border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
				48	}
				49
				50	NEConvolution5x5::NEConvolution5x5()
				51	: _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
				52	{
				53	}
				54
				55	void NEConvolution5x5::configure(ITensor input, ITensor output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
				56	{
				57	ARM_COMPUTE_ERROR_ON(conv == nullptr);
				58	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
				59	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
				60
				61	std::array<int16_t, 5> conv_col{ { 0 } };
				62	std::array<int16_t, 5> conv_row{ { 0 } };
				63
				64	_is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), 5);
				65
				66	if(_is_separable)
				67	{
				68	DataType intermediate_type = DataType::UNKNOWN;
				69	std::tie(std::ignore, intermediate_type) = data_type_for_convolution(conv_col.data(), conv_row.data(), 5);
				70
				71	_tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, intermediate_type));
				72
				73	if(scale == 0)
				74	{
				75	scale = calculate_matrix_scale(conv, 5);
				76	}
				77
				78	_kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
				79	_kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED);
				80
				81	_tmp.allocator()->allocate();
				82
				83	_border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
				84	}
				85	else
				86	{
				87	_kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
				88	_border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value));
				89	}
				90	}
				91
				92	void NEConvolution5x5::run()
				93	{
				94	_border_handler.run(_border_handler.window());
				95
				96	if(_is_separable)
				97	{
				98	NEScheduler::get().multithread(&_kernel_hor);
				99	NEScheduler::get().multithread(&_kernel_vert);
				100	}
				101	else
				102	{
				103	NEScheduler::get().multithread(&_kernel);
				104	}
				105	}
				106
				107	NEConvolution7x7::NEConvolution7x7()
				108	: _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
				109	{
				110	}
				111
				112	void NEConvolution7x7::configure(ITensor input, ITensor output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
				113	{
				114	ARM_COMPUTE_ERROR_ON(conv == nullptr);
				115	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
				116	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
				117
				118	std::array<int16_t, 7> conv_col{ { 0 } };
				119	std::array<int16_t, 7> conv_row{ { 0 } };
				120
				121	_is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), 7);
				122
				123	if(_is_separable)
				124	{
				125	DataType intermediate_type = DataType::UNKNOWN;
				126	std::tie(std::ignore, intermediate_type) = data_type_for_convolution(conv_col.data(), conv_row.data(), 7);
				127
				128	_tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, intermediate_type));
				129
				130	if(scale == 0)
				131	{
				132	scale = calculate_matrix_scale(conv, 7);
				133	}
				134
				135	_kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
				136	_kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED);
				137
				138	_tmp.allocator()->allocate();
				139
				140	_border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
				141	}
				142	else
				143	{
				144	_kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
				145	_border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value));
				146	}
				147	}
				148
				149	void NEConvolution7x7::run()
				150	{
				151	_border_handler.run(_border_handler.window());
				152
				153	if(_is_separable)
				154	{
				155	NEScheduler::get().multithread(&_kernel_hor);
				156	NEScheduler::get().multithread(&_kernel_vert);
				157	}
				158	else
				159	{
				160	NEScheduler::get().multithread(&_kernel);
				161	}
				162	}
				163
				164	NEConvolution9x9::NEConvolution9x9()
				165	: _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
				166	{
				167	}
				168
				169	void NEConvolution9x9::configure(ITensor input, ITensor output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
				170	{
				171	ARM_COMPUTE_ERROR_ON(conv == nullptr);
				172	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
				173	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
				174
				175	std::array<int16_t, 9> conv_col{ { 0 } };
				176	std::array<int16_t, 9> conv_row{ { 0 } };
				177
				178	_is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), 9);
				179
				180	if(_is_separable)
				181	{
				182	DataType intermediate_type = DataType::UNKNOWN;
				183	std::tie(std::ignore, intermediate_type) = data_type_for_convolution(conv_col.data(), conv_row.data(), 9);
				184
				185	_tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, intermediate_type));
				186
				187	if(scale == 0)
				188	{
				189	scale = calculate_matrix_scale(conv, 9);
				190	}
				191
				192	_kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
				193	_kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED);
				194
				195	_tmp.allocator()->allocate();
				196
				197	_border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
				198	}
				199	else
				200	{
				201	_kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
				202	_border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value));
				203	}
				204	}
				205
				206	void NEConvolution9x9::run()
				207	{
				208	_border_handler.run(_border_handler.window());
				209
				210	if(_is_separable)
				211	{
				212	NEScheduler::get().multithread(&_kernel_hor);
				213	NEScheduler::get().multithread(&_kernel_vert);
				214	}
				215	else
				216	{
				217	NEScheduler::get().multithread(&_kernel);
				218	}
				219	}
				220
				221	void NEConvolutionRectangle::configure(ITensor input, ITensor output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
				222	{
				223	auto k = arm_compute::cpp14::make_unique<NEConvolutionRectangleKernel>();
				224	k->configure(input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED);
				225	_kernel = std::move(k);
				226	_border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
				227	}