Blame - src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp - platform/external/ComputeLibrary

blob: 24b12f4969f04d2d8af8ea1979726d6ddf6e43d4 [file] [log] [blame]

Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	1	/*
Anthony Barbier	f45d5a9	2018-01-24 16:23:15 +0000	[diff] [blame]	2	* Copyright (c) 2017-2018 ARM Limited.
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
				25
				26	#include "arm_compute/core/Helpers.h"
				27	#include "arm_compute/core/ITensor.h"
				28	#include "arm_compute/core/PixelValue.h"
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	29	#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Anthony Barbier	f45d5a9	2018-01-24 16:23:15 +0000	[diff] [blame]	30	#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	31	#include "arm_compute/runtime/NEON/NEScheduler.h"
				32	#include "support/ToolchainSupport.h"
				33
				34	using namespace arm_compute;
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	35	using namespace arm_compute::misc;
				36	using namespace arm_compute::misc::shape_calculator;
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	37
				38	NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3()
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	39	: _dwc_kernel(), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(), _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(),
				40	_has_bias(false), _is_quantized(false), _is_optimized(false), _are_weights_reshaped(false), _is_nchw(true), _is_first_run(true), _permute(false)
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	41	{
				42	}
				43
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	44	void NEDepthwiseConvolutionLayer3x3::configure(ITensor input, const ITensor weights, const ITensor biases, ITensor output, const PadStrideInfo &conv_info, unsigned int depth_multiplier)
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	45	{
Anthony Barbier	f45d5a9	2018-01-24 16:23:15 +0000	[diff] [blame]	46	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
				47	ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	48
Anthony Barbier	f45d5a9	2018-01-24 16:23:15 +0000	[diff] [blame]	49	PixelValue zero_value(0.f);
				50
				51	_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
				52	_has_bias = biases != nullptr;
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	53	_is_optimized = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input->info()->tensor_shape(),
				54	conv_info,
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	55	input->info()->data_type(),
				56	depth_multiplier,
				57	input->info()->data_layout());
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	58	_are_weights_reshaped = false;
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	59	_is_nchw = input->info()->data_layout() == DataLayout::NCHW;
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	60	_permute = _is_optimized == _is_nchw;
Anthony Barbier	f45d5a9	2018-01-24 16:23:15 +0000	[diff] [blame]	61
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	62	if(_is_optimized)
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	63	{
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	64	if(_is_nchw)
				65	{
				66	// Configure the function to transform the input tensor from NCHW -> NHWC
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	67	_permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
				68	_permuted_input.info()->set_data_layout(DataLayout::NHWC);
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	69
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	70	// Configure the function to transform the weights tensor from IHW -> HWI
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	71	_permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
				72	_permuted_weights.info()->set_data_layout(DataLayout::NHWC);
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	73
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	74	// Configure optimized depthwise
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	75	_dwc_kernel.configure(&_permuted_input, &_permuted_weights, &_permuted_output, conv_info, depth_multiplier, DataLayout::NHWC);
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	76
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	77	// Configure the function to transform the convoluted output to ACL's native ordering format NCHW
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	78	_permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));
				79	_permuted_output.info()->set_data_layout(DataLayout::NCHW);
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	80
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	81	// Allocate tensors
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	82	_permuted_input.allocator()->allocate();
				83	_permuted_weights.allocator()->allocate();
				84	_permuted_output.allocator()->allocate();
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	85	}
				86	else
				87	{
				88	_dwc_kernel.configure(input, weights, output, conv_info, depth_multiplier, DataLayout::NHWC);
				89	}
Anthony Barbier	f45d5a9	2018-01-24 16:23:15 +0000	[diff] [blame]	90	}
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	91	else
				92	{
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	93	// Allocate the intermediate accumulator tensor in case of quantized input
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	94	if(_is_quantized)
				95	{
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	96	TensorShape accum_shape = output->info()->tensor_shape();
				97
				98	if(!_is_nchw)
				99	{
				100	permute(accum_shape, PermutationVector(1U, 2U, 0U));
				101	}
				102
				103	_accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32));
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	104	_accumulator.info()->set_quantization_info(input->info()->quantization_info());
				105	zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().offset));
				106	}
Anthony Barbier	f45d5a9	2018-01-24 16:23:15 +0000	[diff] [blame]	107
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	108	if(!_is_nchw)
Anthony Barbier	f45d5a9	2018-01-24 16:23:15 +0000	[diff] [blame]	109	{
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	110	// Configure the function to transform the input tensor from NHWC -> NCHW
				111	_permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
				112	_permuted_input.info()->set_data_layout(DataLayout::NCHW);
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	113
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	114	// Configure the function to transform the weights tensor from HWI -> IHW
				115	_permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
				116	_permuted_weights.info()->set_data_layout(DataLayout::NCHW);
				117
				118	// Configure optimized depthwise
				119	_dwc_kernel.configure(&_permuted_input, &_permuted_weights, (_is_quantized) ? &_accumulator : &_permuted_output, conv_info, depth_multiplier);
				120
				121	// Configure border handler
				122	_border_handler.configure(&_permuted_input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);
				123
				124	// Allocate tensors
				125	_permuted_input.allocator()->allocate();
				126	_permuted_weights.allocator()->allocate();
Anthony Barbier	f45d5a9	2018-01-24 16:23:15 +0000	[diff] [blame]	127	}
				128	else
				129	{
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	130	// Configure depthwise convolution kernel
				131	_dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier);
				132
				133	// Configure border handler
				134	_border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);
Anthony Barbier	f45d5a9	2018-01-24 16:23:15 +0000	[diff] [blame]	135	}
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	136	}
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	137
				138	// Configure biases accumulation
				139	if(_is_quantized)
				140	{
				141	const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
				142
				143	float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
				144	int output_multiplier, output_shift;
				145	quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
				146	_output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, output_quant_info.offset);
				147	_accumulator.allocator()->allocate();
				148	}
				149	else if(_has_bias)
				150	{
				151	_output_stage_kernel.configure((_is_nchw \|\| _is_optimized) ? output : &_permuted_output, biases);
				152	}
				153
				154	if(!_is_optimized && !_is_nchw)
				155	{
				156	// Configure the function to transform the convoluted output to NHWC
				157	_permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
				158	_permuted_output.allocator()->allocate();
				159	}
				160	}
				161
				162	Status NEDepthwiseConvolutionLayer3x3::validate(const ITensorInfo input, const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo output, const PadStrideInfo &conv_info,
				163	unsigned int depth_multiplier)
				164	{
				165	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
				166	ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW && input->data_layout() != DataLayout::NHWC);
				167
				168	if(biases != nullptr)
				169	{
				170	ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
				171	ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(3));
				172	}
				173
				174	return NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, output, conv_info, depth_multiplier);
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	175	}
				176
				177	void NEDepthwiseConvolutionLayer3x3::run()
				178	{
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	179	if(_is_first_run && _is_optimized)
				180	{
				181	_is_first_run = false;
				182	// Create convolver (deferred)
				183	_dwc_kernel.generate_convolver();
				184	}
				185
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	186	// Permute weights
				187	if(_permute)
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	188	{
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	189	if(!_are_weights_reshaped)
				190	{
				191	_are_weights_reshaped = true;
				192	_permute_weights.run();
				193	}
				194
				195	_permute_input.run();
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	196	}
				197
				198	// Handle input
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	199	if(!_is_optimized)
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	200	{
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	201	// Fill border
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	202	NEScheduler::get().schedule(&_border_handler, Window::DimX);
				203	}
				204
				205	// Execute depthwise convolution
				206	NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);
				207
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	208	// Permute output
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	209	if(_is_optimized && _is_nchw)
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	210	{
				211	_permute_output.run();
				212	}
				213
				214	// Add biases
Anthony Barbier	f45d5a9	2018-01-24 16:23:15 +0000	[diff] [blame]	215	if(_has_bias \|\| _is_quantized)
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	216	{
Anthony Barbier	f45d5a9	2018-01-24 16:23:15 +0000	[diff] [blame]	217	NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	218	}
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	219
				220	// Permute output
				221	if(!_is_optimized && !_is_nchw)
				222	{
				223	_permute_output.run();
				224	}
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	225	}
				226
				227	NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer()
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	228	: _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _output_stage_kernel(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(), _permute_input(),
				229	_permute_weights(), _permute_output(), _input_reshaped(), _weights_reshaped(), _v2mm_output(), _output_reshaped(), _permuted_input(), _permuted_weights(), _permuted_output(), _is_prepared(false),
				230	_is_quantized(false), _is_nhwc(false), _original_weights(nullptr)
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	231	{
				232	}
				233
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	234	void NEDepthwiseConvolutionLayer::configure(ITensor input, const ITensor weights, const ITensor biases, ITensor output, const PadStrideInfo &conv_info, unsigned int depth_multiplier)
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	235	{
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	236	const unsigned int channel_idx = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);
				237	ARM_COMPUTE_UNUSED(channel_idx);
				238
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	239	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	240	ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	241	ARM_COMPUTE_ERROR_ON((input->info()->dimension(channel_idx) * depth_multiplier) != weights->info()->dimension(channel_idx));
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	242
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	243	_is_nhwc = input->info()->data_layout() == DataLayout::NHWC;
				244
				245	ITensor *input_to_use = input;
				246	const ITensor *weights_to_use = weights;
				247	ITensor *output_to_use = output;
				248
				249	if(_is_nhwc)
				250	{
				251	_permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
				252	_permuted_input.info()->set_data_layout(DataLayout::NCHW);
				253	input_to_use = &_permuted_input;
				254
				255	_permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
				256	_permuted_weights.info()->set_data_layout(DataLayout::NCHW);
				257	weights_to_use = &_permuted_weights;
				258	}
				259
				260	const size_t weights_w = weights_to_use->info()->dimension(0);
				261	const size_t weights_h = weights_to_use->info()->dimension(1);
				262	const size_t weights_z = weights_to_use->info()->dimension(2);
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	263
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	264	_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	265	_is_prepared = false;
				266	_original_weights = weights_to_use;
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	267
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	268	// Should bias be appended ?
				269	bool append_bias = (biases != nullptr) && !_is_quantized;
				270
				271	// Calculate output shape
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	272	TensorShape output_shape = shape_calculator::compute_depthwise_convolution_shape(input->info(), weights->info(), conv_info, depth_multiplier);
				273
				274	// Output auto inizialitation if not yet initialized
				275	auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
				276	ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	277
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	278	if(_is_nhwc)
				279	{
				280	permute(output_shape, PermutationVector(1U, 2U, 0U));
				281	_permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));
				282	_permuted_output.info()->set_data_layout(DataLayout::NCHW);
				283	output_to_use = &_permuted_output;
				284	}
				285
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	286	// Output width and height
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	287	const unsigned int conv_w = output_shape.x();
				288	const unsigned int conv_h = output_shape.y();
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	289
				290	// Set up intermediate tensors
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	291	const size_t patch_size = weights_w * weights_h + (append_bias ? 1 : 0);
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	292	const size_t conv_size = conv_w * conv_h;
				293
				294	// Im2Col configuration
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	295	TensorShape shape_im2col = input_to_use->info()->tensor_shape();
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	296	shape_im2col.set(0, patch_size);
				297	shape_im2col.set(1, conv_size);
				298	shape_im2col.set(2, weights_z);
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	299	_input_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col).set_data_layout(DataLayout::NCHW));
				300	_im2col_kernel.configure(input_to_use, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier);
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	301
				302	// Weights reshape configuration
				303	const TensorShape shape_weights_reshape(patch_size, weights_z);
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	304	_weights_reshaped.allocator()->init(weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape).set_data_layout(DataLayout::NCHW));
				305	_weights_reshape_kernel.configure(weights_to_use, &_weights_reshaped, append_bias ? biases : nullptr);
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	306
				307	// GEMV configuration
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	308	DataType v2mm_dt = (input->info()->data_type() == DataType::QASYMM8) ? DataType::S32 : input->info()->data_type();
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	309	TensorShape shape_v2mm_out = input_to_use->info()->tensor_shape();
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	310	shape_v2mm_out.set(0, conv_size * weights_z);
				311	shape_v2mm_out.set(1, 1);
				312	shape_v2mm_out.set(2, 1);
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	313	_v2mm_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out).set_data_layout(DataLayout::NCHW));
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	314	_v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output);
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	315	_output_reshaped.allocator()->init(_v2mm_output.info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	316	_vector_to_tensor_kernel.configure(&_v2mm_output, (_is_quantized) ? &_output_reshaped : output_to_use, conv_w, conv_h);
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	317
				318	// Output staged configuration
				319	if(_is_quantized)
				320	{
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	321	const QuantizationInfo output_quant_info = output->info()->quantization_info();
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	322
				323	float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
				324	int output_multiplier, output_shift;
				325	quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	326	_output_stage_kernel.configure(&_output_reshaped, biases, output_to_use, output_multiplier, output_shift, output_quant_info.offset);
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	327	_output_reshaped.allocator()->allocate();
				328	}
				329
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	330	if(_is_nhwc)
				331	{
				332	_permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
				333
				334	_permuted_input.allocator()->allocate();
				335	_permuted_weights.allocator()->allocate();
				336	_permuted_output.allocator()->allocate();
				337	}
				338
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	339	// Fill borders on inputs
				340	PixelValue zero_in(static_cast<int32_t>(0));
				341	PixelValue zero_w(static_cast<int32_t>(0));
				342	if(_is_quantized)
				343	{
				344	zero_in = PixelValue(static_cast<int32_t>(input->info()->quantization_info().offset));
				345	zero_w = PixelValue(static_cast<int32_t>(weights->info()->quantization_info().offset));
				346	}
				347	BorderSize border_size = _v2mm_kernel.border_size();
				348	_v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in);
				349
				350	border_size.bottom = 0;
				351	_v2mm_weights_fill_border.configure(&_weights_reshaped, border_size, BorderMode::CONSTANT, zero_w);
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	352
				353	// Allocate intermediate tensors
				354	_input_reshaped.allocator()->allocate();
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	355	_v2mm_output.allocator()->allocate();
				356	}
				357
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	358	Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo input, const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo output, const PadStrideInfo &conv_info,
				359	unsigned int depth_multiplier)
				360	{
				361	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
				362	ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW && input->data_layout() != DataLayout::NHWC);
				363
				364	// Clone output to use auto init
				365	auto output_clone = output->clone();
				366
				367	const ITensorInfo *input_to_use = input;
				368	const ITensorInfo *weights_to_use = weights;
				369	const ITensorInfo *output_to_use = output_clone.get();
				370
				371	TensorShape permuted_input_shape = input->tensor_shape();
				372	TensorShape permuted_weights_shape = weights->tensor_shape();
				373	TensorInfo permuted_input;
				374	TensorInfo permuted_weights;
				375
				376	if(input->data_layout() == DataLayout::NHWC)
				377	{
				378	permute(permuted_input_shape, PermutationVector(1U, 2U, 0U));
				379	permute(permuted_weights_shape, PermutationVector(1U, 2U, 0U));
				380
				381	permuted_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NCHW));
				382	permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NCHW));
				383
				384	input_to_use = &permuted_input;
				385	weights_to_use = &permuted_weights;
				386	}
				387
				388	const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
				389	const bool append_bias = (biases != nullptr) && !is_quantized;
				390	TensorShape output_shape = shape_calculator::compute_depthwise_convolution_shape(input, weights, conv_info, depth_multiplier);
				391	const size_t weights_w = weights_to_use->dimension(0);
				392	const size_t weights_h = weights_to_use->dimension(1);
				393	const size_t weights_z = weights_to_use->dimension(2);
				394	const unsigned int conv_w = output_shape.x();
				395	const unsigned int conv_h = output_shape.y();
				396	const size_t patch_size = weights_w * weights_h + (append_bias ? 1 : 0);
				397	const size_t conv_size = conv_w * conv_h;
				398
				399	// Output auto inizialitation if not yet initialized
				400	auto_init_if_empty(*output_clone, input->clone()->set_tensor_shape(output_shape));
				401	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
				402
				403	TensorInfo permuted_output;
				404	if(input->data_layout() == DataLayout::NHWC)
				405	{
				406	permute(output_shape, PermutationVector(1U, 2U, 0U));
				407	permuted_output = TensorInfo(output_clone->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_data_layout(DataLayout::NCHW));
				408	output_to_use = &permuted_output;
				409	}
				410
				411	// Im2Col configuration
				412	TensorShape shape_im2col = input_to_use->tensor_shape();
				413	shape_im2col.set(0, patch_size);
				414	shape_im2col.set(1, conv_size);
				415	shape_im2col.set(2, weights_z);
				416	TensorInfo input_reshaped(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col).set_data_layout(DataLayout::NCHW));
				417	ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseIm2ColKernel::validate(input_to_use, &input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier));
				418
				419	// Weights reshape configuration
				420	const TensorShape shape_weights_reshape(patch_size, weights_z);
				421	TensorInfo weights_reshaped(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape).set_data_layout(DataLayout::NCHW));
				422	ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseWeightsReshapeKernel::validate(weights_to_use, &weights_reshaped, append_bias ? biases : nullptr));
				423
				424	// GEMV configuration
				425	DataType v2mm_dt = (input->data_type() == DataType::QASYMM8) ? DataType::S32 : input->data_type();
				426	TensorShape shape_v2mm_out = input_to_use->tensor_shape();
				427	shape_v2mm_out.set(0, conv_size * weights_z);
				428	shape_v2mm_out.set(1, 1);
				429	shape_v2mm_out.set(2, 1);
				430	TensorInfo v2mm_output(input->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out).set_data_layout(DataLayout::NCHW));
				431	ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixVectorMultiplyKernel::validate(&input_reshaped, &weights_reshaped, &v2mm_output));
				432
				433	TensorInfo output_reshaped(v2mm_output.clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_to_use->tensor_shape()));
				434	ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseVectorToTensorKernel::validate(&v2mm_output, (is_quantized) ? &output_reshaped : output_to_use, conv_w, conv_h));
				435
				436	if(is_quantized)
				437	{
				438	ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&output_reshaped, biases, output_to_use));
				439	}
				440
				441	return Status{};
				442	}
				443
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	444	void NEDepthwiseConvolutionLayer::run()
				445	{
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	446	prepare();
				447
				448	if(_is_nhwc)
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	449	{
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	450	_permute_input.run();
Jenkins	b3a371b	2018-05-23 11:36:53 +0100	[diff] [blame]	451	}
				452
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	453	NEScheduler::get().schedule(&_im2col_kernel, Window::DimX);
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	454	NEScheduler::get().schedule(&_v2mm_input_fill_border, Window::DimX);
Anthony Barbier	8140e1e	2017-12-14 23:48:46 +0000	[diff] [blame]	455	NEScheduler::get().schedule(&_v2mm_kernel, Window::DimX);
				456	NEScheduler::get().schedule(&_vector_to_tensor_kernel, Window::DimX);
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	457	if(_is_quantized)
				458	{
				459	NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
				460	}
Jenkins	52ba29e	2018-08-29 15:32:11 +0000	[diff] [blame^]	461
				462	if(_is_nhwc)
				463	{
				464	_permute_output.run();
				465	}
				466	}
				467
				468	void NEDepthwiseConvolutionLayer::prepare()
				469	{
				470	if(!_is_prepared)
				471	{
				472	ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
				473
				474	if(_is_nhwc)
				475	{
				476	_permute_weights.run();
				477	}
				478
				479	// Run reshape and mark original weights as unused
				480	_weights_reshaped.allocator()->allocate();
				481	NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX);
				482	NEScheduler::get().schedule(&_v2mm_weights_fill_border, Window::DimX);
				483	_original_weights->mark_as_unused();
				484
				485	_is_prepared = true;
				486	}
Anthony Barbier	06ea048	2018-02-22 15:45:35 +0000	[diff] [blame]	487	}