Blame - src/runtime/CL/functions/CLConvolutionLayer.cpp - platform/external/ComputeLibrary

blob: 47889994a7c3251e9b0e81b330c2c8a1d3e2cf03 [file] [log] [blame]

Anthony Barbier	871448e	2017-03-24 14:54:29 +0000	[diff] [blame^]	1	/*
				2	* Copyright (c) 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
				25
				26	#include "arm_compute/core/PixelValue.h"
				27	#include "arm_compute/core/Utils.h"
				28	#include "arm_compute/core/Validate.h"
				29	#include "arm_compute/runtime/CL/CLScheduler.h"
				30
				31	#include <cmath>
				32	#include <tuple>
				33
				34	using namespace arm_compute;
				35
				36	CLConvolutionLayer::CLConvolutionLayer()
				37	: _input_im2col_kernel(), _weights_reshape_kernel(), _input_interleave_kernel(), _weights_transposed_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(),
				38	_input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _is_first_run(false), _has_bias(false), _is_fc(false)
				39	{
				40	}
				41
				42	void CLConvolutionLayer::configure(const ICLTensor input, const ICLTensor weights, const ICLTensor biases, ICLTensor output, const PadStrideInfo &conv_info)
				43	{
				44	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
				45	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F16, DataType::F32);
				46	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
				47	ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
				48	ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2));
				49	ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
				50
				51	if(biases != nullptr)
				52	{
				53	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F16, DataType::F32);
				54	ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
				55	ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3));
				56	ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
				57	}
				58
				59	_has_bias = (biases != nullptr);
				60	_is_first_run = true;
				61
				62	// Get parameters for conv_info
				63	unsigned int stride_x, stride_y, pad_x, pad_y = 0;
				64	std::tie(stride_x, stride_y) = conv_info.stride();
				65	std::tie(pad_x, pad_y) = conv_info.pad();
				66
				67	bool is_same_dimension = true;
				68	// Make sure the input and weights have same low three dimensions
				69	for(int i = 0; i < 3; i++)
				70	{
				71	is_same_dimension = (is_same_dimension) && (input->info()->dimension(i) == weights->info()->dimension(i));
				72	}
				73
				74	// Run the fully connected path if is_same_dimension is true and conv_stride_x/conv_stride_y are 1, and conv_pad_x/conv_pad_y are 0 and skip col2im
				75	_is_fc = (is_same_dimension) && ((stride_x & stride_y) == 1) && ((pad_x \| pad_y) == 0);
				76
				77	// Get convolved dimensions
				78	unsigned int conv_w = 0;
				79	unsigned int conv_h = 0;
				80
				81	std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0),
				82	stride_x, stride_y, pad_x, pad_y, conv_info.round());
				83
				84	// Create tensor to store the reshaped weights
				85	const size_t mat_weights_cols = weights->info()->dimension(3);
				86	const size_t mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + ((_has_bias) ? 1 : 0);
				87	const TensorShape shape_wr(mat_weights_cols, mat_weights_rows);
				88	_weights_reshaped.allocator()->init(TensorInfo(shape_wr, 1, weights->info()->data_type()));
				89
				90	// Create tensor to store transposed weights
				91	TensorShape shape_wt(mat_weights_rows * 4, static_cast<size_t>(std::ceil(mat_weights_cols / 4.f)));
				92	TensorInfo info_wt(shape_wt, 1, weights->info()->data_type());
				93	_weights_transposed.allocator()->init(info_wt);
				94
				95	// Create tensor to store im2col reshaped inputs
				96	const size_t mat_input_cols = mat_weights_rows;
				97	const size_t mat_input_rows = _is_fc ? (input->info()->dimension(3)) : (conv_w * conv_h);
				98	TensorShape shape_im2col = input->info()->tensor_shape();
				99	shape_im2col.set(0, mat_input_cols);
				100	shape_im2col.set(1, mat_input_rows);
				101	shape_im2col.set(2, 1);
				102	if(_is_fc)
				103	{
				104	shape_im2col.set(3, 1);
				105	}
				106	_input_im2col_reshaped.allocator()->init(TensorInfo(shape_im2col, 1, input->info()->data_type()));
				107
				108	// Create tensor to prepare input tensor for GEMM
				109	TensorShape shape_interleaved = shape_im2col;
				110	shape_interleaved.set(0, shape_interleaved.x() * 4);
				111	shape_interleaved.set(1, std::ceil(static_cast<float>(shape_interleaved.y()) / 4));
				112	_input_interleaved_reshaped.allocator()->init(TensorInfo(shape_interleaved, 1, input->info()->data_type()));
				113
				114	// Create GEMM output tensor
				115	TensorShape shape_gemm = _input_im2col_reshaped.info()->tensor_shape();
				116	shape_gemm.set(0, mat_weights_cols);
				117	shape_gemm.set(1, mat_input_rows);
				118	_gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type()));
				119
				120	// Configure kernels
				121	_input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias);
				122	_input_interleave_kernel.configure(&_input_im2col_reshaped, &_input_interleaved_reshaped);
				123	_weights_reshape_kernel.configure(weights, biases, &_weights_reshaped);
				124	_weights_transposed_kernel.configure(&_weights_reshaped, &_weights_transposed);
				125	if(_is_fc)
				126	{
				127	_mm_kernel.configure(&_input_interleaved_reshaped, &_weights_transposed, output, 1.0f);
				128	}
				129	else
				130	{
				131	_mm_kernel.configure(&_input_interleaved_reshaped, &_weights_transposed, &_gemm_output, 1.0f);
				132	_output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h));
				133	}
				134
				135	// Allocate intermediate tensors
				136	_weights_reshaped.allocator()->allocate();
				137	_weights_transposed.allocator()->allocate();
				138	_input_im2col_reshaped.allocator()->allocate();
				139	_input_interleaved_reshaped.allocator()->allocate();
				140	_gemm_output.allocator()->allocate();
				141	}
				142
				143	void CLConvolutionLayer::run()
				144	{
				145	// Run weights reshaping (Runs once for every configure)
				146	if(_is_first_run)
				147	{
				148	_is_first_run = false;
				149	CLScheduler::get().enqueue(_weights_reshape_kernel);
				150	CLScheduler::get().enqueue(_weights_transposed_kernel);
				151	}
				152
				153	// Run input reshaping
				154	CLScheduler::get().enqueue(_input_im2col_kernel);
				155	CLScheduler::get().enqueue(_input_interleave_kernel);
				156
				157	// Runs matrix multiply on reshaped matrices
				158	CLScheduler::get().enqueue(_mm_kernel);
				159
				160	// Reshape output matrix
				161
				162	if(!_is_fc)
				163	{
				164	CLScheduler::get().enqueue(_output_col2im_kernel, false);
				165	}
				166	}