Blame - src/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.cpp - platform/external/ComputeLibrary

blob: d4be939c52aef8f734599ddf6e25a4e11dabe5ad [file] [log] [blame]

Jenkins	b9abeae	2018-11-22 11:58:08 +0000	[diff] [blame]	1	/*
Jenkins	0e205f7	2019-11-28 16:53:35 +0000	[diff] [blame^]	2	* Copyright (c) 2018-2019 ARM Limited.
Jenkins	b9abeae	2018-11-22 11:58:08 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h"
				25
				26	#include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h"
Jenkins	0e205f7	2019-11-28 16:53:35 +0000	[diff] [blame^]	27	#include "arm_compute/runtime/Scheduler.h"
Jenkins	b9abeae	2018-11-22 11:58:08 +0000	[diff] [blame]	28
Jenkins	0e205f7	2019-11-28 16:53:35 +0000	[diff] [blame^]	29	namespace arm_compute
				30	{
				31	namespace
				32	{
				33	void dequantize_tensor(const ITensor input, ITensor output)
				34	{
				35	const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
				36	const DataType data_type = input->info()->data_type();
				37
				38	Window window;
				39	window.use_tensor_dimensions(input->info()->tensor_shape());
				40	Iterator input_it(input, window);
				41	Iterator output_it(output, window);
				42
				43	switch(data_type)
				44	{
				45	case DataType::QASYMM8:
				46	execute_window_loop(window, [&](const Coordinates &)
				47	{
				48	reinterpret_cast<float >(output_it.ptr()) = dequantize(reinterpret_cast<const uint8_t >(input_it.ptr()), qinfo.scale, qinfo.offset);
				49	},
				50	input_it, output_it);
				51	break;
				52	case DataType::QASYMM16:
				53	execute_window_loop(window, [&](const Coordinates &)
				54	{
				55	reinterpret_cast<float >(output_it.ptr()) = dequantize(reinterpret_cast<const uint16_t >(input_it.ptr()), qinfo.scale, qinfo.offset);
				56	},
				57	input_it, output_it);
				58	break;
				59	default:
				60	ARM_COMPUTE_ERROR("Unsupported data type");
				61	}
				62	}
				63
				64	void quantize_tensor(const ITensor input, ITensor output)
				65	{
				66	const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
				67	const DataType data_type = output->info()->data_type();
				68
				69	Window window;
				70	window.use_tensor_dimensions(input->info()->tensor_shape());
				71	Iterator input_it(input, window);
				72	Iterator output_it(output, window);
				73
				74	switch(data_type)
				75	{
				76	case DataType::QASYMM8:
				77	execute_window_loop(window, [&](const Coordinates &)
				78	{
				79	reinterpret_cast<uint8_t >(output_it.ptr()) = quantize_qasymm8(reinterpret_cast<const float >(input_it.ptr()), qinfo);
				80	},
				81	input_it, output_it);
				82	break;
				83	case DataType::QASYMM16:
				84	execute_window_loop(window, [&](const Coordinates &)
				85	{
				86	reinterpret_cast<uint16_t >(output_it.ptr()) = quantize_qasymm16(reinterpret_cast<const float >(input_it.ptr()), qinfo);
				87	},
				88	input_it, output_it);
				89	break;
				90	default:
				91	ARM_COMPUTE_ERROR("Unsupported data type");
				92	}
				93	}
				94	} // namespace
				95
				96	CPPBoxWithNonMaximaSuppressionLimit::CPPBoxWithNonMaximaSuppressionLimit(std::shared_ptr<IMemoryManager> memory_manager)
				97	: _memory_group(std::move(memory_manager)),
				98	_box_with_nms_limit_kernel(),
				99	_scores_in(),
				100	_boxes_in(),
				101	_batch_splits_in(),
				102	_scores_out(),
				103	_boxes_out(),
				104	_classes(),
				105	_batch_splits_out(),
				106	_keeps(),
				107	_scores_in_f32(),
				108	_boxes_in_f32(),
				109	_batch_splits_in_f32(),
				110	_scores_out_f32(),
				111	_boxes_out_f32(),
				112	_classes_f32(),
				113	_batch_splits_out_f32(),
				114	_keeps_f32(),
				115	_is_qasymm8(false)
				116	{
				117	}
Jenkins	b9abeae	2018-11-22 11:58:08 +0000	[diff] [blame]	118
				119	void CPPBoxWithNonMaximaSuppressionLimit::configure(const ITensor scores_in, const ITensor boxes_in, const ITensor batch_splits_in, ITensor scores_out, ITensor boxes_out, ITensor classes,
				120	ITensor batch_splits_out, ITensor keeps, ITensor *keeps_size, const BoxNMSLimitInfo info)
				121	{
Jenkins	0e205f7	2019-11-28 16:53:35 +0000	[diff] [blame^]	122	ARM_COMPUTE_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes);
				123
				124	_is_qasymm8 = scores_in->info()->data_type() == DataType::QASYMM8;
				125
				126	_scores_in = scores_in;
				127	_boxes_in = boxes_in;
				128	_batch_splits_in = batch_splits_in;
				129	_scores_out = scores_out;
				130	_boxes_out = boxes_out;
				131	_classes = classes;
				132	_batch_splits_out = batch_splits_out;
				133	_keeps = keeps;
				134
				135	if(_is_qasymm8)
				136	{
				137	// Manage intermediate buffers
				138	_memory_group.manage(&_scores_in_f32);
				139	_memory_group.manage(&_boxes_in_f32);
				140	_memory_group.manage(&_scores_out_f32);
				141	_memory_group.manage(&_boxes_out_f32);
				142	_memory_group.manage(&_classes_f32);
				143	_scores_in_f32.allocator()->init(scores_in->info()->clone()->set_data_type(DataType::F32));
				144	_boxes_in_f32.allocator()->init(boxes_in->info()->clone()->set_data_type(DataType::F32));
				145	if(batch_splits_in != nullptr)
				146	{
				147	_memory_group.manage(&_batch_splits_in_f32);
				148	_batch_splits_in_f32.allocator()->init(batch_splits_in->info()->clone()->set_data_type(DataType::F32));
				149	}
				150	_scores_out_f32.allocator()->init(scores_out->info()->clone()->set_data_type(DataType::F32));
				151	_boxes_out_f32.allocator()->init(boxes_out->info()->clone()->set_data_type(DataType::F32));
				152	_classes_f32.allocator()->init(classes->info()->clone()->set_data_type(DataType::F32));
				153	if(batch_splits_out != nullptr)
				154	{
				155	_memory_group.manage(&_batch_splits_out_f32);
				156	_batch_splits_out_f32.allocator()->init(batch_splits_out->info()->clone()->set_data_type(DataType::F32));
				157	}
				158	if(keeps != nullptr)
				159	{
				160	_memory_group.manage(&_keeps_f32);
				161	_keeps_f32.allocator()->init(keeps->info()->clone()->set_data_type(DataType::F32));
				162	}
				163
				164	_box_with_nms_limit_kernel.configure(&_scores_in_f32, &_boxes_in_f32, (batch_splits_in != nullptr) ? &_batch_splits_in_f32 : nullptr,
				165	&_scores_out_f32, &_boxes_out_f32, &_classes_f32,
				166	(batch_splits_out != nullptr) ? &_batch_splits_out_f32 : nullptr, (keeps != nullptr) ? &_keeps_f32 : nullptr,
				167	keeps_size, info);
				168	}
				169	else
				170	{
				171	_box_with_nms_limit_kernel.configure(scores_in, boxes_in, batch_splits_in, scores_out, boxes_out, classes, batch_splits_out, keeps, keeps_size, info);
				172	}
				173
				174	if(_is_qasymm8)
				175	{
				176	_scores_in_f32.allocator()->allocate();
				177	_boxes_in_f32.allocator()->allocate();
				178	if(_batch_splits_in != nullptr)
				179	{
				180	_batch_splits_in_f32.allocator()->allocate();
				181	}
				182	_scores_out_f32.allocator()->allocate();
				183	_boxes_out_f32.allocator()->allocate();
				184	_classes_f32.allocator()->allocate();
				185	if(batch_splits_out != nullptr)
				186	{
				187	_batch_splits_out_f32.allocator()->allocate();
				188	}
				189	if(keeps != nullptr)
				190	{
				191	_keeps_f32.allocator()->allocate();
				192	}
				193	}
				194	}
				195
				196	Status validate(const ITensorInfo scores_in, const ITensorInfo boxes_in, const ITensorInfo batch_splits_in, const ITensorInfo scores_out, const ITensorInfo boxes_out, const ITensorInfo classes,
				197	const ITensorInfo batch_splits_out, const ITensorInfo keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
				198	{
				199	ARM_COMPUTE_UNUSED(batch_splits_in, batch_splits_out, keeps, keeps_size, info);
				200	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes);
				201	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scores_in, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
				202
				203	const bool is_qasymm8 = scores_in->data_type() == DataType::QASYMM8;
				204	if(is_qasymm8)
				205	{
				206	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(boxes_in, 1, DataType::QASYMM16);
				207	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(boxes_in, boxes_out);
				208	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(boxes_in, boxes_out);
				209	const UniformQuantizationInfo boxes_qinfo = boxes_in->quantization_info().uniform();
				210	ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.scale != 0.125f);
				211	ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.offset != 0);
				212	}
				213
				214	return Status{};
				215	}
				216
				217	void CPPBoxWithNonMaximaSuppressionLimit::run()
				218	{
				219	// Acquire all the temporaries
				220	MemoryGroupResourceScope scope_mg(_memory_group);
				221
				222	if(_is_qasymm8)
				223	{
				224	dequantize_tensor(_scores_in, &_scores_in_f32);
				225	dequantize_tensor(_boxes_in, &_boxes_in_f32);
				226	if(_batch_splits_in != nullptr)
				227	{
				228	dequantize_tensor(_batch_splits_in, &_batch_splits_in_f32);
				229	}
				230	}
				231
				232	Scheduler::get().schedule(&_box_with_nms_limit_kernel, Window::DimY);
				233
				234	if(_is_qasymm8)
				235	{
				236	quantize_tensor(&_scores_out_f32, _scores_out);
				237	quantize_tensor(&_boxes_out_f32, _boxes_out);
				238	quantize_tensor(&_classes_f32, _classes);
				239	if(_batch_splits_out != nullptr)
				240	{
				241	quantize_tensor(&_batch_splits_out_f32, _batch_splits_out);
				242	}
				243	if(_keeps != nullptr)
				244	{
				245	quantize_tensor(&_keeps_f32, _keeps);
				246	}
				247	}
				248	}
				249	} // namespace arm_compute