arm_compute v17.05
diff --git a/src/runtime/NEON/functions/NEConvolution.cpp b/src/runtime/NEON/functions/NEConvolution.cpp
index 582c9a4..c2b3d7a 100644
--- a/src/runtime/NEON/functions/NEConvolution.cpp
+++ b/src/runtime/NEON/functions/NEConvolution.cpp
@@ -47,32 +47,34 @@
_border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
-NEConvolution5x5::NEConvolution5x5()
+template <unsigned int matrix_size>
+NEConvolutionSquare<matrix_size>::NEConvolutionSquare()
: _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
{
}
-void NEConvolution5x5::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
+template <unsigned int matrix_size>
+void NEConvolutionSquare<matrix_size>::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
{
ARM_COMPUTE_ERROR_ON(conv == nullptr);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
- std::array<int16_t, 5> conv_col{ { 0 } };
- std::array<int16_t, 5> conv_row{ { 0 } };
+ std::array<int16_t, matrix_size> conv_col{ { 0 } };
+ std::array<int16_t, matrix_size> conv_row{ { 0 } };
- _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), 5);
+ _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), matrix_size);
if(_is_separable)
{
DataType intermediate_type = DataType::UNKNOWN;
- std::tie(std::ignore, intermediate_type) = data_type_for_convolution(conv_col.data(), conv_row.data(), 5);
+ std::tie(std::ignore, intermediate_type) = data_type_for_convolution(conv_col.data(), conv_row.data(), matrix_size);
_tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, intermediate_type));
if(scale == 0)
{
- scale = calculate_matrix_scale(conv, 5);
+ scale = calculate_matrix_scale(conv, matrix_size);
}
_kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
@@ -89,7 +91,8 @@
}
}
-void NEConvolution5x5::run()
+template <unsigned int matrix_size>
+void NEConvolutionSquare<matrix_size>::run()
{
_border_handler.run(_border_handler.window());
@@ -104,119 +107,9 @@
}
}
-NEConvolution7x7::NEConvolution7x7()
- : _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
-{
-}
-
-void NEConvolution7x7::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON(conv == nullptr);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
-
- std::array<int16_t, 7> conv_col{ { 0 } };
- std::array<int16_t, 7> conv_row{ { 0 } };
-
- _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), 7);
-
- if(_is_separable)
- {
- DataType intermediate_type = DataType::UNKNOWN;
- std::tie(std::ignore, intermediate_type) = data_type_for_convolution(conv_col.data(), conv_row.data(), 7);
-
- _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, intermediate_type));
-
- if(scale == 0)
- {
- scale = calculate_matrix_scale(conv, 7);
- }
-
- _kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
- _kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED);
-
- _tmp.allocator()->allocate();
-
- _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
- }
- else
- {
- _kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
- _border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value));
- }
-}
-
-void NEConvolution7x7::run()
-{
- _border_handler.run(_border_handler.window());
-
- if(_is_separable)
- {
- NEScheduler::get().multithread(&_kernel_hor);
- NEScheduler::get().multithread(&_kernel_vert);
- }
- else
- {
- NEScheduler::get().multithread(&_kernel);
- }
-}
-
-NEConvolution9x9::NEConvolution9x9()
- : _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
-{
-}
-
-void NEConvolution9x9::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON(conv == nullptr);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
-
- std::array<int16_t, 9> conv_col{ { 0 } };
- std::array<int16_t, 9> conv_row{ { 0 } };
-
- _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), 9);
-
- if(_is_separable)
- {
- DataType intermediate_type = DataType::UNKNOWN;
- std::tie(std::ignore, intermediate_type) = data_type_for_convolution(conv_col.data(), conv_row.data(), 9);
-
- _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, intermediate_type));
-
- if(scale == 0)
- {
- scale = calculate_matrix_scale(conv, 9);
- }
-
- _kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
- _kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED);
-
- _tmp.allocator()->allocate();
-
- _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
- }
- else
- {
- _kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
- _border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value));
- }
-}
-
-void NEConvolution9x9::run()
-{
- _border_handler.run(_border_handler.window());
-
- if(_is_separable)
- {
- NEScheduler::get().multithread(&_kernel_hor);
- NEScheduler::get().multithread(&_kernel_vert);
- }
- else
- {
- NEScheduler::get().multithread(&_kernel);
- }
-}
+template class arm_compute::NEConvolutionSquare<5>;
+template class arm_compute::NEConvolutionSquare<7>;
+template class arm_compute::NEConvolutionSquare<9>;
void NEConvolutionRectangle::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
{