arm_compute v17.10
Change-Id: If1489af40eccd0219ede8946577afbf04db31b29
diff --git a/src/runtime/CL/functions/CLDepthConcatenate.cpp b/src/runtime/CL/functions/CLDepthConcatenate.cpp
index f42627f..89e44ca 100644
--- a/src/runtime/CL/functions/CLDepthConcatenate.cpp
+++ b/src/runtime/CL/functions/CLDepthConcatenate.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
@@ -51,6 +52,11 @@
_concat_kernels_vector = arm_compute::support::cpp14::make_unique<CLDepthConcatenateKernel[]>(_num_inputs);
_border_handlers_vector = arm_compute::support::cpp14::make_unique<CLFillBorderKernel[]>(_num_inputs);
+ TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
+
+ // Output auto inizialitation if not yet initialized
+ auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
+
for(unsigned int i = 0; i < _num_inputs; i++)
{
_concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp
index 9be1df6..c27ff2f 100644
--- a/src/runtime/NEON/functions/NECannyEdge.cpp
+++ b/src/runtime/NEON/functions/NECannyEdge.cpp
@@ -162,7 +162,7 @@
_edge_trace.configure(&_nonmax, output);
// Fill border with "No edge" to stop recursion in edge trace
- _border_edge_trace.configure(&_nonmax, _edge_trace.border_size(), BorderMode::CONSTANT, 0);
+ _border_edge_trace.configure(&_nonmax, _edge_trace.border_size(), BorderMode::CONSTANT, static_cast<float>(0.f));
// Allocate intermediate tensors
_nonmax.allocator()->allocate();
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index 40862fc..f34f497 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -190,8 +190,9 @@
{
if(_are_weights_reshaped)
{
- mat_weights_cols = weights_info.num_kernels();
- mat_weights_rows = weights->info()->dimension(0) / 4 + (_has_bias ? 1 : 0);
+ const unsigned int transpose_width = 16 / input->info()->element_size();
+ mat_weights_cols = weights_info.num_kernels();
+ mat_weights_rows = weights->info()->dimension(0) / transpose_width + (_has_bias ? 1 : 0);
}
else
{
diff --git a/src/runtime/NEON/functions/NEDepthConcatenate.cpp b/src/runtime/NEON/functions/NEDepthConcatenate.cpp
index ddf7e90..f8ad2ab 100644
--- a/src/runtime/NEON/functions/NEDepthConcatenate.cpp
+++ b/src/runtime/NEON/functions/NEDepthConcatenate.cpp
@@ -24,6 +24,7 @@
#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h"
#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
@@ -48,11 +49,16 @@
_concat_kernels_vector = arm_compute::support::cpp14::make_unique<NEDepthConcatenateKernel[]>(_num_inputs);
_border_handlers_vector = arm_compute::support::cpp14::make_unique<NEFillBorderKernel[]>(_num_inputs);
+ TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
+
+ // Output auto inizialitation if not yet initialized
+ auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
+
unsigned int depth_offset = 0;
for(unsigned int i = 0; i < _num_inputs; ++i)
{
_concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
- _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0));
+ _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
depth_offset += inputs_vector.at(i)->info()->dimension(2);
}
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index b831a6a..52a4cc1 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -82,7 +82,7 @@
}
// Add zero padding XY
- _input_border_handler.configure(input, _conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0));
+ _input_border_handler.configure(input, _conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
}
void NEDirectConvolutionLayer::run()
diff --git a/src/runtime/NEON/functions/NEFlattenLayer.cpp b/src/runtime/NEON/functions/NEFlattenLayer.cpp
new file mode 100644
index 0000000..408eff5
--- /dev/null
+++ b/src/runtime/NEON/functions/NEFlattenLayer.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
+
+#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
+#include "arm_compute/core/Size2D.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+void NEFlattenLayer::configure(const ITensor *input, ITensor *output)
+{
+ auto k = arm_compute::support::cpp14::make_unique<NEIm2ColKernel>();
+ k->configure(input, output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false);
+ _kernel = std::move(k);
+}
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEIntegralImage.cpp b/src/runtime/NEON/functions/NEIntegralImage.cpp
index 2e94ed5..fa8aaeb 100644
--- a/src/runtime/NEON/functions/NEIntegralImage.cpp
+++ b/src/runtime/NEON/functions/NEIntegralImage.cpp
@@ -36,5 +36,5 @@
auto k = arm_compute::support::cpp14::make_unique<NEIntegralImageKernel>();
k->configure(input, output);
_kernel = std::move(k);
- _border_handler.configure(output, _kernel->border_size(), BorderMode::CONSTANT, 0);
+ _border_handler.configure(output, _kernel->border_size(), BorderMode::CONSTANT, static_cast<float>(0.f));
}
diff --git a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp
index 3b59820..0854c9d 100644
--- a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp
+++ b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp
@@ -38,10 +38,10 @@
if(border_mode != BorderMode::UNDEFINED)
{
- _border_handler.configure(input, BorderSize(1), BorderMode::CONSTANT, 0);
+ _border_handler.configure(input, BorderSize(1), BorderMode::CONSTANT, static_cast<float>(0.f));
}
else
{
- _border_handler.configure(input, BorderSize(1), BorderMode::UNDEFINED, 0);
+ _border_handler.configure(input, BorderSize(1), BorderMode::UNDEFINED, static_cast<float>(0.f));
}
}
diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp
index 4c4e11f..f8a85b9 100644
--- a/src/runtime/NEON/functions/NEPoolingLayer.cpp
+++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp
@@ -23,19 +23,36 @@
*/
#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
-#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
#include "support/ToolchainSupport.h"
using namespace arm_compute;
+NEPoolingLayer::NEPoolingLayer()
+ : _pooling_layer_kernel(), _border_handler(), _is_global_pooling_layer(false)
+{
+}
+
void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
{
+ // Check if we have Global Pooling Layer
+ _is_global_pooling_layer = (input->info()->dimension(0) == pool_info.pool_size()) && (input->info()->dimension(1) == pool_info.pool_size());
+
// Configure pooling kernel
- auto k = arm_compute::support::cpp14::make_unique<NEPoolingLayerKernel>();
- k->configure(input, output, pool_info);
- _kernel = std::move(k);
+ _pooling_layer_kernel.configure(input, output, pool_info);
// Configure border depending on operation required
BorderMode border_mode = (pool_info.pool_type() == PoolingType::MAX) ? BorderMode::REPLICATE : BorderMode::CONSTANT;
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(0));
+ _border_handler.configure(input, _pooling_layer_kernel.border_size(), border_mode, PixelValue(static_cast<float>(0.f)));
}
+
+void NEPoolingLayer::run()
+{
+ // Fill border
+ NEScheduler::get().schedule(&_border_handler, Window::DimY);
+
+ // Run pooling layer
+ NEScheduler::get().schedule(&_pooling_layer_kernel, _is_global_pooling_layer ? Window::DimZ : Window::DimY);
+}
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp
index 45c3e5d..f1a9145 100644
--- a/src/runtime/NEON/functions/NEReductionOperation.cpp
+++ b/src/runtime/NEON/functions/NEReductionOperation.cpp
@@ -74,7 +74,7 @@
// Configure fill border kernel
BorderSize fill_border_size = (axis == 0) ? _reduction_kernel.border_size() : BorderSize();
BorderMode fill_border_mode = reduction_operation_border_mode(op);
- _fill_border_kernel.configure(input, fill_border_size, fill_border_mode, PixelValue(0));
+ _fill_border_kernel.configure(input, fill_border_size, fill_border_mode, PixelValue(static_cast<float>(0.f)));
}
void NEReductionOperation::run()
diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp
index be81641..1dd2511 100644
--- a/src/runtime/OMP/OMPScheduler.cpp
+++ b/src/runtime/OMP/OMPScheduler.cpp
@@ -51,32 +51,34 @@
void OMPScheduler::set_num_threads(unsigned int num_threads)
{
const unsigned int num_cores = omp_get_max_threads();
- _num_threads = num_threads == 0 ? num_cores : num_threads;
+ _num_threads = (num_threads == 0) ? num_cores : num_threads;
}
void OMPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension)
{
ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");
+ ThreadInfo info;
+ info.cpu_info = _info;
+
const Window &max_window = kernel->window();
const unsigned int num_iterations = max_window.num_iterations(split_dimension);
- const unsigned int num_threads = std::min(num_iterations, _num_threads);
+ info.num_threads = std::min(num_iterations, _num_threads);
- if(!kernel->is_parallelisable() || 1 == num_threads)
+ if(!kernel->is_parallelisable() || info.num_threads == 1)
{
- kernel->run(max_window);
+ kernel->run(max_window, info);
}
else
{
- #pragma omp parallel num_threads(num_threads)
+ #pragma omp parallel num_threads(info.num_threads)
{
#pragma omp for
- for(unsigned int t = 0; t < num_threads; ++t)
+ for(int t = 0; t < info.num_threads; ++t)
{
- Window win = max_window.split_window(split_dimension, t, num_threads);
- win.set_thread_id(t);
- win.set_num_threads(num_threads);
- kernel->run(win);
+ Window win = max_window.split_window(split_dimension, t, info.num_threads);
+ info.thread_id = t;
+ kernel->run(win, info);
}
}
}
diff --git a/src/runtime/Utils.cpp b/src/runtime/Utils.cpp
index 1b06117..81de782 100644
--- a/src/runtime/Utils.cpp
+++ b/src/runtime/Utils.cpp
@@ -28,6 +28,10 @@
using namespace arm_compute;
+static const std::string information =
+#include "arm_compute_version.embed"
+ ;
+
const std::string &arm_compute::string_from_scheduler_type(Scheduler::Type t)
{
static std::map<Scheduler::Type, const std::string> scheduler_type_map =