arm_compute v17.10 Change-Id: If1489af40eccd0219ede8946577afbf04db31b29

commit: bf8b01dfbfdca124673ade33c5eac8f3748d7abd [log] [tgz]
author: Kaizen <kaizen@arm.com> Thu Oct 12 14:26:51 2017 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> Thu Oct 12 15:42:58 2017 +0100
tree: 9504f134ca292b988e7065b22229e9bbe0493d00
parent: 8938bd3f40ea62ff56d6ed4e2db0a8aee34dd64a [diff]
diff --git a/src/runtime/CL/functions/CLDepthConcatenate.cpp b/src/runtime/CL/functions/CLDepthConcatenate.cpp
index f42627f..89e44ca 100644
--- a/src/runtime/CL/functions/CLDepthConcatenate.cpp
+++ b/src/runtime/CL/functions/CLDepthConcatenate.cpp

@@ -25,6 +25,7 @@
 
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/PixelValue.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
@@ -51,6 +52,11 @@
     _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<CLDepthConcatenateKernel[]>(_num_inputs);
     _border_handlers_vector = arm_compute::support::cpp14::make_unique<CLFillBorderKernel[]>(_num_inputs);
 
+    TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
+
+    // Auto-initialize the output if it has not been initialized yet
+    auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
+
     for(unsigned int i = 0; i < _num_inputs; i++)
     {
         _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);

diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp
index 9be1df6..c27ff2f 100644
--- a/src/runtime/NEON/functions/NECannyEdge.cpp
+++ b/src/runtime/NEON/functions/NECannyEdge.cpp

@@ -162,7 +162,7 @@
     _edge_trace.configure(&_nonmax, output);
 
     // Fill border with "No edge" to stop recursion in edge trace
-    _border_edge_trace.configure(&_nonmax, _edge_trace.border_size(), BorderMode::CONSTANT, 0);
+    _border_edge_trace.configure(&_nonmax, _edge_trace.border_size(), BorderMode::CONSTANT, static_cast<float>(0.f));
 
     // Allocate intermediate tensors
     _nonmax.allocator()->allocate();

diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index 40862fc..f34f497 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp

@@ -190,8 +190,9 @@
     {
         if(_are_weights_reshaped)
         {
-            mat_weights_cols = weights_info.num_kernels();
-            mat_weights_rows = weights->info()->dimension(0) / 4 + (_has_bias ? 1 : 0);
+            const unsigned int transpose_width = 16 / input->info()->element_size();
+            mat_weights_cols                   = weights_info.num_kernels();
+            mat_weights_rows                   = weights->info()->dimension(0) / transpose_width + (_has_bias ? 1 : 0);
         }
         else
         {

diff --git a/src/runtime/NEON/functions/NEDepthConcatenate.cpp b/src/runtime/NEON/functions/NEDepthConcatenate.cpp
index ddf7e90..f8ad2ab 100644
--- a/src/runtime/NEON/functions/NEDepthConcatenate.cpp
+++ b/src/runtime/NEON/functions/NEDepthConcatenate.cpp

@@ -24,6 +24,7 @@
 #include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h"
 
 #include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/PixelValue.h"
 #include "arm_compute/core/Types.h"
@@ -48,11 +49,16 @@
     _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<NEDepthConcatenateKernel[]>(_num_inputs);
     _border_handlers_vector = arm_compute::support::cpp14::make_unique<NEFillBorderKernel[]>(_num_inputs);
 
+    TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
+
+    // Auto-initialize the output if it has not been initialized yet
+    auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
+
     unsigned int depth_offset = 0;
     for(unsigned int i = 0; i < _num_inputs; ++i)
     {
         _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
-        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0));
+        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
 
         depth_offset += inputs_vector.at(i)->info()->dimension(2);
     }

diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index b831a6a..52a4cc1 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp

@@ -82,7 +82,7 @@
     }
 
     // Add zero padding XY
-    _input_border_handler.configure(input, _conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0));
+    _input_border_handler.configure(input, _conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
 }
 
 void NEDirectConvolutionLayer::run()

diff --git a/src/runtime/NEON/functions/NEFlattenLayer.cpp b/src/runtime/NEON/functions/NEFlattenLayer.cpp
new file mode 100644
index 0000000..408eff5
--- /dev/null
+++ b/src/runtime/NEON/functions/NEFlattenLayer.cpp

@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
+
+#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
+#include "arm_compute/core/Size2D.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+void NEFlattenLayer::configure(const ITensor *input, ITensor *output) // Builds and configures a single NEIm2ColKernel from input to output and stores it in _kernel.
+{
+    auto k = arm_compute::support::cpp14::make_unique<NEIm2ColKernel>();
+    k->configure(input, output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); // NOTE(review): presumably a 1x1 kernel, unit stride, no padding, no bias - confirm against NEIm2ColKernel::configure
+    _kernel = std::move(k); // ownership of the configured kernel moves into the _kernel member
+}
\ No newline at end of file

diff --git a/src/runtime/NEON/functions/NEIntegralImage.cpp b/src/runtime/NEON/functions/NEIntegralImage.cpp
index 2e94ed5..fa8aaeb 100644
--- a/src/runtime/NEON/functions/NEIntegralImage.cpp
+++ b/src/runtime/NEON/functions/NEIntegralImage.cpp

@@ -36,5 +36,5 @@
     auto k = arm_compute::support::cpp14::make_unique<NEIntegralImageKernel>();
     k->configure(input, output);
     _kernel = std::move(k);
-    _border_handler.configure(output, _kernel->border_size(), BorderMode::CONSTANT, 0);
+    _border_handler.configure(output, _kernel->border_size(), BorderMode::CONSTANT, static_cast<float>(0.f));
 }

diff --git a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp
index 3b59820..0854c9d 100644
--- a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp
+++ b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp

@@ -38,10 +38,10 @@
 
     if(border_mode != BorderMode::UNDEFINED)
     {
-        _border_handler.configure(input, BorderSize(1), BorderMode::CONSTANT, 0);
+        _border_handler.configure(input, BorderSize(1), BorderMode::CONSTANT, static_cast<float>(0.f));
     }
     else
     {
-        _border_handler.configure(input, BorderSize(1), BorderMode::UNDEFINED, 0);
+        _border_handler.configure(input, BorderSize(1), BorderMode::UNDEFINED, static_cast<float>(0.f));
     }
 }

diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp
index 4c4e11f..f8a85b9 100644
--- a/src/runtime/NEON/functions/NEPoolingLayer.cpp
+++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp

@@ -23,19 +23,36 @@
  */
 #include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
 
-#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
 #include "support/ToolchainSupport.h"
 
 using namespace arm_compute;
 
+NEPoolingLayer::NEPoolingLayer() // Value-initializes the pooling kernel and border handler; the global-pooling flag starts as false until configure() sets it.
+    : _pooling_layer_kernel(), _border_handler(), _is_global_pooling_layer(false)
+{
+}
+
 void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
 {
+    // Global pooling: the pooling window covers the whole first two dimensions of the input
+    _is_global_pooling_layer = (input->info()->dimension(0) == pool_info.pool_size()) && (input->info()->dimension(1) == pool_info.pool_size()); // run() reads this flag to choose the split dimension
+
     // Configure pooling kernel
-    auto k = arm_compute::support::cpp14::make_unique<NEPoolingLayerKernel>();
-    k->configure(input, output, pool_info);
-    _kernel = std::move(k);
+    _pooling_layer_kernel.configure(input, output, pool_info); // the kernel is now a direct member rather than a heap-allocated _kernel
 
     // Configure border depending on operation required
     BorderMode border_mode = (pool_info.pool_type() == PoolingType::MAX) ? BorderMode::REPLICATE : BorderMode::CONSTANT;
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(0));
+    _border_handler.configure(input, _pooling_layer_kernel.border_size(), border_mode, PixelValue(static_cast<float>(0.f))); // border size comes from the pooling kernel; the constant value passed is a float zero
 }
+
+void NEPoolingLayer::run() // Schedules the border handler first, then the pooling kernel, through NEScheduler.
+{
+    // Fill the border (handler set up in configure()), split along DimY
+    NEScheduler::get().schedule(&_border_handler, Window::DimY);
+
+    // Run the pooling kernel
+    NEScheduler::get().schedule(&_pooling_layer_kernel, _is_global_pooling_layer ? Window::DimZ : Window::DimY); // global pooling is split along DimZ, every other case along DimY
+}
\ No newline at end of file

diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp
index 45c3e5d..f1a9145 100644
--- a/src/runtime/NEON/functions/NEReductionOperation.cpp
+++ b/src/runtime/NEON/functions/NEReductionOperation.cpp

@@ -74,7 +74,7 @@
     // Configure fill border kernel
     BorderSize fill_border_size = (axis == 0) ? _reduction_kernel.border_size() : BorderSize();
     BorderMode fill_border_mode = reduction_operation_border_mode(op);
-    _fill_border_kernel.configure(input, fill_border_size, fill_border_mode, PixelValue(0));
+    _fill_border_kernel.configure(input, fill_border_size, fill_border_mode, PixelValue(static_cast<float>(0.f)));
 }
 
 void NEReductionOperation::run()

diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp
index be81641..1dd2511 100644
--- a/src/runtime/OMP/OMPScheduler.cpp
+++ b/src/runtime/OMP/OMPScheduler.cpp

@@ -51,32 +51,34 @@
 void OMPScheduler::set_num_threads(unsigned int num_threads)
 {
     const unsigned int num_cores = omp_get_max_threads();
-    _num_threads                 = num_threads == 0 ? num_cores : num_threads;
+    _num_threads                 = (num_threads == 0) ? num_cores : num_threads; // a request of 0 selects the value returned by omp_get_max_threads()
 }
 
 void OMPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension)
 {
     ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");
 
+    ThreadInfo info; // thread description handed to every kernel->run() call below
+    info.cpu_info = _info;
+
     const Window      &max_window     = kernel->window();
     const unsigned int num_iterations = max_window.num_iterations(split_dimension);
-    const unsigned int num_threads    = std::min(num_iterations, _num_threads);
+    info.num_threads                  = std::min(num_iterations, _num_threads); // never more threads than iterations along split_dimension
 
-    if(!kernel->is_parallelisable() || 1 == num_threads)
+    if(!kernel->is_parallelisable() || info.num_threads == 1)
     {
-        kernel->run(max_window);
+        kernel->run(max_window, info); // single-threaded path: run the whole window in the calling thread
     }
     else
     {
-        #pragma omp parallel num_threads(num_threads)
+        #pragma omp parallel firstprivate(info) num_threads(info.num_threads)
         {
             #pragma omp for
-            for(unsigned int t = 0; t < num_threads; ++t)
+            for(int t = 0; t < info.num_threads; ++t)
             {
-                Window win = max_window.split_window(split_dimension, t, num_threads);
-                win.set_thread_id(t);
-                win.set_num_threads(num_threads);
-                kernel->run(win);
+                Window win     = max_window.split_window(split_dimension, t, info.num_threads);
+                info.thread_id = t; // info is firstprivate: each thread writes its own copy, so no thread can observe another thread's id
+                kernel->run(win, info);
             }
         }
     }

diff --git a/src/runtime/Utils.cpp b/src/runtime/Utils.cpp
index 1b06117..81de782 100644
--- a/src/runtime/Utils.cpp
+++ b/src/runtime/Utils.cpp

@@ -28,6 +28,10 @@
 
 using namespace arm_compute;
 
+static const std::string information =
+#include "arm_compute_version.embed"
+    ;
+
 const std::string &arm_compute::string_from_scheduler_type(Scheduler::Type t)
 {
     static std::map<Scheduler::Type, const std::string> scheduler_type_map =
commit	bf8b01dfbfdca124673ade33c5eac8f3748d7abd	[log] [tgz]
author	Kaizen <kaizen@arm.com>	Thu Oct 12 14:26:51 2017 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	Thu Oct 12 15:42:58 2017 +0100
tree	9504f134ca292b988e7065b22229e9bbe0493d00
parent	8938bd3f40ea62ff56d6ed4e2db0a8aee34dd64a [diff]