arm_compute v17.09

Change-Id: I4bf8f4e6e5f84ce0d5b6f5ba570d276879f42a81
diff --git a/src/runtime/CL/functions/CLConvolution.cpp b/src/runtime/CL/functions/CLConvolution.cpp
index 21b5d47..a9b0867 100644
--- a/src/runtime/CL/functions/CLConvolution.cpp
+++ b/src/runtime/CL/functions/CLConvolution.cpp
@@ -26,13 +26,13 @@
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
 #include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/PixelValue.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/ITensorAllocator.h"
+#include "support/ToolchainSupport.h"
 
 #include <utility>
 
@@ -40,15 +40,15 @@
 
 void CLConvolution3x3::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
 {
-    auto k = arm_compute::cpp14::make_unique<CLConvolution3x3Kernel>();
+    auto k = arm_compute::support::cpp14::make_unique<CLConvolution3x3Kernel>();
     k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
     _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
 }
 
 template <unsigned int matrix_size>
-CLConvolutionSquare<matrix_size>::CLConvolutionSquare()
-    : _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
+CLConvolutionSquare<matrix_size>::CLConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager)
+    : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
 {
 }
 
@@ -66,6 +66,9 @@
         std::pair<DataType, DataType> type_pair = data_type_for_convolution(conv_col, conv_row, matrix_size);
         _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, type_pair.first));
 
+        // Manage intermediate buffers
+        _memory_group.manage(&_tmp);
+
         if(scale == 0)
         {
             scale = calculate_matrix_scale(conv, matrix_size);
@@ -92,8 +95,12 @@
 
     if(_is_separable)
     {
+        _memory_group.acquire();
+
         CLScheduler::get().enqueue(_kernel_hor, false);
         CLScheduler::get().enqueue(_kernel_vert);
+
+        _memory_group.release();
     }
     else
     {
@@ -107,7 +114,7 @@
 
 void CLConvolutionRectangle::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
 {
-    auto k = arm_compute::cpp14::make_unique<CLConvolutionRectangleKernel>();
+    auto k = arm_compute::support::cpp14::make_unique<CLConvolutionRectangleKernel>();
     k->configure(input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
     _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));