arm_compute v18.05
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index c1ba5dd..40ada8f 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -34,6 +34,7 @@
 NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
     : _memory_group(std::move(memory_manager)),
       _conv_f(),
+      _upsample_f(),
       _scaled_output(),
       _input(nullptr),
       _info(),
@@ -41,13 +42,64 @@
 {
 }
 
+// Checks that the given tensor infos and parameters describe a supported deconvolution; returns an empty Status when every check passes.
+Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info,
+                                      unsigned int inner_border_right, unsigned int inner_border_top)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1));
+    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1);
+    ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric());
+
+    const unsigned int stride_x = info.stride().first;
+    const unsigned int stride_y = info.stride().second;
+    // Use >= so that a zero stride is rejected too; "border > stride - 1" would wrap around for an unsigned zero.
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_right >= stride_x, "inner_border_right must be smaller than stride_x");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_top >= stride_y, "inner_border_top must be smaller than stride_y");
+
+    auto out_dims = deconvolution_output_dimensions(input->dimension(0), input->dimension(1), weights->dimension(0), weights->dimension(1),
+                                                    info.pad().first, info.pad().second, inner_border_right, inner_border_top, stride_x, stride_y);
+    // bias is optional (may be nullptr), so it is only compared against input inside the guard below.
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights);
+
+    if(bias != nullptr)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, bias);
+    }
+
+    if(output->tensor_shape().total_size() > 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
+
+        const TensorShape output_shape = deconvolution_output_shape(out_dims, input->tensor_shape(), weights->tensor_shape());
+
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(), "Output's width is invalid.");
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(), "Output's height is invalid.");
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), "Output's depth is invalid.");
+    }
+    // Info of the intermediate tensor that is handed to the convolution validation below in place of the raw input.
+    TensorInfo scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_deconvolution_shape(*input, stride_x, stride_y, inner_border_right, inner_border_top,
+                                                                                                      info)));
+    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+    for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != scale_out_info.dimension(i));
+    }
+
+    ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, WeightsInfo()));
+    return Status{};
+}
+
 void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info,
                                      unsigned int inner_border_right, unsigned int inner_border_top)
 {
-    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != weights->info()->dimension(1));
-    ARM_COMPUTE_ERROR_ON(!info.padding_is_symmetric());
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
 
     _input        = input;
     _info         = info;
@@ -55,15 +107,9 @@
 
     const unsigned int stride_x = info.stride().first;
     const unsigned int stride_y = info.stride().second;
-    auto               out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),
-                                                                  info.pad().first, info.pad().second, inner_border_right, inner_border_top, stride_x, stride_y);
 
-    const TensorShape output_shape = deconvolution_output_shape(out_dims, input->info()->tensor_shape(), weights->info()->tensor_shape());
-
-    ARM_COMPUTE_UNUSED(output_shape);
-    ARM_COMPUTE_ERROR_ON_MSG(output->info()->dimension(Window::DimX) != output_shape.x(), "Output's width is invalid.");
-    ARM_COMPUTE_ERROR_ON_MSG(output->info()->dimension(Window::DimY) != output_shape.y(), "Output's height is invalid.");
-    ARM_COMPUTE_ERROR_ON_MSG(output->info()->dimension(Window::DimZ) != output_shape.z(), "Output's depth is invalid.");
+    // Perform validation step
+    ARM_COMPUTE_ERROR_THROW_ON(NEDeconvolutionLayer::validate(input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), info, inner_border_right, inner_border_top));
 
     _memory_group.manage(&_scaled_output);
 
@@ -79,44 +125,20 @@
 
     // Allocate auxiliary tensors
     _scaled_output.allocator()->allocate();
+
+    // configure upsample function
+    _upsample_f.configure(input, &_scaled_output, info, inner_border_right, inner_border_top);
 }
 
 void NEDeconvolutionLayer::run()
 {
     _memory_group.acquire();
 
-    // Initialize _scaled_output buffer
-    const int width_in      = _input->info()->dimension(0);
-    const int height_in     = _input->info()->dimension(1);
-    const int width_scaled  = _scaled_output.info()->dimension(0);
-    const int height_scaled = _scaled_output.info()->dimension(1);
-    const int num_2d_slices = _input->info()->tensor_shape().total_size() / (width_in * height_in);
-    const int stride_x      = _info.stride().first;
-    const int stride_y      = _info.stride().second;
-
-    std::fill_n(_scaled_output.buffer(), _scaled_output.info()->total_size(), 0);
-
-    // scaled_output is the input for the forward convolution. We copy the input elements to scaled_output
-    // and insert rows and columns with zeroes depending on the stride values.
-    for(int slice = 0; slice < num_2d_slices; ++slice)
-    {
-        const int start_x = _info.pad().first;
-        const int start_y = _inner_border.second + _info.pad().second;
-        const int end_y   = height_scaled - _info.pad().second;
-        const int end_x   = width_scaled - _inner_border.first - _info.pad().first;
-
-        for(int yi = start_y, in_y = 0; yi < end_y; yi += stride_y, in_y++)
-        {
-            for(int xi = start_x, in_x = 0; xi < end_x; xi += stride_x, in_x++)
-            {
-                const auto in = *(reinterpret_cast<float *>(_input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(in_x, in_y, slice))));
-                *(reinterpret_cast<float *>(_scaled_output.buffer() + _scaled_output.info()->offset_element_in_bytes(Coordinates(xi, yi, slice)))) = in;
-            }
-        }
-    }
+    // Run upsample function (configured in configure() on the input and _scaled_output)
+    _upsample_f.run();
 
     // Run convolution layer
     _conv_f.run();
 
     _memory_group.release();
 }