arm_compute v17.06
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index d16354f..82929ba 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -30,6 +30,7 @@
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h"
#include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h"
+#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h"
#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h"
#include "arm_compute/runtime/CL/functions/CLBitwiseNot.h"
#include "arm_compute/runtime/CL/functions/CLBitwiseOr.h"
@@ -41,6 +42,7 @@
#include "arm_compute/runtime/CL/functions/CLColorConvert.h"
#include "arm_compute/runtime/CL/functions/CLConvolution.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h"
#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
#include "arm_compute/runtime/CL/functions/CLDerivative.h"
#include "arm_compute/runtime/CL/functions/CLDilate.h"
@@ -55,11 +57,16 @@
#include "arm_compute/runtime/CL/functions/CLGaussian3x3.h"
#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
+#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h"
+#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
+#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
+#include "arm_compute/runtime/CL/functions/CLHOGMultiDetection.h"
#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h"
#include "arm_compute/runtime/CL/functions/CLHistogram.h"
#include "arm_compute/runtime/CL/functions/CLIntegralImage.h"
#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h"
#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h"
+#include "arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h"
#include "arm_compute/runtime/CL/functions/CLMagnitude.h"
#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"
#include "arm_compute/runtime/CL/functions/CLMedian3x3.h"
diff --git a/arm_compute/runtime/CL/CLHOG.h b/arm_compute/runtime/CL/CLHOG.h
new file mode 100644
index 0000000..9b4a303
--- /dev/null
+++ b/arm_compute/runtime/CL/CLHOG.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOG_H__
+#define __ARM_COMPUTE_CLHOG_H__
+
+#include "arm_compute/core/CL/ICLHOG.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/HOGInfo.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** OpenCL implementation of HOG data-object */
+class CLHOG : public ICLHOG
+{
+public:
+ /** Default constructor */
+ CLHOG();
+ /** Allocate the HOG descriptor using the given HOG's metadata
+ *
+ * @param[in] input HOG's metadata used to allocate the HOG descriptor
+ */
+ void init(const HOGInfo &input);
+
+ /** Enqueue a map operation of the allocated buffer.
+ *
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed.
+ */
+ void map(bool blocking = true);
+ using ICLHOG::map;
+
+ /** Enqueue an unmap operation of the allocated and mapped buffer.
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ */
+ void unmap();
+ using ICLHOG::unmap;
+
+ // Inherited method overridden:
+ void free() override;
+ const HOGInfo *info() const override;
+ const cl::Buffer &cl_buffer() const override;
+
+protected:
+ // Inherited methods overridden:
+ uint8_t *do_map(cl::CommandQueue &q, bool blocking) override;
+ void do_unmap(cl::CommandQueue &q) override;
+
+private:
+ HOGInfo _info;
+ cl::Buffer _buffer;
+};
+}
+#endif /* __ARM_COMPUTE_CLHOG_H__ */
diff --git a/arm_compute/runtime/CL/CLMultiHOG.h b/arm_compute/runtime/CL/CLMultiHOG.h
new file mode 100644
index 0000000..17bb4e0
--- /dev/null
+++ b/arm_compute/runtime/CL/CLMultiHOG.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMULTIHOG_H__
+#define __ARM_COMPUTE_CLMULTIHOG_H__
+
+#include "arm_compute/core/CL/ICLMultiHOG.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLHOG.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic implementation of the CL multi HOG data-objects */
+class CLMultiHOG : public ICLMultiHOG
+{
+public:
+ /** Constructor
+ *
+ * @param[in] num_models Number of HOG data objects to contain
+ *
+ */
+ CLMultiHOG(size_t num_models);
+
+ // Inherited methods overridden:
+ size_t num_models() const override;
+ ICLHOG *cl_model(size_t index) override;
+ const ICLHOG *cl_model(size_t index) const override;
+
+private:
+ size_t _num_models;
+ std::unique_ptr<CLHOG[]> _model;
+};
+}
+#endif /*__ARM_COMPUTE_CLMULTIHOG_H__ */
diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h
index 71baa55..8e80259 100644
--- a/arm_compute/runtime/CL/CLScheduler.h
+++ b/arm_compute/runtime/CL/CLScheduler.h
@@ -24,8 +24,12 @@
#ifndef __ARM_COMPUTE_CLSCHEDULER_H__
#define __ARM_COMPUTE_CLSCHEDULER_H__
+#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLTypes.h"
#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Types.h"
namespace arm_compute
{
@@ -50,7 +54,7 @@
void default_init()
{
CLKernelLibrary::get().init("./cl_kernels/", cl::Context::getDefault(), cl::Device::getDefault());
- init(cl::Context::getDefault(), cl::CommandQueue::getDefault());
+ init(cl::Context::getDefault(), cl::CommandQueue::getDefault(), cl::Device::getDefault());
}
/** Schedule the execution of the passed kernel if possible.
*
@@ -63,11 +67,14 @@
*
* @param[in] context A CL context.
* @param[in] queue A CL command queue.
+ * @param[in] device A CL device.
*/
- void init(cl::Context context = cl::Context::getDefault(), cl::CommandQueue queue = cl::CommandQueue::getDefault())
+ void init(cl::Context context = cl::Context::getDefault(), cl::CommandQueue queue = cl::CommandQueue::getDefault(),
+ cl::Device device = cl::Device::getDefault())
{
_context = std::move(context);
_queue = std::move(queue);
+ _target = get_target_from_device(device);
}
/** Accessor for the associated CL context.
@@ -97,6 +104,15 @@
return _queue;
}
+ /** Get the target GPU.
+ *
+ * @return The target GPU.
+ */
+ GPUTarget target() const
+ {
+ return _target;
+ }
+
/** Accessor to set the CL command queue to be used by the scheduler.
*
* @param[in] queue A CL command queue.
@@ -106,6 +122,15 @@
_queue = std::move(queue);
}
+ /** Accessor to set target GPU to be used by the scheduler.
+ *
+ * @param[in] target The target GPU.
+ */
+ void set_target(GPUTarget target)
+ {
+ _target = target;
+ }
+
/** Blocks until all commands in the associated command queue have finished. */
void sync()
{
@@ -127,6 +152,7 @@
private:
cl::Context _context;
cl::CommandQueue _queue;
+ GPUTarget _target;
};
}
#endif /* __ARM_COMPUTE_CLSCHEDULER_H__ */
diff --git a/arm_compute/runtime/CL/CLSubTensor.h b/arm_compute/runtime/CL/CLSubTensor.h
new file mode 100644
index 0000000..4bab164
--- /dev/null
+++ b/arm_compute/runtime/CL/CLSubTensor.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSUBTENSOR_H__
+#define __ARM_COMPUTE_CLSUBTENSOR_H__
+
+#include "arm_compute/core/SubTensorInfo.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensorInfo;
+
+/** Basic implementation of the OpenCL sub-tensor interface */
+class CLSubTensor : public ICLTensor
+{
+public:
+ /** Constructor
+ *
+ * @param[in] parent Parent tensor
+ * @param[in] tensor_shape Shape of the subtensor
+ * @param[in] coords Coordinates of the first subtensor element inside the parent tensor.
+ */
+ CLSubTensor(ICLTensor *parent, const TensorShape &tensor_shape, const Coordinates &coords);
+ /** Destructor: free the tensor's memory */
+ ~CLSubTensor() = default;
+ /** Restrict instances of this class to be copy constructed */
+ CLSubTensor(const CLSubTensor &) = delete;
+ /** Restrict instances of this class to be copied */
+ CLSubTensor &operator=(const CLSubTensor &) = delete;
+ /** Allow instances of this class to be move constructed */
+ CLSubTensor(CLSubTensor &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSubTensor &operator=(CLSubTensor &&) = default;
+
+ /** Enqueue a map operation of the allocated buffer.
+ *
+ * @note Mapping a subtensor will lead to the mapping of the whole parent tensor for now.
+ *
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed.
+ */
+ void map(bool blocking = true);
+ using ICLTensor::map;
+ /** Enqueue an unmap operation of the allocated and mapped buffer.
+ *
+ * @note Unmapping a subtensor will lead to the unmapping of the whole parent tensor for now.
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ */
+ void unmap();
+ using ICLTensor::unmap;
+
+ /** Return the parent tensor of the subtensor
+ *
+ * @return Parent tensor
+ */
+ ICLTensor *parent();
+
+ // Inherited methods overridden:
+ ITensorInfo *info() const override;
+ ITensorInfo *info() override;
+ const cl::Buffer &cl_buffer() const override;
+
+protected:
+ // Inherited methods overridden:
+ uint8_t *do_map(cl::CommandQueue &q, bool blocking) override;
+ void do_unmap(cl::CommandQueue &q) override;
+
+private:
+ ICLTensor *_parent;
+ mutable SubTensorInfo _info;
+};
+}
+#endif /*__ARM_COMPUTE_CLSUBTENSOR_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
new file mode 100644
index 0000000..d766d1c
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__
+#define __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLNormalizationLayerKernel and simulate a batch normalization layer.
+ *
+ * Batch normalization is calculated by:
+ * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f]
+ *
+ */
+class CLBatchNormalizationLayer : public IFunction
+{
+public:
+ /** Default constructor */
+ CLBatchNormalizationLayer();
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM].
+ * The rest are optional and used for representing batches. Data types supported: F32.
+ * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] epsilon Small value to avoid division with zero.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLBatchNormalizationLayerKernel _norm_kernel; /**< BatchNormalization layer kernel to run */
+};
+}
+#endif /* __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 2a9b487..6a40396 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -27,12 +27,12 @@
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
-#include "arm_compute/core/CL/kernels/CLConvolutionLayerWeightsReshapeKernel.h"
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
+#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
@@ -42,6 +42,34 @@
{
class ICLTensor;
+/** Function to reshape and transpose the weights. This function calls the following kernels:
+ * -# @ref CLWeightsReshapeKernel
+ * -# @ref CLGEMMTranspose1xWKernel
+ */
+class CLConvolutionLayerReshapeWeights : public IFunction
+{
+public:
+ /** Constructor */
+ CLConvolutionLayerReshapeWeights();
+ /** Set the input and output tensors.
+ *
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: F32.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+ * @param[out] output Destination tensor. Data types supported: Same as @p weights.
+ * @param[in] transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise.
+ * Data types supported: Same as @p weights.
+ */
+ void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose1xW);
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLConvolutionLayerWeightsReshapeKernel _weights_reshape_kernel;
+ CLGEMMTranspose1xWKernel _weights_transposed_kernel;
+ CLTensor _weights_reshaped;
+ bool _transpose1xW;
+};
+
/** Basic function to compute the convolution layer. This function calls the following OpenCL kernels:
*
* -# @ref CLConvolutionLayerWeightsReshapeKernel (executed only once for each configuration)
@@ -58,35 +86,36 @@
CLConvolutionLayer();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F16, F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input.
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F16, F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+ * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
*/
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo());
// Inherited methods overridden:
void run() override;
private:
- CLIm2ColKernel _input_im2col_kernel;
- CLConvolutionLayerWeightsReshapeKernel _weights_reshape_kernel;
- CLGEMMInterleave4x4Kernel _input_interleave_kernel;
- CLGEMMTranspose1xWKernel _weights_transposed_kernel;
- CLGEMMMatrixMultiplyKernel _mm_kernel;
- CLCol2ImKernel _output_col2im_kernel;
- CLTensor _input_im2col_reshaped;
- CLTensor _input_interleaved_reshaped;
- CLTensor _weights_reshaped;
- CLTensor _weights_transposed;
- CLTensor _gemm_output;
- bool _is_first_run;
- bool _has_bias;
- bool _is_fc;
+ CLConvolutionLayerReshapeWeights _reshape_weights;
+ CLIm2ColKernel _input_im2col_kernel;
+ CLGEMMInterleave4x4Kernel _input_interleave_kernel;
+ CLGEMMMatrixMultiplyKernel _mm_kernel;
+ CLCol2ImKernel _output_col2im_kernel;
+ CLTensor _input_im2col_reshaped;
+ CLTensor _input_interleaved_reshaped;
+ CLTensor _weights_reshaped;
+ CLTensor _weights_transposed;
+ CLTensor _gemm_output;
+ bool _has_bias;
+ bool _is_fully_connected_convolution;
+ bool _are_weights_reshaped;
};
}
#endif /* __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
new file mode 100644
index 0000000..3199936
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+#define __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+class CLDepthConcatenateKernel;
+class CLFillBorderKernel;
+
+/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
+ * -# @ref CLDepthConcatenateKernel
+ *
+ */
+class CLDepthConcatenate : public IFunction
+{
+public:
+ /** Default constructor */
+ CLDepthConcatenate();
+ /** Initialise the kernel's inputs vector and output.
+ *
+ * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F32.
+ * @param[out] output Output tensor. Data types supported: F32.
+ */
+ void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ std::vector<ICLTensor *> _inputs_vector;
+ std::unique_ptr<CLDepthConcatenateKernel[]> _concat_kernels_vector;
+ std::unique_ptr<CLFillBorderKernel[]> _border_handlers_vector;
+ unsigned int _num_inputs;
+};
+}
+#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 09e4fc9..826f445 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -36,13 +36,44 @@
namespace arm_compute
{
+/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls the following kernels:
+ *
+ * -# @ref CLTransposeKernel (if @p transpose_weights is set to true)
+ * -# @ref CLGEMMTranspose1xWKernel (if @p is_batched_fc_layer is set to true)
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class CLFullyConnectedLayerReshapeWeights : public IFunction
+{
+public:
+ /** Constructor */
+ CLFullyConnectedLayerReshapeWeights();
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/F32.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] transpose_weights True if the weights must be transposed. Data types supported: Same as @p weights.
+ * @param[in] is_batched_fc_layer True if it is a batched fully connected layer
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool transpose_weights, bool is_batched_fc_layer);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLTransposeKernel _transpose_kernel;
+ CLGEMMTranspose1xWKernel _transpose1xW_kernel;
+ CLTensor _transpose_output;
+ bool _transpose_weights;
+ bool _is_batched_fc_layer;
+};
+
/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following OpenCL kernels:
*
* -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer)
- * -# @ref CLTransposeKernel (if @p transpose_weights is set to true) (called once)
- * -# @ref NEGEMMTranspose1xWKernel (called once if we have a multi-batch input)
- * -# @ref NEGEMMInterleave4x4Kernel (called if we have a multi-batch input)
- * -# @ref NEGEMMMatrixMultiplyKernel
+ * -# @ref CLFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false) (called once)
+ * -# @ref CLGEMMInterleave4x4Kernel (called if we have a multi-batch input)
+ * -# @ref CLGEMMMatrixMultiplyKernel
* -# @ref CLGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr)
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
@@ -54,13 +85,14 @@
CLFullyConnectedLayer();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data type supported: F16, F32.
- * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input
- * @param[in] biases Bias tensor. It can be nullptr. Data type supported:Same as @p input.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true.
+ * @param[in] input Source tensor. Data type supported: F16/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input
+ * @param[in] biases Bias tensor. It can be nullptr. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true.
+ * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false.
*/
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose_weights = true);
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose_weights = true, bool are_weights_reshaped = false);
//Inherited methods override
void run() override;
@@ -71,21 +103,18 @@
void configure_conv_fc_wb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
void configure_conv_fc_nb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
- CLIm2ColKernel _im2col_kernel;
- CLTransposeKernel _transpose_kernel;
- CLGEMMTranspose1xWKernel _transpose1xW_kernel;
- CLGEMMInterleave4x4Kernel _interleave4x4_kernel;
- CLGEMMMatrixMultiplyKernel _mm_kernel;
- CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
- CLTensor _im2col_output;
- CLTensor _interleave4x4_output;
- CLTensor _transpose_output;
- CLTensor _transpose1xW_output;
- bool _is_first_run;
- bool _transpose_weights;
- bool _fc_after_conv;
- bool _batched_fc_layer;
- bool _accumulate_biases;
+ CLIm2ColKernel _im2col_kernel;
+ CLFullyConnectedLayerReshapeWeights _reshape_weights_kernel;
+ CLGEMMInterleave4x4Kernel _interleave4x4_kernel;
+ CLGEMMMatrixMultiplyKernel _mm_kernel;
+ CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
+ CLTensor _im2col_output;
+ CLTensor _interleave4x4_output;
+ CLTensor _reshape_weights_output;
+ bool _are_weights_reshaped;
+ bool _is_fc_after_conv;
+ bool _is_batched_fc_layer;
+ bool _accumulate_biases;
};
}
#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
new file mode 100644
index 0000000..cdb23bf
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGDESCRIPTOR_H__
+#define __ARM_COMPUTE_CLHOGDESCRIPTOR_H__
+
+#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class IHOG;
+/** Basic function to calculate HOG descriptor. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLHOGGradient
+ * -# @ref CLHOGOrientationBinningKernel
+ * -# @ref CLHOGBlockNormalizationKernel
+ *
+ */
+class CLHOGDescriptor : public IFunction
+{
+public:
+ /** Default constructor */
+ CLHOGDescriptor();
+ /** Initialise the function's source, destination, HOG data-object and border mode
+ *
+ * @param[in, out] input Input tensor. Data type supported: U8
+ * (Written to only for @p border_mode != UNDEFINED)
+ * @param[out] output Output tensor which stores the HOG descriptor. DataType supported: F32. The number of channels is equal to the number of histogram bins per block
+ * @param[in] hog HOG data object which describes the HOG descriptor
+ * @param[in] border_mode Border mode to use.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+ // Inherited method overridden:
+ void run() override;
+
+private:
+ CLHOGGradient _gradient;
+ CLHOGOrientationBinningKernel _orient_bin;
+ CLHOGBlockNormalizationKernel _block_norm;
+ CLTensor _mag;
+ CLTensor _phase;
+ CLTensor _hog_space;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGDESCRIPTOR_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHOGDetector.h b/arm_compute/runtime/CL/functions/CLHOGDetector.h
new file mode 100644
index 0000000..0b4fad7
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGDetector.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGDETECTOR_H__
+#define __ARM_COMPUTE_CLHOGDETECTOR_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
+#include "arm_compute/core/IHOG.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to execute HOG detector based on linear SVM. This function calls the following OpenCL kernel:
+ *
+ * -# @ref CLHOGDetectorKernel
+ *
+ */
+class CLHOGDetector : public IFunction
+{
+public:
+ /** Default constructor */
+ CLHOGDetector();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGDetector(const CLHOGDetector &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGDetector &operator=(const CLHOGDetector &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHOGDetector(CLHOGDetector &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHOGDetector &operator=(CLHOGDetector &&) = default;
+ /** Default destructor */
+ ~CLHOGDetector() = default;
+ /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
+ *
+ * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it.
+ *
+ * @param[in] input Input tensor. It is the output of @ref NEHOGDescriptor. Data type supported: F32
+ * @param[in] hog HOG data-object that describes the HOG descriptor
+ * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects
+ * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+ * It must be multiple of the block stride stored in hog
+ * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
+ * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
+ */
+ void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLHOGDetectorKernel _hog_detector_kernel;
+ ICLDetectionWindowArray *_detection_windows;
+ cl::Buffer _num_detection_windows;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGDETECTOR_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h
new file mode 100644
index 0000000..e74a684
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGGradient.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGGRADIENT_H__
+#define __ARM_COMPUTE_CLHOGGRADIENT_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLDerivative.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Basic function to calculate the gradient for HOG. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLDerivative
+ * -# @ref CLMagnitudePhaseKernel
+ *
+ */
+class CLHOGGradient : public IFunction
+{
+public:
+ /** Default constructor */
+ CLHOGGradient();
+ /** Initialise the function's source, destinations, phase type and border mode
+ *
+ * @param[in, out] input Input tensor. Data type supported: U8.
+ * (Written to only for @p border_mode != UNDEFINED)
+ * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16.
+ * @param[out] output_phase Output tensor.(phase). Format supported: U8
+ * @param[in] phase_type Type of @ref PhaseType
+ * @param[in] border_mode Border mode to use
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ void configure(ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+ // Inherited method overridden:
+ void run() override;
+
+private:
+ CLDerivative _derivative;
+ CLMagnitudePhaseKernel _mag_phase;
+ CLTensor _gx;
+ CLTensor _gy;
+};
+}
+#endif /*__ARM_COMPUTE_CLHOGGRADIENT_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
new file mode 100644
index 0000000..3fe0fa9
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGMULTIDETECTION_H__
+#define __ARM_COMPUTE_CLHOGMULTIDETECTION_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLMultiHOG.h"
+#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
+#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following kernels:
+ *
+ * -# @ref CLHOGGradient
+ * -# @ref CLHOGOrientationBinningKernel
+ * -# @ref CLHOGBlockNormalizationKernel
+ * -# @ref CLHOGDetector
+ * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true)
+ *
+ * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same:
+ * -# Phase type
+ -# Normalization type
+ -# L2 hysteresis threshold if the normalization type is L2HYS_NORM
+ *
+ */
+class CLHOGMultiDetection : public IFunction
+{
+public:
+ /** Default constructor */
+ CLHOGMultiDetection();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGMultiDetection(const CLHOGMultiDetection &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGMultiDetection &operator=(const CLHOGMultiDetection &) = delete;
+ /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
+ *
+ * @param[in, out] input Input tensor. Data type supported: U8
+ * (Written to only for @p border_mode != UNDEFINED)
+ * @param[in] multi_hog Container of multiple HOG data object. Each HOG data object describes one HOG model to detect.
+ * This container should store the HOG data-objects in descending or ascending cell_size width order.
+ * This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects
+ * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects
+ * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object
+ * The dimension of this array must be the same of multi_hog->num_models()
+ * The i-th detection_window_stride of this array must be multiple of the block_stride stored in the i-th multi_hog array
+ * @param[in] border_mode Border mode to use.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
+ * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not.
+ * True if the non-maxima suppression stage has to be computed
+ * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage
+ *
+ */
+ void configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode,
+ uint8_t constant_border_value = 0,
+ float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
+
+ // Inherited method overridden:
+ void run() override;
+
+private:
+ CLHOGGradient _gradient_kernel;
+ std::unique_ptr<CLHOGOrientationBinningKernel[]> _orient_bin_kernel;
+ std::unique_ptr<CLHOGBlockNormalizationKernel[]> _block_norm_kernel;
+ std::unique_ptr<CLHOGDetector[]> _hog_detect_kernel;
+ std::unique_ptr<CPPDetectionWindowNonMaximaSuppressionKernel> _non_maxima_kernel;
+ std::unique_ptr<CLTensor[]> _hog_space;
+ std::unique_ptr<CLTensor[]> _hog_norm_space;
+ ICLDetectionWindowArray *_detection_windows;
+ CLTensor _mag;
+ CLTensor _phase;
+ bool _non_maxima_suppression;
+ size_t _num_orient_bin_kernel;
+ size_t _num_block_norm_kernel;
+ size_t _num_hog_detect_kernel;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGMULTIDETECTION_H__ */
\ No newline at end of file
diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
new file mode 100644
index 0000000..b4e4691
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__
+#define __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
+#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
+#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to compute the locally connected layer. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLLocallyConnectedLayerWeightsReshapeKernel (executed only once for each configuration)
+ * -# @ref CLIm2ColKernel
+ * -# @ref CLLocallyConnectedMatrixMultiplyKernel
+ * -# @ref CLCol2ImKernel
+ */
+class CLLocallyConnectedLayer : public IFunction
+{
+public:
+ /** Default constructor */
+ CLLocallyConnectedLayer();
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F32.
+ * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLIm2ColKernel _input_im2col_kernel;
+ CLLocallyConnectedLayerWeightsReshapeKernel _weights_reshape_kernel;
+ CLLocallyConnectedMatrixMultiplyKernel _mm_kernel;
+ CLCol2ImKernel _output_col2im_kernel;
+ CLTensor _input_im2col_reshaped;
+ CLTensor _weights_reshaped;
+ CLTensor _gemm_output;
+ bool _is_first_run;
+};
+}
+#endif /* __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__ */