arm_compute v17.06
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index d16354f..82929ba 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -30,6 +30,7 @@
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h"
#include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h"
+#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h"
#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h"
#include "arm_compute/runtime/CL/functions/CLBitwiseNot.h"
#include "arm_compute/runtime/CL/functions/CLBitwiseOr.h"
@@ -41,6 +42,7 @@
#include "arm_compute/runtime/CL/functions/CLColorConvert.h"
#include "arm_compute/runtime/CL/functions/CLConvolution.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h"
#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
#include "arm_compute/runtime/CL/functions/CLDerivative.h"
#include "arm_compute/runtime/CL/functions/CLDilate.h"
@@ -55,11 +57,16 @@
#include "arm_compute/runtime/CL/functions/CLGaussian3x3.h"
#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
+#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h"
+#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
+#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
+#include "arm_compute/runtime/CL/functions/CLHOGMultiDetection.h"
#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h"
#include "arm_compute/runtime/CL/functions/CLHistogram.h"
#include "arm_compute/runtime/CL/functions/CLIntegralImage.h"
#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h"
#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h"
+#include "arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h"
#include "arm_compute/runtime/CL/functions/CLMagnitude.h"
#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"
#include "arm_compute/runtime/CL/functions/CLMedian3x3.h"
diff --git a/arm_compute/runtime/CL/CLHOG.h b/arm_compute/runtime/CL/CLHOG.h
new file mode 100644
index 0000000..9b4a303
--- /dev/null
+++ b/arm_compute/runtime/CL/CLHOG.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOG_H__
+#define __ARM_COMPUTE_CLHOG_H__
+
+#include "arm_compute/core/CL/ICLHOG.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/HOGInfo.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** OpenCL implementation of HOG data-object */
+class CLHOG : public ICLHOG
+{
+public:
+ /** Default constructor */
+ CLHOG();
+ /** Allocate the HOG descriptor using the given HOG's metadata
+ *
+ * @param[in] input HOG's metadata used to allocate the HOG descriptor
+ */
+ void init(const HOGInfo &input);
+
+ /** Enqueue a map operation of the allocated buffer.
+ *
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed.
+ */
+ void map(bool blocking = true);
+ using ICLHOG::map;
+
+ /** Enqueue an unmap operation of the allocated and mapped buffer.
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ */
+ void unmap();
+ using ICLHOG::unmap;
+
+ // Inherited methods overridden:
+ void free() override;
+ const HOGInfo *info() const override;
+ const cl::Buffer &cl_buffer() const override;
+
+protected:
+ // Inherited methods overridden:
+ uint8_t *do_map(cl::CommandQueue &q, bool blocking) override;
+ void do_unmap(cl::CommandQueue &q) override;
+
+private:
+ HOGInfo _info;
+ cl::Buffer _buffer;
+};
+}
+#endif /* __ARM_COMPUTE_CLHOG_H__ */
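
A minimal usage sketch for the new data-object (not part of the patch; the HOGInfo values are illustrative and the constructor argument order is assumed from arm_compute/core/HOGInfo.h):

    #include "arm_compute/runtime/CL/CLHOG.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"

    using namespace arm_compute;

    void clhog_sketch()
    {
        CLScheduler::get().default_init();

        // Illustrative metadata: 8x8 cells, 16x16 blocks, 64x128 detection window,
        // 8x8 block stride, 9 histogram bins.
        HOGInfo hog_info(Size2D(8, 8), Size2D(16, 16), Size2D(64, 128), Size2D(8, 8),
                         9, HOGNormType::L2HYS_NORM, 0.2f, PhaseType::UNSIGNED);

        CLHOG hog;
        hog.init(hog_info); // Allocates the OpenCL buffer backing the descriptor

        hog.map();   // Blocking map: host can read/write the descriptor on return
        // ... fill the linear SVM coefficients through descriptor() ...
        hog.unmap(); // Enqueues the unmap; flush the queue before device access
    }
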
diff --git a/arm_compute/runtime/CL/CLMultiHOG.h b/arm_compute/runtime/CL/CLMultiHOG.h
new file mode 100644
index 0000000..17bb4e0
--- /dev/null
+++ b/arm_compute/runtime/CL/CLMultiHOG.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMULTIHOG_H__
+#define __ARM_COMPUTE_CLMULTIHOG_H__
+
+#include "arm_compute/core/CL/ICLMultiHOG.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLHOG.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic implementation of the CL multi HOG data-objects */
+class CLMultiHOG : public ICLMultiHOG
+{
+public:
+ /** Constructor
+ *
+ * @param[in] num_models Number of HOG data objects to contain
+ *
+ */
+ CLMultiHOG(size_t num_models);
+
+ // Inherited methods overridden:
+ size_t num_models() const override;
+ ICLHOG *cl_model(size_t index) override;
+ const ICLHOG *cl_model(size_t index) const override;
+
+private:
+ size_t _num_models;
+ std::unique_ptr<CLHOG[]> _model;
+};
+}
+#endif /*__ARM_COMPUTE_CLMULTIHOG_H__ */
diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h
index 71baa55..8e80259 100644
--- a/arm_compute/runtime/CL/CLScheduler.h
+++ b/arm_compute/runtime/CL/CLScheduler.h
@@ -24,8 +24,12 @@
#ifndef __ARM_COMPUTE_CLSCHEDULER_H__
#define __ARM_COMPUTE_CLSCHEDULER_H__
+#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLTypes.h"
#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Types.h"
namespace arm_compute
{
@@ -50,7 +54,7 @@
void default_init()
{
CLKernelLibrary::get().init("./cl_kernels/", cl::Context::getDefault(), cl::Device::getDefault());
- init(cl::Context::getDefault(), cl::CommandQueue::getDefault());
+ init(cl::Context::getDefault(), cl::CommandQueue::getDefault(), cl::Device::getDefault());
}
/** Schedule the execution of the passed kernel if possible.
*
@@ -63,11 +67,14 @@
*
* @param[in] context A CL context.
* @param[in] queue A CL command queue.
+ * @param[in] device A CL device.
*/
- void init(cl::Context context = cl::Context::getDefault(), cl::CommandQueue queue = cl::CommandQueue::getDefault())
+ void init(cl::Context context = cl::Context::getDefault(), cl::CommandQueue queue = cl::CommandQueue::getDefault(),
+ cl::Device device = cl::Device::getDefault())
{
_context = std::move(context);
_queue = std::move(queue);
+ _target = get_target_from_device(device);
}
/** Accessor for the associated CL context.
@@ -97,6 +104,15 @@
return _queue;
}
+ /** Get the target GPU.
+ *
+ * @return The target GPU.
+ */
+ GPUTarget target() const
+ {
+ return _target;
+ }
+
/** Accessor to set the CL command queue to be used by the scheduler.
*
* @param[in] queue A CL command queue.
@@ -106,6 +122,15 @@
_queue = std::move(queue);
}
+ /** Accessor to set target GPU to be used by the scheduler.
+ *
+ * @param[in] target The target GPU.
+ */
+ void set_target(GPUTarget target)
+ {
+ _target = target;
+ }
+
/** Blocks until all commands in the associated command queue have finished. */
void sync()
{
@@ -127,6 +152,7 @@
private:
cl::Context _context;
cl::CommandQueue _queue;
+ GPUTarget _target;
};
}
#endif /* __ARM_COMPUTE_CLSCHEDULER_H__ */
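
The new target() accessor lets callers query the GPU architecture the queue was created on. A sketch (the GPUTarget check is illustrative):

    #include "arm_compute/runtime/CL/CLScheduler.h"

    using namespace arm_compute;

    void scheduler_sketch()
    {
        // default_init() now also deduces the target GPU from the default device.
        CLScheduler::get().default_init();

        if(CLScheduler::get().target() == GPUTarget::BIFROST)
        {
            // e.g. select a Bifrost-tuned kernel configuration
        }

        CLScheduler::get().sync(); // Wait for all enqueued commands to finish
    }
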
diff --git a/arm_compute/runtime/CL/CLSubTensor.h b/arm_compute/runtime/CL/CLSubTensor.h
new file mode 100644
index 0000000..4bab164
--- /dev/null
+++ b/arm_compute/runtime/CL/CLSubTensor.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSUBTENSOR_H__
+#define __ARM_COMPUTE_CLSUBTENSOR_H__
+
+#include "arm_compute/core/SubTensorInfo.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensorInfo;
+
+/** Basic implementation of the OpenCL sub-tensor interface */
+class CLSubTensor : public ICLTensor
+{
+public:
+ /** Constructor
+ *
+ * @param[in] parent Parent tensor
+ * @param[in] tensor_shape Shape of the subtensor
+ * @param[in] coords Coordinates of the first subtensor element inside the parent tensor.
+ */
+ CLSubTensor(ICLTensor *parent, const TensorShape &tensor_shape, const Coordinates &coords);
+ /** Destructor: free the tensor's memory */
+ ~CLSubTensor() = default;
+ /** Prevent instances of this class from being copy constructed */
+ CLSubTensor(const CLSubTensor &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSubTensor &operator=(const CLSubTensor &) = delete;
+ /** Allow instances of this class to be move constructed */
+ CLSubTensor(CLSubTensor &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSubTensor &operator=(CLSubTensor &&) = default;
+
+ /** Enqueue a map operation of the allocated buffer.
+ *
+ * @note Mapping a subtensor will lead to the mapping of the whole parent tensor for now.
+ *
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed.
+ */
+ void map(bool blocking = true);
+ using ICLTensor::map;
+ /** Enqueue an unmap operation of the allocated and mapped buffer.
+ *
+ * @note Unmapping a subtensor will lead to the unmapping of the whole parent tensor for now.
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ */
+ void unmap();
+ using ICLTensor::unmap;
+
+ /** Return the parent tensor of the subtensor
+ *
+ * @return Parent tensor
+ */
+ ICLTensor *parent();
+
+ // Inherited methods overridden:
+ ITensorInfo *info() const override;
+ ITensorInfo *info() override;
+ const cl::Buffer &cl_buffer() const override;
+
+protected:
+ // Inherited methods overridden:
+ uint8_t *do_map(cl::CommandQueue &q, bool blocking) override;
+ void do_unmap(cl::CommandQueue &q) override;
+
+private:
+ ICLTensor *_parent;
+ mutable SubTensorInfo _info;
+};
+}
+#endif /*__ARM_COMPUTE_CLSUBTENSOR_H__ */
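
A sketch of the sub-tensor view (shapes and coordinates are illustrative): the sub-tensor shares the parent's cl::Buffer, so no extra device memory is allocated.

    #include "arm_compute/runtime/CL/CLSubTensor.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void subtensor_sketch()
    {
        // Parent: a 16x16 feature map with 8 channels (F32).
        CLTensor parent;
        parent.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32));
        parent.allocator()->allocate();

        // View of channels [2, 6): same buffer, offset coordinates.
        CLSubTensor sub(&parent, TensorShape(16U, 16U, 4U), Coordinates(0, 0, 2));

        sub.map();   // Currently maps the whole parent tensor
        // ... host-side access via sub.buffer() ...
        sub.unmap();
    }
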
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
new file mode 100644
index 0000000..d766d1c
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__
+#define __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLBatchNormalizationLayerKernel and simulate a batch normalization layer.
+ *
+ * Batch normalization is calculated by:
+ * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f]
+ *
+ */
+class CLBatchNormalizationLayer : public IFunction
+{
+public:
+ /** Default constructor */
+ CLBatchNormalizationLayer();
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM].
+ * The rest are optional and used for representing batches. Data types supported: F32.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+ * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] epsilon Small value to avoid division by zero.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLBatchNormalizationLayerKernel _norm_kernel; /**< BatchNormalization layer kernel to run */
+};
+}
+#endif /* __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__ */
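
A configuration sketch, assuming the tensors have already been initialised and allocated with matching shapes (mean/var/beta/gamma are 1D of size FM; the epsilon value is illustrative):

    #include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h"

    using namespace arm_compute;

    void batchnorm_sketch(ICLTensor *in, ICLTensor *out, ICLTensor *mean,
                          ICLTensor *var, ICLTensor *beta, ICLTensor *gamma)
    {
        CLBatchNormalizationLayer bn;
        bn.configure(in, out, mean, var, beta, gamma, 0.001f);
        bn.run(); // Enqueues CLBatchNormalizationLayerKernel on the scheduler's queue
    }
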
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 2a9b487..6a40396 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -27,12 +27,12 @@
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
-#include "arm_compute/core/CL/kernels/CLConvolutionLayerWeightsReshapeKernel.h"
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
+#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
@@ -42,6 +42,34 @@
{
class ICLTensor;
+/** Function to reshape and transpose the weights. This function calls the following kernels:
+ * -# @ref CLWeightsReshapeKernel
+ * -# @ref CLGEMMTranspose1xWKernel
+ */
+class CLConvolutionLayerReshapeWeights : public IFunction
+{
+public:
+ /** Constructor */
+ CLConvolutionLayerReshapeWeights();
+ /** Set the input and output tensors.
+ *
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: F32.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+ * @param[out] output Destination tensor. Data types supported: Same as @p weights.
+ * @param[in] transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise.
+ */
+ void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose1xW);
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLConvolutionLayerWeightsReshapeKernel _weights_reshape_kernel;
+ CLGEMMTranspose1xWKernel _weights_transposed_kernel;
+ CLTensor _weights_reshaped;
+ bool _transpose1xW;
+};
+
/** Basic function to compute the convolution layer. This function calls the following OpenCL kernels:
*
* -# @ref CLConvolutionLayerWeightsReshapeKernel (executed only once for each configuration)
@@ -58,35 +86,36 @@
CLConvolutionLayer();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F16, F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input.
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F16, F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+ * tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input.
*/
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo());
// Inherited methods overridden:
void run() override;
private:
- CLIm2ColKernel _input_im2col_kernel;
- CLConvolutionLayerWeightsReshapeKernel _weights_reshape_kernel;
- CLGEMMInterleave4x4Kernel _input_interleave_kernel;
- CLGEMMTranspose1xWKernel _weights_transposed_kernel;
- CLGEMMMatrixMultiplyKernel _mm_kernel;
- CLCol2ImKernel _output_col2im_kernel;
- CLTensor _input_im2col_reshaped;
- CLTensor _input_interleaved_reshaped;
- CLTensor _weights_reshaped;
- CLTensor _weights_transposed;
- CLTensor _gemm_output;
- bool _is_first_run;
- bool _has_bias;
- bool _is_fc;
+ CLConvolutionLayerReshapeWeights _reshape_weights;
+ CLIm2ColKernel _input_im2col_kernel;
+ CLGEMMInterleave4x4Kernel _input_interleave_kernel;
+ CLGEMMMatrixMultiplyKernel _mm_kernel;
+ CLCol2ImKernel _output_col2im_kernel;
+ CLTensor _input_im2col_reshaped;
+ CLTensor _input_interleaved_reshaped;
+ CLTensor _weights_reshaped;
+ CLTensor _weights_transposed;
+ CLTensor _gemm_output;
+ bool _has_bias;
+ bool _is_fully_connected_convolution;
+ bool _are_weights_reshaped;
};
}
#endif /* __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ */
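
The new WeightsInfo argument defaults to "not reshaped". A sketch of the common case, assuming already-allocated tensors (stride and padding values are illustrative):

    #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"

    using namespace arm_compute;

    void convolution_sketch(ICLTensor *in, ICLTensor *weights, ICLTensor *biases, ICLTensor *out)
    {
        CLConvolutionLayer conv;
        // Default WeightsInfo(): weights are still in [kernel_x, kernel_y, IFM, OFM]
        // layout and CLConvolutionLayerReshapeWeights runs once on the first call.
        conv.configure(in, weights, biases, out, PadStrideInfo(1, 1, 1, 1), WeightsInfo());
        conv.run();
    }
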
diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
new file mode 100644
index 0000000..3199936
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+#define __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+class CLDepthConcatenateKernel;
+class CLFillBorderKernel;
+
+/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
+ * -# @ref CLDepthConcatenateKernel
+ *
+ */
+class CLDepthConcatenate : public IFunction
+{
+public:
+ /** Default constructor */
+ CLDepthConcatenate();
+ /** Initialise the kernel's inputs vector and output.
+ *
+ * @param[in,out] inputs_vector The vector containing all the tensors to concatenate. Data types supported: F32.
+ * @param[out] output Output tensor. Data types supported: F32.
+ */
+ void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ std::vector<ICLTensor *> _inputs_vector;
+ std::unique_ptr<CLDepthConcatenateKernel[]> _concat_kernels_vector;
+ std::unique_ptr<CLFillBorderKernel[]> _border_handlers_vector;
+ unsigned int _num_inputs;
+};
+}
+#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ */
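
A sketch of concatenating two branch outputs along z, assuming the output depth equals the sum of the input depths:

    #include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h"

    #include <vector>

    using namespace arm_compute;

    void depth_concat_sketch(ICLTensor *branch0, ICLTensor *branch1, ICLTensor *out)
    {
        std::vector<ICLTensor *> inputs{ branch0, branch1 };

        CLDepthConcatenate concat;
        concat.configure(inputs, out); // One kernel (plus border handler) per input
        concat.run();
    }
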
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 09e4fc9..826f445 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -36,13 +36,44 @@
namespace arm_compute
{
+/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls the following kernels:
+ *
+ * -# @ref CLTransposeKernel (if @p transpose_weights is set to true)
+ * -# @ref CLGEMMTranspose1xWKernel (if @p is_batched_fc_layer is set to true)
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class CLFullyConnectedLayerReshapeWeights : public IFunction
+{
+public:
+ /** Constructor */
+ CLFullyConnectedLayerReshapeWeights();
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/F32.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] transpose_weights True if the weights must be transposed.
+ * @param[in] is_batched_fc_layer True if it is a batched fully connected layer.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool transpose_weights, bool is_batched_fc_layer);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLTransposeKernel _transpose_kernel;
+ CLGEMMTranspose1xWKernel _transpose1xW_kernel;
+ CLTensor _transpose_output;
+ bool _transpose_weights;
+ bool _is_batched_fc_layer;
+};
+
/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following OpenCL kernels:
*
* -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer)
- * -# @ref CLTransposeKernel (if @p transpose_weights is set to true) (called once)
- * -# @ref NEGEMMTranspose1xWKernel (called once if we have a multi-batch input)
- * -# @ref NEGEMMInterleave4x4Kernel (called if we have a multi-batch input)
- * -# @ref NEGEMMMatrixMultiplyKernel
+ * -# @ref CLFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false) (called once)
+ * -# @ref CLGEMMInterleave4x4Kernel (called if we have a multi-batch input)
+ * -# @ref CLGEMMMatrixMultiplyKernel
* -# @ref CLGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr)
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
@@ -54,13 +85,14 @@
CLFullyConnectedLayer();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data type supported: F16, F32.
- * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input
- * @param[in] biases Bias tensor. It can be nullptr. Data type supported:Same as @p input.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true.
+ * @param[in] input Source tensor. Data type supported: F16/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input
+ * @param[in] biases Bias tensor. It can be nullptr. Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true.
+ * @param[in] are_weights_reshaped (Optional) True if the weights have already been reshaped with @ref CLFullyConnectedLayerReshapeWeights; if false, they are reshaped on the first run. Defaults to false.
*/
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose_weights = true);
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose_weights = true, bool are_weights_reshaped = false);
//Inherited methods override
void run() override;
@@ -71,21 +103,18 @@
void configure_conv_fc_wb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
void configure_conv_fc_nb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
- CLIm2ColKernel _im2col_kernel;
- CLTransposeKernel _transpose_kernel;
- CLGEMMTranspose1xWKernel _transpose1xW_kernel;
- CLGEMMInterleave4x4Kernel _interleave4x4_kernel;
- CLGEMMMatrixMultiplyKernel _mm_kernel;
- CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
- CLTensor _im2col_output;
- CLTensor _interleave4x4_output;
- CLTensor _transpose_output;
- CLTensor _transpose1xW_output;
- bool _is_first_run;
- bool _transpose_weights;
- bool _fc_after_conv;
- bool _batched_fc_layer;
- bool _accumulate_biases;
+ CLIm2ColKernel _im2col_kernel;
+ CLFullyConnectedLayerReshapeWeights _reshape_weights_kernel;
+ CLGEMMInterleave4x4Kernel _interleave4x4_kernel;
+ CLGEMMMatrixMultiplyKernel _mm_kernel;
+ CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
+ CLTensor _im2col_output;
+ CLTensor _interleave4x4_output;
+ CLTensor _reshape_weights_output;
+ bool _are_weights_reshaped;
+ bool _is_fc_after_conv;
+ bool _is_batched_fc_layer;
+ bool _accumulate_biases;
};
}
#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ */
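
A sketch showing the two weight-handling flags (the values shown are the defaults):

    #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"

    using namespace arm_compute;

    void fully_connected_sketch(ICLTensor *in, ICLTensor *weights, ICLTensor *biases, ICLTensor *out)
    {
        CLFullyConnectedLayer fc;
        // transpose_weights = true:     weights arrive untransposed;
        // are_weights_reshaped = false: CLFullyConnectedLayerReshapeWeights runs on the first call.
        fc.configure(in, weights, biases, out, true, false);
        fc.run();
    }
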
diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
new file mode 100644
index 0000000..cdb23bf
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGDESCRIPTOR_H__
+#define __ARM_COMPUTE_CLHOGDESCRIPTOR_H__
+
+#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class IHOG;
+/** Basic function to calculate HOG descriptor. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLHOGGradient
+ * -# @ref CLHOGOrientationBinningKernel
+ * -# @ref CLHOGBlockNormalizationKernel
+ *
+ */
+class CLHOGDescriptor : public IFunction
+{
+public:
+ /** Default constructor */
+ CLHOGDescriptor();
+ /** Initialise the function's source, destination, HOG data-object and border mode
+ *
+ * @param[in, out] input Input tensor. Data type supported: U8
+ * (Written to only for @p border_mode != UNDEFINED)
+ * @param[out] output Output tensor which stores the HOG descriptor. Data type supported: F32. The number of channels is equal to the number of histogram bins per block
+ * @param[in] hog HOG data object which describes the HOG descriptor
+ * @param[in] border_mode Border mode to use.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+ // Inherited method overridden:
+ void run() override;
+
+private:
+ CLHOGGradient _gradient;
+ CLHOGOrientationBinningKernel _orient_bin;
+ CLHOGBlockNormalizationKernel _block_norm;
+ CLTensor _mag;
+ CLTensor _phase;
+ CLTensor _hog_space;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGDESCRIPTOR_H__ */
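
A sketch of computing a descriptor from a U8 image, assuming a CLHOG initialised as shown earlier (border mode and value are illustrative):

    #include "arm_compute/runtime/CL/CLHOG.h"
    #include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h"

    using namespace arm_compute;

    void hog_descriptor_sketch(ICLTensor *image_u8, ICLTensor *descriptor, const CLHOG *hog)
    {
        // descriptor is F32; its number of channels equals the histogram bins per block.
        CLHOGDescriptor hog_desc;
        hog_desc.configure(image_u8, descriptor, hog, BorderMode::CONSTANT, 0);
        hog_desc.run();
    }
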
diff --git a/arm_compute/runtime/CL/functions/CLHOGDetector.h b/arm_compute/runtime/CL/functions/CLHOGDetector.h
new file mode 100644
index 0000000..0b4fad7
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGDetector.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGDETECTOR_H__
+#define __ARM_COMPUTE_CLHOGDETECTOR_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
+#include "arm_compute/core/IHOG.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to execute HOG detector based on linear SVM. This function calls the following OpenCL kernel:
+ *
+ * -# @ref CLHOGDetectorKernel
+ *
+ */
+class CLHOGDetector : public IFunction
+{
+public:
+ /** Default constructor */
+ CLHOGDetector();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGDetector(const CLHOGDetector &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGDetector &operator=(const CLHOGDetector &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHOGDetector(CLHOGDetector &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHOGDetector &operator=(CLHOGDetector &&) = default;
+ /** Default destructor */
+ ~CLHOGDetector() = default;
+ /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
+ *
+ * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is the caller's responsibility to clear it.
+ *
+ * @param[in] input Input tensor. It is the output of @ref CLHOGDescriptor. Data type supported: F32
+ * @param[in] hog HOG data-object that describes the HOG descriptor
+ * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects
+ * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+ * It must be a multiple of the block stride stored in hog
+ * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
+ * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
+ */
+ void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLHOGDetectorKernel _hog_detector_kernel;
+ ICLDetectionWindowArray *_detection_windows;
+ cl::Buffer _num_detection_windows;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGDETECTOR_H__ */
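
A detection sketch feeding the descriptor computed above into the linear-SVM detector (the array capacity and window stride are illustrative; the stride must be a multiple of the HOG block stride):

    #include "arm_compute/runtime/CL/CLArray.h"
    #include "arm_compute/runtime/CL/CLHOG.h"
    #include "arm_compute/runtime/CL/functions/CLHOGDetector.h"

    using namespace arm_compute;

    void hog_detector_sketch(ICLTensor *descriptor, const CLHOG *hog)
    {
        // The function never clears the array, so reset it between frames.
        CLDetectionWindowArray detection_windows(100);

        CLHOGDetector detector;
        detector.configure(descriptor, hog, &detection_windows, Size2D(8, 8));
        detector.run();
    }
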
diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h
new file mode 100644
index 0000000..e74a684
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGGradient.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGGRADIENT_H__
+#define __ARM_COMPUTE_CLHOGGRADIENT_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLDerivative.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Basic function to calculate the gradient for HOG. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLDerivative
+ * -# @ref CLMagnitudePhaseKernel
+ *
+ */
+class CLHOGGradient : public IFunction
+{
+public:
+ /** Default constructor */
+ CLHOGGradient();
+ /** Initialise the function's source, destinations, phase type and border mode
+ *
+ * @param[in, out] input Input tensor. Data type supported: U8.
+ * (Written to only for @p border_mode != UNDEFINED)
+ * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16.
+ * @param[out] output_phase Output tensor (phase). Data type supported: U8.
+ * @param[in] phase_type Type of @ref PhaseType
+ * @param[in] border_mode Border mode to use
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ void configure(ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+ // Inherited method overridden:
+ void run() override;
+
+private:
+ CLDerivative _derivative;
+ CLMagnitudePhaseKernel _mag_phase;
+ CLTensor _gx;
+ CLTensor _gy;
+};
+}
+#endif /*__ARM_COMPUTE_CLHOGGRADIENT_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
new file mode 100644
index 0000000..3fe0fa9
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGMULTIDETECTION_H__
+#define __ARM_COMPUTE_CLHOGMULTIDETECTION_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLMultiHOG.h"
+#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
+#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following kernels:
+ *
+ * -# @ref CLHOGGradient
+ * -# @ref CLHOGOrientationBinningKernel
+ * -# @ref CLHOGBlockNormalizationKernel
+ * -# @ref CLHOGDetector
+ * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true)
+ *
+ * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same:
+ * -# Phase type
+ * -# Normalization type
+ * -# L2 hysteresis threshold if the normalization type is L2HYS_NORM
+ *
+ */
+class CLHOGMultiDetection : public IFunction
+{
+public:
+ /** Default constructor */
+ CLHOGMultiDetection();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGMultiDetection(const CLHOGMultiDetection &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGMultiDetection &operator=(const CLHOGMultiDetection &) = delete;
+ /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
+ *
+ * @param[in, out] input Input tensor. Data type supported: U8
+ * (Written to only for @p border_mode != UNDEFINED)
+ * @param[in] multi_hog Container of multiple HOG data object. Each HOG data object describes one HOG model to detect.
+ * This container should store the HOG data-objects in descending or ascending cell_size width order,
+ * which makes it possible to skip the HOG descriptor computation for some of the HOG data-objects
+ * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects
+ * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object
+ * The dimension of this array must be the same as multi_hog->num_models()
+ * The i-th detection_window_stride of this array must be a multiple of the block_stride stored in the i-th multi_hog array
+ * @param[in] border_mode Border mode to use.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
+ * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not.
+ * True if the non-maxima suppression stage has to be computed
+ * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage
+ *
+ */
+ void configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode,
+ uint8_t constant_border_value = 0,
+ float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
+
+ // Inherited method overridden:
+ void run() override;
+
+private:
+ CLHOGGradient _gradient_kernel;
+ std::unique_ptr<CLHOGOrientationBinningKernel[]> _orient_bin_kernel;
+ std::unique_ptr<CLHOGBlockNormalizationKernel[]> _block_norm_kernel;
+ std::unique_ptr<CLHOGDetector[]> _hog_detect_kernel;
+ std::unique_ptr<CPPDetectionWindowNonMaximaSuppressionKernel> _non_maxima_kernel;
+ std::unique_ptr<CLTensor[]> _hog_space;
+ std::unique_ptr<CLTensor[]> _hog_norm_space;
+ ICLDetectionWindowArray *_detection_windows;
+ CLTensor _mag;
+ CLTensor _phase;
+ bool _non_maxima_suppression;
+ size_t _num_orient_bin_kernel;
+ size_t _num_block_norm_kernel;
+ size_t _num_hog_detect_kernel;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGMULTIDETECTION_H__ */
\ No newline at end of file
diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
new file mode 100644
index 0000000..b4e4691
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__
+#define __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
+#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
+#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to compute the locally connected layer. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLLocallyConnectedLayerWeightsReshapeKernel (executed only once for each configuration)
+ * -# @ref CLIm2ColKernel
+ * -# @ref CLLocallyConnectedMatrixMultiplyKernel
+ * -# @ref CLCol2ImKernel
+ */
+class CLLocallyConnectedLayer : public IFunction
+{
+public:
+ /** Default constructor */
+ CLLocallyConnectedLayer();
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F32.
+ * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLIm2ColKernel _input_im2col_kernel;
+ CLLocallyConnectedLayerWeightsReshapeKernel _weights_reshape_kernel;
+ CLLocallyConnectedMatrixMultiplyKernel _mm_kernel;
+ CLCol2ImKernel _output_col2im_kernel;
+ CLTensor _input_im2col_reshaped;
+ CLTensor _weights_reshaped;
+ CLTensor _gemm_output;
+ bool _is_first_run;
+};
+}
+#endif /* __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h
index 0828af6..7a37e5e 100644
--- a/arm_compute/runtime/CPP/CPPScheduler.h
+++ b/arm_compute/runtime/CPP/CPPScheduler.h
@@ -24,35 +24,28 @@
#ifndef __ARM_COMPUTE_CPPSCHEDULER_H__
#define __ARM_COMPUTE_CPPSCHEDULER_H__
-#include <cstddef>
+#include "arm_compute/runtime/IScheduler.h"
+
#include <memory>
namespace arm_compute
{
-class ICPPKernel;
class Thread;
-/** Pool of threads to automatically split a kernel's execution among several threads. */
-class CPPScheduler
+/** C++11 implementation of a pool of threads to automatically split a kernel's execution among several threads. */
+class CPPScheduler : public IScheduler
{
-private:
- /** Constructor: create a pool of threads. */
- CPPScheduler();
-
public:
- /** Force the re-creation of the pool of threads to use the specified number of threads.
+ /** Sets the number of threads the scheduler will use to run the kernels.
*
- * @param[in] num_threads If set to 0, then std::thread::hardware_concurrency() threads will be used, otherwise the number of threads specified.
+ * @param[in] num_threads If set to 0, then the maximum number of threads supported by C++11 will be used, otherwise the number of threads specified.
*/
- void force_number_of_threads(int num_threads);
+ void set_num_threads(unsigned int num_threads) override;
/** Returns the number of threads that the CPPScheduler has in its pool.
*
* @return Number of threads available in CPPScheduler.
*/
- int num_threads() const
- {
- return _num_threads;
- }
+ unsigned int num_threads() const override;
/** Access the scheduler singleton
*
* @return The scheduler
@@ -65,12 +58,15 @@
* - The scheduler has been initialized with only one thread.
*
* @param[in] kernel Kernel to execute.
- * @param[in] split_dimension Dimension along which to split the kernel's execution window (By default 1/Y)
+ * @param[in] split_dimension Dimension along which to split the kernel's execution window.
*/
- void multithread(ICPPKernel *kernel, size_t split_dimension = 1);
+ void schedule(ICPPKernel *kernel, unsigned int split_dimension) override;
private:
- int _num_threads;
+ /** Constructor: create a pool of threads. */
+ CPPScheduler();
+
+ unsigned int _num_threads;
std::unique_ptr<Thread[], void (*)(Thread *)> _threads;
};
}
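
Migration sketch for the renamed API:

    #include "arm_compute/runtime/CPP/CPPScheduler.h"

    using namespace arm_compute;

    void cpp_scheduler_sketch()
    {
        // Replaces force_number_of_threads(); 0 keeps one thread per core.
        CPPScheduler::get().set_num_threads(4);

        // NEON-based functions now dispatch through IScheduler::schedule(),
        // splitting each kernel's execution window across the four threads.
    }
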
diff --git a/arm_compute/runtime/IFunction.h b/arm_compute/runtime/IFunction.h
index 0cd21b9..a4e7ed1 100644
--- a/arm_compute/runtime/IFunction.h
+++ b/arm_compute/runtime/IFunction.h
@@ -36,7 +36,7 @@
* - Multi-threading is used for the kernels which are parallelisable.
* - By default std::thread::hardware_concurrency() threads are used.
*
- * @note @ref CPPScheduler::force_number_of_threads() can be used to manually set the number of threads
+ * @note @ref CPPScheduler::set_num_threads() can be used to manually set the number of threads
*
* For OpenCL kernels:
* - All the kernels are enqueued on the queue associated with CLScheduler.
diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h
new file mode 100644
index 0000000..39c027c
--- /dev/null
+++ b/arm_compute/runtime/IScheduler.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ISCHEDULER_H__
+#define __ARM_COMPUTE_ISCHEDULER_H__
+
+namespace arm_compute
+{
+class ICPPKernel;
+
+/** Scheduler interface to run kernels */
+class IScheduler
+{
+public:
+ /** Destructor. */
+ virtual ~IScheduler() = default;
+ /** Sets the number of threads the scheduler will use to run the kernels.
+ *
+ * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified.
+ */
+ virtual void set_num_threads(unsigned int num_threads) = 0;
+ /** Returns the number of threads that the scheduler has in its pool.
+ *
+ * @return Number of threads available in the scheduler.
+ */
+ virtual unsigned int num_threads() const = 0;
+ /** Runs the kernel; implementations may split its execution window along the given dimension across several threads.
+ *
+ * @param[in] kernel Kernel to execute.
+ * @param[in] split_dimension Dimension along which to split the kernel's execution window.
+ */
+ virtual void schedule(ICPPKernel *kernel, unsigned int split_dimension) = 0;
+};
+}
+#endif /* __ARM_COMPUTE_ISCHEDULER_H__ */
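
A minimal sketch of a custom scheduler behind the new interface, assuming ICPPKernel::run(const Window &) and IKernel::window(); it executes every kernel synchronously on the caller's thread and ignores split_dimension, which is essentially what a single-threaded implementation would do:

    #include "arm_compute/core/CPP/ICPPKernel.h"
    #include "arm_compute/runtime/IScheduler.h"

    using namespace arm_compute;

    class SingleThreadExampleScheduler final : public IScheduler
    {
    public:
        void set_num_threads(unsigned int) override
        {
            // Single-threaded by design: nothing to configure.
        }
        unsigned int num_threads() const override
        {
            return 1;
        }
        void schedule(ICPPKernel *kernel, unsigned int) override
        {
            kernel->run(kernel->window()); // Full window, caller's thread
        }
    };
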
diff --git a/arm_compute/runtime/MultiHOG.h b/arm_compute/runtime/MultiHOG.h
index 486ae14..32bad70 100644
--- a/arm_compute/runtime/MultiHOG.h
+++ b/arm_compute/runtime/MultiHOG.h
@@ -29,6 +29,8 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/HOG.h"
+#include <memory>
+
namespace arm_compute
{
/** CPU implementation of multi HOG data-object */
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index ef17599..daf76f3 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -30,6 +30,7 @@
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
+#include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h"
#include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h"
#include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h"
@@ -41,9 +42,11 @@
#include "arm_compute/runtime/NEON/functions/NEColorConvert.h"
#include "arm_compute/runtime/NEON/functions/NEConvolution.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h"
#include "arm_compute/runtime/NEON/functions/NEDerivative.h"
#include "arm_compute/runtime/NEON/functions/NEDilate.h"
+#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h"
#include "arm_compute/runtime/NEON/functions/NEErode.h"
#include "arm_compute/runtime/NEON/functions/NEFastCorners.h"
@@ -65,6 +68,7 @@
#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h"
#include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h"
#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h"
+#include "arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEMagnitude.h"
#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h"
#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h"
diff --git a/arm_compute/runtime/NEON/NEScheduler.h b/arm_compute/runtime/NEON/NEScheduler.h
index c65d6b7..94c82b2 100644
--- a/arm_compute/runtime/NEON/NEScheduler.h
+++ b/arm_compute/runtime/NEON/NEScheduler.h
@@ -24,10 +24,10 @@
#ifndef __ARM_COMPUTE_NESCHEDULER_H__
#define __ARM_COMPUTE_NESCHEDULER_H__
-#include "arm_compute/runtime/CPP/CPPScheduler.h"
+#include "arm_compute/runtime/Scheduler.h"
namespace arm_compute
{
-using NEScheduler = CPPScheduler;
+using NEScheduler = Scheduler;
}
#endif /*__ARM_COMPUTE_NESCHEDULER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index 3fb3e20..35366e1 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -41,7 +41,7 @@
public:
/** Set the input and output tensor.
*
- * @param[in] input Source tensor. Data type supported: F32.
+ * @param[in] input Source tensor. Data type supported: QS8/F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] activation_info Activation layer parameters.
*/
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
index 8f66a6d..8e34e98 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
@@ -35,11 +35,11 @@
class NEArithmeticAddition : public INESimpleFunction
{
public:
- /** Initialise the kernel's inputs, output and convertion policy.
+ /** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in] input1 First tensor input. Data types supported: U8 or S16.
- * @param[in] input2 Second tensor input. Data types supported: U8 or S16.
- * @param[out] output Output tensor. Data types supported: U8 or S16.
+ * @param[in] input1 First tensor input. Data types supported: U8/S16.
+ * @param[in] input2 Second tensor input. Data types supported: U8/S16.
+ * @param[out] output Output tensor. Data types supported: U8/S16.
* @param[in] policy Policy to use to handle overflow.
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
index d0eaff7..841b591 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
@@ -35,11 +35,11 @@
class NEArithmeticSubtraction : public INESimpleFunction
{
public:
- /** Initialise the kernel's inputs, output and convertion policy.
+ /** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in] input1 First tensor input. Data types supported: U8 or S16.
- * @param[in] input2 Second tensor input. Data types supported: U8 or S16.
- * @param[out] output Output tensor. Data types supported: U8 or S16.
+ * @param[in] input1 First tensor input. Data types supported: U8/S16.
+ * @param[in] input2 Second tensor input. Data types supported: U8/S16.
+ * @param[out] output Output tensor. Data types supported: U8/S16.
* @param[in] policy Policy to use to handle overflow.
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
new file mode 100644
index 0000000..b0b5c12
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H__
+#define __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H__
+
+#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEBatchNormalizationLayerKernel and simulate a batch normalization layer.
+ *
+ * Batch normalization is calculated by:
+ * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f]
+ *
+ */
+class NEBatchNormalizationLayer : public IFunction
+{
+public:
+ /** Default constructor */
+ NEBatchNormalizationLayer();
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM].
+ * The rest are optional and used for representing batches. Data types supported: QS8/F32.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+ * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] epsilon Small value to avoid division by zero.
+ */
+ void configure(const ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ NEBatchNormalizationLayerKernel _norm_kernel; /**< Batch normalization layer kernel */
+};
+}
+#endif /* __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H__ */
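
A minimal usage sketch for the new batch normalization function (the shapes, data type and epsilon below are illustrative assumptions, not taken from this patch):

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void example_batch_norm()
    {
        Tensor src, dst, mean, var, beta, gamma;
        src.allocator()->init(TensorInfo(TensorShape(28U, 28U, 16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(28U, 28U, 16U), 1, DataType::F32));
        for(Tensor *t : { &mean, &var, &beta, &gamma })
        {
            // One value per feature map [FM].
            t->allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        }

        NEBatchNormalizationLayer bn;
        bn.configure(&src, &dst, &mean, &var, &beta, &gamma, 1e-5f);

        for(Tensor *t : { &src, &dst, &mean, &var, &beta, &gamma })
        {
            t->allocator()->allocate();
        }
        // ... fill src, mean, var, beta and gamma, then:
        bn.run();
    }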
diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h
index 5c80977..1704d9f 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolution.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolution.h
@@ -49,7 +49,7 @@
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8 or S16.
+ * @param[out] output Destination tensor, Data types supported: U8/S16.
* @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
* @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
* @param[in] border_mode Strategy to use for borders.
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index a6862ca..a8fff8d 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -27,12 +27,12 @@
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
-#include "arm_compute/core/NEON/kernels/NEConvolutionLayerWeightsReshapeKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
+#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
@@ -40,11 +40,38 @@
{
class ITensor;
-/** Basic function to simulate a convolution layer. This function calls the following OpenCL kernels:
- * -# @ref NEConvolutionLayerWeightsReshapeKernel (executed only once for each configuration)
- * -# @ref NEGEMMTranspose1xWKernel (executed only once for each configuration)
+/** Function to reshape and perform 1xW transposition on the weights. This function calls the following kernels:
+ * -# @ref NEWeightsReshapeKernel
+ * -# @ref NEGEMMTranspose1xWKernel (executed in case GEMM is required for the operation)
+ */
+class NEConvolutionLayerReshapeWeights : public IFunction
+{
+public:
+ /** Constructor */
+ NEConvolutionLayerReshapeWeights();
+ /** Set the input and output tensors.
+ *
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QS8/F32.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+ * @param[out] output Destination tensor. Data types supported: Same as @p weights.
+ * @param[in] transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise.
+ */
+ void configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW);
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ NEWeightsReshapeKernel _weights_reshape_kernel;
+ NEGEMMTranspose1xWKernel _weights_transposed_kernel;
+ Tensor _weights_reshaped;
+ bool _transpose1xW;
+};
+
+/** Basic function to simulate a convolution layer. This function calls the following NEON kernels:
+ * -# @ref NEWeightsReshapeKernel (executed only once for each configuration)
* -# @ref NEIm2ColKernel
- * -# @ref NEGEMMInterleave4x4Kernel
+ * -# @ref NEGEMMInterleave4x4Kernel (executed only in case GEMM is required for the operation)
* -# @ref NEGEMMMatrixMultiplyKernel
* -# @ref NECol2ImKernel
*/
@@ -55,34 +82,34 @@
NEConvolutionLayer();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QS8/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] weights_info Specifies whether the weights tensor has been reshaped with NEWeightsReshapeKernel and, if the convolution is not computed as a
+ * fully connected layer, whether it has also been transposed with NEGEMMTranspose1xWKernel.
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info);
-
+ void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo());
// Inherited methods overridden:
void run() override;
private:
- NEIm2ColKernel _input_im2col_kernel;
- NEGEMMInterleave4x4Kernel _input_interleave_kernel;
- NEConvolutionLayerWeightsReshapeKernel _weights_reshape_kernel;
- NEGEMMTranspose1xWKernel _weights_transposed_kernel;
- NEGEMMMatrixMultiplyKernel _mm_kernel;
- NECol2ImKernel _output_col2im_kernel;
- Tensor _input_im2col_reshaped;
- Tensor _input_interleaved_reshaped;
- Tensor _weights_reshaped;
- Tensor _weights_transposed;
- Tensor _gemm_output;
- bool _is_first_run;
- bool _has_bias;
+ NEIm2ColKernel _input_im2col_kernel;
+ NEGEMMInterleave4x4Kernel _input_interleave_kernel;
+ NEConvolutionLayerReshapeWeights _reshape_weights;
+ NEGEMMMatrixMultiplyKernel _mm_kernel;
+ NECol2ImKernel _output_col2im_kernel;
+ Tensor _input_im2col_reshaped;
+ Tensor _input_interleaved_reshaped;
+ Tensor _weights_reshaped;
+ Tensor _gemm_output;
+ bool _has_bias;
+ bool _is_fully_connected_convolution;
+ bool _are_weights_reshaped;
};
}
#endif /* __ARM_COMPUTE_NECONVOLUTIONLAYER_H__ */
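
A usage sketch of the reworked interface (shapes are illustrative): leaving weights_info at its default WeightsInfo() tells the function the weights have not been reshaped yet, so NEConvolutionLayerReshapeWeights is run internally on the first execution:

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void example_convolution()
    {
        Tensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(5U, 5U, 3U, 16U), 1, DataType::F32));
        biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        // 5x5 kernel, stride 1, no padding: 32 - 5 + 1 = 28.
        dst.allocator()->init(TensorInfo(TensorShape(28U, 28U, 16U), 1, DataType::F32));

        NEConvolutionLayer conv;
        conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 0, 0));
        // Allocate and fill the tensors, then conv.run();
    }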
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h b/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h
new file mode 100644
index 0000000..02ff122
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATE_H__
+#define __ARM_COMPUTE_NEDEPTHCONCATENATE_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+class ITensor;
+class NEDepthConcatenateKernel;
+class NEFillBorderKernel;
+
+/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
+ * -# @ref NEDepthConcatenateKernel
+ *
+ */
+class NEDepthConcatenate : public IFunction
+{
+public:
+ /** Default constructor */
+ NEDepthConcatenate();
+ /** Initialise the kernel's inputs vector and output.
+ *
+ * @param[in,out] inputs_vector The vector containing all the tensors to concatenate. Data types supported: F32.
+ * @param[out] output Output tensor. Data types supported: F32.
+ */
+ void configure(std::vector<ITensor *> inputs_vector, ITensor *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ std::vector<ITensor *> _inputs_vector;
+ std::unique_ptr<NEDepthConcatenateKernel[]> _concat_kernels_vector;
+ std::unique_ptr<NEFillBorderKernel[]> _border_handlers_vector;
+ unsigned int _num_inputs;
+};
+}
+#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ */
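
A usage sketch (shapes are illustrative); the output's depth is the sum of the inputs' depths:

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void example_depth_concatenate()
    {
        Tensor a, b, out;
        a.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32));
        b.allocator()->init(TensorInfo(TensorShape(16U, 16U, 24U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(16U, 16U, 32U), 1, DataType::F32)); // 8 + 24

        NEDepthConcatenate concat;
        concat.configure({ &a, &b }, &out); // concatenates along the z axis
        // Allocate and fill the tensors, then concat.run();
    }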
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvert.h b/arm_compute/runtime/NEON/functions/NEDepthConvert.h
index 21ccca3..7c59ce4 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConvert.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConvert.h
@@ -48,17 +48,18 @@
* Input format must be different from output format.
*
* Valid conversions Input -> Output :
- * U8 -> U16, S16, U32, S32
- * U16 -> U8, U32, S32
- * S16 -> U8, U32, S32
- * U32 -> U8, U16, S16
- * S32 -> U8, U16, S16
+ * QS8 -> F32
+ * U8 -> U16, S16, S32
+ * U16 -> U8, U32
+ * S16 -> U8, S32
+ * F32 -> QS8
*
*
- * @param[in] input The input tensor to convert. Data type supported: U8, U16, S16, U32 or S32.
- * @param[out] output The output tensor. Data type supported: U8, U16, S16, U32 or S32.
+ * @param[in] input The input tensor to convert. Data type supported: QS8/U8/U16/S16/F32.
+ * @param[out] output The output tensor. Data type supported: QS8/U8/U16/S16/U32/S32/F32.
* @param[in] policy Conversion policy.
* @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
+ * It is not used for fixed point conversions.
*/
void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift);
};
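
For instance (a sketch with illustrative shapes), widening U8 data to S16, one of the valid conversions listed above:

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void example_depth_convert()
    {
        Tensor u8_img, s16_img;
        u8_img.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
        s16_img.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::S16));

        NEDepthConvert convert;
        // shift = 0: no up-scaling of the values during the conversion.
        convert.configure(&u8_img, &s16_img, ConvertPolicy::SATURATE, 0);
    }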
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
new file mode 100644
index 0000000..a356cac
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__
+#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__
+
+#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h"
+#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+/** Function to run the direct convolution.
+ *
+ * This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel for the input
+ * -# @ref NEDirectConvolutionLayerBiasAccumulateKernel
+ * -# @ref NEDirectConvolutionLayerKernel
+ */
+class NEDirectConvolutionLayer : public IFunction
+{
+public:
+ /** Constructor */
+ NEDirectConvolutionLayer();
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in, out] input Input tensor. Data types supported: QS8/F32.
+ * @param[in] weights Set of kernels to convolve the input volume.
+ * The 3rd dimension must be the same as the input's volume 3rd dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] bias Set of biases. Data type supported: Same as @p input.
+ * @param[out] output Output tensor.
+ * The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ */
+ void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ NEDirectConvolutionLayerBiasAccumulateKernel _accumulate_bias_kernel;
+ NEDirectConvolutionLayerKernel _conv_kernel;
+ NEFillBorderKernel _input_border_handler;
+ Tensor _accumulator;
+};
+}
+#endif /* __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ */
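
A usage sketch (shapes are illustrative; which kernel sizes the direct convolution supports is backend-specific, a 3x3 kernel is assumed here):

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void example_direct_convolution()
    {
        Tensor src, weights, bias, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32));
        bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));

        NEDirectConvolutionLayer conv;
        conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1)); // stride 1, pad 1
    }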
diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h
index c69c285..b6b7e77 100644
--- a/arm_compute/runtime/NEON/functions/NEFillBorder.h
+++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h
@@ -41,7 +41,7 @@
*
* @note This function fills the borders within the XY-planes.
*
- * @param[in, out] input Source tensor. Data type supported: U8, S16, S32, F32
+ * @param[in, out] input Source tensor. Data type supported: U8/QS8/S16/S32/F32
* @param[in] border_width Width of the tensor border in pixels.
* @param[in] border_mode Strategy to use for borders.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 69e27b8..33ec4ef 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -36,10 +36,41 @@
namespace arm_compute
{
+/** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels:
+ *
+ * -# @ref NETransposeKernel (if @p transpose_weights is set to true)
+ * -# @ref NEGEMMTranspose1xWKernel (if @p is_batched_fc_layer is set to true)
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class NEFullyConnectedLayerReshapeWeights : public IFunction
+{
+public:
+ /** Constructor */
+ NEFullyConnectedLayerReshapeWeights();
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/F32.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] transpose_weights True if the weights must be transposed.
+ * @param[in] is_batched_fc_layer True if it is a batched fully connected layer.
+ */
+ void configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ NETransposeKernel _transpose_kernel;
+ NEGEMMTranspose1xWKernel _transpose1xW_kernel;
+ Tensor _transpose_output;
+ bool _transpose_weights;
+ bool _is_batched_fc_layer;
+};
+
/** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels:
- * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
- * -# @ref NETransposeKernel (if @p transpose_weights flag is set to true) (called once)
- * -# @ref NEGEMMTranspose1xWKernel (called once if we have a multi-batch input)
+ * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
+ * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped flag is set to false) (called once)
* -# @ref NEGEMMInterleave4x4Kernel (called if we have a multi-batch input)
* -# @ref NEGEMMMatrixMultiplyKernel
* -# @ref NEGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr)
@@ -53,13 +84,14 @@
NEFullyConnectedLayer();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data type supported: F32.
- * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input.
- * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true.
+ * @param[in] input Source tensor. Data type supported: QS8/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input.
+ * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] transpose_weights (Optional) Transpose the weights tensor if true. Defaults to true.
+ * @param[in] are_weights_reshaped (Optional) True if the weights have already been reshaped with NEFullyConnectedLayerReshapeWeights; if false the function reshapes them. Defaults to false.
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights = true);
+ void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights = true, bool are_weights_reshaped = false);
//Inherited methods override
void run() override;
@@ -70,21 +102,18 @@
void configure_conv_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output);
void configure_conv_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output);
- NEIm2ColKernel _im2col_kernel;
- NETransposeKernel _transpose_kernel;
- NEGEMMTranspose1xWKernel _transpose1xW_kernel;
- NEGEMMInterleave4x4Kernel _interleave4x4_kernel;
- NEGEMMMatrixMultiplyKernel _mm_kernel;
- NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
- Tensor _im2col_output;
- Tensor _interleave4x4_output;
- Tensor _transpose_output;
- Tensor _transpose1xW_output;
- bool _is_first_run;
- bool _transpose_weights;
- bool _fc_after_conv;
- bool _batched_fc_layer;
- bool _accumulate_biases;
+ NEIm2ColKernel _im2col_kernel;
+ NEFullyConnectedLayerReshapeWeights _reshape_weights_kernel;
+ NEGEMMInterleave4x4Kernel _interleave4x4_kernel;
+ NEGEMMMatrixMultiplyKernel _mm_kernel;
+ NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
+ Tensor _im2col_output;
+ Tensor _interleave4x4_output;
+ Tensor _reshape_weights_output;
+ bool _are_weights_reshaped;
+ bool _is_fc_after_conv;
+ bool _is_batched_fc_layer;
+ bool _accumulate_biases;
};
}
#endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */
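
A usage sketch of the new interface (sizes are illustrative). With the defaults the function transposes and reshapes the weights itself on the first run; passing are_weights_reshaped = true instead lets the caller reuse weights already processed by NEFullyConnectedLayerReshapeWeights:

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void example_fully_connected()
    {
        Tensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F32)); // 2D weights
        biases.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));

        NEFullyConnectedLayer fc;
        fc.configure(&src, &weights, &biases, &dst); // transpose_weights = true, are_weights_reshaped = false
    }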
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index b9346e7..a40aa91 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -50,9 +50,9 @@
/** Initialise the kernel's inputs, output
*
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
- * @note GEMM: The tensors a, b, c, d must have the same data type. All are either F32 or F16. You should not mix data types when calling this function.
+ * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
*
- * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: F32, F16.
+ * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: QS8/F16/F32
* @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
* @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
* @param[out] d Output tensor. Data type supported: same as @p a
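
A usage sketch (sizes are illustrative). arm_compute tensor shapes are given as (width, height), i.e. (columns, rows):

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void example_gemm()
    {
        // A is 4x16, B is 16x8, so D = alpha * A * B is 4x8.
        Tensor a, b, d;
        a.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
        b.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::F32));
        d.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));

        NEGEMM gemm;
        gemm.configure(&a, &b, nullptr, &d, 1.0f, 0.0f); // no C matrix, so beta is unused
    }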
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
index 71fefbf..b911fd0 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
@@ -40,7 +40,7 @@
public:
/** Initialise the kernel's inputs, output
*
- * @param[in] input First input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] input First input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: same as @p input
*/
void configure(const ITensor *input, ITensor *output);
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
index 69096fb..447b8c9 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
@@ -38,7 +38,7 @@
public:
/** Initialise the kernel's inputs, output
*
- * @param[in] input First input tensor. Data type supported: F32, F16, U8.
+ * @param[in] input First input tensor. Data type supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: same as @p input
*/
void configure(const ITensor *input, ITensor *output);
diff --git a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h
index 7487f66..699e42e 100644
--- a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h
+++ b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h
@@ -64,7 +64,7 @@
protected:
NEGaussian5x5HorKernel _kernel_hor; /**< kernel for horizontal pass */
NEGaussian5x5VertKernel _kernel_vert; /**< kernel for vertical pass */
- Tensor _tmp; /** temporary buffer for output of horizontal pass */
+ Tensor _tmp; /**< temporary buffer for output of horizontal pass */
NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */
};
}
diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h
index 46ab72c..98b8a89 100644
--- a/arm_compute/runtime/NEON/functions/NEHOGDetector.h
+++ b/arm_compute/runtime/NEON/functions/NEHOGDetector.h
@@ -40,6 +40,8 @@
public:
/** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
*
+ * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is the caller's responsibility to clear it.
+ *
* @param[in] input Input tensor. It is the output of @ref NEHOGDescriptor. Data type supported: F32
* @param[in] hog HOG data-object that describes the HOG descriptor
* @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects
diff --git a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h
index 9440ee0..2d07e64 100644
--- a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h
+++ b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h
@@ -24,10 +24,10 @@
#ifndef __ARM_COMPUTE_NEHOGMULTIDETECTION_H__
#define __ARM_COMPUTE_NEHOGMULTIDETECTION_H__
+#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
#include "arm_compute/core/IArray.h"
#include "arm_compute/core/IMultiHOG.h"
#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEHOGNonMaximaSuppressionKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h"
#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h"
@@ -41,7 +41,7 @@
* -# @ref NEHOGOrientationBinningKernel
* -# @ref NEHOGBlockNormalizationKernel
* -# @ref NEHOGDetector
- * -# @ref NEHOGNonMaximaSuppressionKernel (executed if non_maxima_suppression == true)
+ * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true)
*
* @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same:
* -# Phase type
@@ -85,20 +85,20 @@
void run() override;
private:
- NEHOGGradient _gradient_kernel;
- std::unique_ptr<NEHOGOrientationBinningKernel[]> _orient_bin_kernel;
- std::unique_ptr<NEHOGBlockNormalizationKernel[]> _block_norm_kernel;
- std::unique_ptr<NEHOGDetector[]> _hog_detect_kernel;
- std::unique_ptr<NEHOGNonMaximaSuppressionKernel> _non_maxima_kernel;
- std::unique_ptr<Tensor[]> _hog_space;
- std::unique_ptr<Tensor[]> _hog_norm_space;
- IDetectionWindowArray *_detection_windows;
- Tensor _mag;
- Tensor _phase;
- bool _non_maxima_suppression;
- size_t _num_orient_bin_kernel;
- size_t _num_block_norm_kernel;
- size_t _num_hog_detect_kernel;
+ NEHOGGradient _gradient_kernel;
+ std::unique_ptr<NEHOGOrientationBinningKernel[]> _orient_bin_kernel;
+ std::unique_ptr<NEHOGBlockNormalizationKernel[]> _block_norm_kernel;
+ std::unique_ptr<NEHOGDetector[]> _hog_detect_kernel;
+ std::unique_ptr<CPPDetectionWindowNonMaximaSuppressionKernel> _non_maxima_kernel;
+ std::unique_ptr<Tensor[]> _hog_space;
+ std::unique_ptr<Tensor[]> _hog_norm_space;
+ IDetectionWindowArray *_detection_windows;
+ Tensor _mag;
+ Tensor _phase;
+ bool _non_maxima_suppression;
+ size_t _num_orient_bin_kernel;
+ size_t _num_block_norm_kernel;
+ size_t _num_hog_detect_kernel;
};
}
diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
new file mode 100644
index 0000000..1b2b2ee
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__
+#define __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
+#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
+#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
+#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to compute the locally connected layer. This function calls the following NEON kernels:
+ *
+ * -# @ref NEWeightsReshapeKernel (executed only once for each configuration)
+ * -# @ref NEIm2ColKernel
+ * -# @ref NELocallyConnectedMatrixMultiplyKernel
+ * -# @ref NECol2ImKernel
+ */
+class NELocallyConnectedLayer : public IFunction
+{
+public:
+ /** Default constructor */
+ NELocallyConnectedLayer();
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F32.
+ * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ */
+ void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ NEIm2ColKernel _input_im2col_kernel;
+ NEWeightsReshapeKernel _weights_reshape_kernel;
+ NELocallyConnectedMatrixMultiplyKernel _mm_kernel;
+ NECol2ImKernel _output_col2im_kernel;
+ Tensor _input_im2col_reshaped;
+ Tensor _weights_reshaped;
+ Tensor _gemm_output;
+ bool _is_first_run;
+};
+}
+#endif /* __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h
index e60349a..82e75ee 100644
--- a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h
+++ b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h
@@ -48,7 +48,7 @@
NEMinMaxLocation();
/** Initialise the kernel's inputs and outputs.
*
- * @param[in] input Input image. Data types supported: U8 or S16.
+ * @param[in] input Input image. Data types supported: U8/S16.
* @param[out] min Minimum value of image.
* @param[out] max Maximum value of image.
* @param[out] min_loc (Optional) Array of minimum value locations.
diff --git a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h
index 06e4f08..c87d722 100644
--- a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h
+++ b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h
@@ -45,7 +45,7 @@
* @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT
* The constant value used with CONSTANT border mode is 0
*
- * @param[in, out] input Source tensor. Data type supported: U8, F32. (Written to only for @p border_mode != UNDEFINED)
+ * @param[in, out] input Source tensor. Data type supported: U8/F32. (Written to only for @p border_mode != UNDEFINED)
* @param[out] output Destination for the Non-Maxima suppressions 3x3. Data type supported: same as @p input
* @param[in] border_mode Border mode to use for non-maxima suppression. The implementation supports just 2 border modes: UNDEFINED and CONSTANT
*
diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
index b7be34d..3202867 100644
--- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
@@ -52,7 +52,7 @@
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data type supported: F32. Number of channels must be 1.
+ * and an optional 4th dimension for batch of inputs. Data type supported: QS8/F32
* @param[out] output Destination with the same dimensions, data type and number of channels of @p input
* @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
*/
diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
index 835bd13..de7a797 100644
--- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
+++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
@@ -37,9 +37,9 @@
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in] input1 First tensor input. Data types supported: U8 or S16.
- * @param[in] input2 Second tensor input. Data types supported: U8 or S16.
- * @param[out] output Output tensor. Data types supported: U8 or S16.
+ * @param[in] input1 First tensor input. Data types supported: U8/QS8/S16/F32.
+ * @param[in] input2 Second tensor input. Data types supported: U8/QS8/S16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QS8/S16/F32.
* @param[in] scale Scale to apply after multiplication. Must be positive.
* @param[in] overflow_policy Overflow policy.
* @param[in] rounding_policy Rounding policy.
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
index 5d67830..5a9cffa 100644
--- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -42,7 +42,7 @@
public:
/** Set the input and output tensors.
*
- * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: F32.
+ * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*/
diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
index c67b667..dc84dec 100644
--- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
@@ -50,7 +50,7 @@
NESoftmaxLayer();
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: F32.
+ * @param[in] input Source tensor. Data types supported: QS8/F32.
* @param[out] output Destination tensor. Data types supported: same as @p input.
*/
void configure(ITensor *input, ITensor *output);
@@ -63,7 +63,6 @@
NELogits1DShiftExpSumKernel _shift_exp_sum_kernel;
NELogits1DNormKernel _norm_kernel;
NEFillBorderKernel _fill_border_kernel;
- NEFillBorderKernel _fill_border_kernel_sum;
Tensor _max;
Tensor _sum;
Tensor _tmp;
diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h
index d2f9d30..b59ffb8 100644
--- a/arm_compute/runtime/NEON/functions/NETableLookup.h
+++ b/arm_compute/runtime/NEON/functions/NETableLookup.h
@@ -37,9 +37,9 @@
public:
/** Initialise the kernel's inputs and output
*
- * @param[in] input First tensor input. Data types supported: U8 and S16
+ * @param[in] input First tensor input. Data types supported: U8/S16
* @param[in] lut Input lookup table.
- * @param[out] output Output tensor. Data types supported: U8 and S16.
+ * @param[out] output Output tensor. Data types supported: same as @p input
*/
void configure(const ITensor *input, const ILut *lut, ITensor *output);
};
diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h
index 1b88715..4b606e7 100644
--- a/arm_compute/runtime/NEON/functions/NETranspose.h
+++ b/arm_compute/runtime/NEON/functions/NETranspose.h
@@ -41,7 +41,7 @@
public:
/** Initialise the kernel's inputs and output
*
- * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
void configure(const ITensor *input, ITensor *output);
diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h
new file mode 100644
index 0000000..21df6a6
--- /dev/null
+++ b/arm_compute/runtime/OMP/OMPScheduler.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_OMPSCHEDULER_H__
+#define __ARM_COMPUTE_OMPSCHEDULER_H__
+
+#include "arm_compute/runtime/IScheduler.h"
+
+namespace arm_compute
+{
+/** Pool of threads to automatically split a kernel's execution among several threads. */
+class OMPScheduler : public IScheduler
+{
+public:
+ /** Sets the number of threads the scheduler will use to run the kernels.
+ *
+ * @param[in] num_threads If set to 0, the number returned by omp_get_max_threads() will be used, otherwise the specified number of threads will be used.
+ */
+ void set_num_threads(unsigned int num_threads) override;
+ /** Returns the number of threads that the OMPScheduler has in its pool.
+ *
+ * @return Number of threads available in OMPScheduler.
+ */
+ unsigned int num_threads() const override;
+ /** Access the scheduler singleton
+ *
+ * @return The scheduler
+ */
+ static OMPScheduler &get();
+ /** Multithread the execution of the passed kernel if possible.
+ *
+ * The kernel will run on a single thread if any of these conditions is true:
+ * - ICPPKernel::is_parallelisable() returns false
+ * - The scheduler has been initialized with only one thread.
+ *
+ * @param[in] kernel Kernel to execute.
+ * @param[in] split_dimension Dimension along which to split the kernel's execution window.
+ */
+ void schedule(ICPPKernel *kernel, unsigned int split_dimension) override;
+
+private:
+ /** Constructor. */
+ OMPScheduler();
+
+ unsigned int _num_threads;
+};
+}
+#endif /* __ARM_COMPUTE_OMPSCHEDULER_H__ */
diff --git a/arm_compute/runtime/Scheduler.h b/arm_compute/runtime/Scheduler.h
new file mode 100644
index 0000000..21f944b
--- /dev/null
+++ b/arm_compute/runtime/Scheduler.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_SCHEDULER_H__
+#define __ARM_COMPUTE_SCHEDULER_H__
+
+#include "arm_compute/runtime/IScheduler.h"
+#include <memory>
+
+namespace arm_compute
+{
+/** Configurable scheduler which supports multiple multithreading APIs and allows choosing between different schedulers at runtime. */
+class Scheduler
+{
+public:
+ enum class Type
+ {
+ ST, // Single thread.
+ CPP, // C++11 threads.
+ OMP, // OpenMP.
+ CUSTOM // Provided by the user.
+ };
+ /** Sets the user defined scheduler and makes it the active scheduler.
+ *
+ * @param[in] scheduler A shared pointer to a custom scheduler implemented by the user.
+ */
+ static void set(std::shared_ptr<IScheduler> &scheduler);
+ /** Access the scheduler singleton.
+ *
+ * @return A reference to the scheduler object.
+ */
+ static IScheduler &get();
+ /** Set the active scheduler.
+ *
+ * Only one scheduler can be enabled at any time.
+ *
+ * @param[in] t The type of the scheduler to be enabled.
+ */
+ static void set(Type t);
+ /** Returns the type of the active scheduler.
+ *
+ * @return The current scheduler's type.
+ */
+ static Type get_type();
+ /** Checks whether the given scheduler type is supported.
+ *
+ * @param[in] t The scheduler type to check.
+ *
+ * @return True if the given scheduler type is supported, false otherwise.
+ */
+ static bool is_available(Type t);
+
+private:
+ static Type _scheduler_type;
+ static std::shared_ptr<IScheduler> _custom_scheduler;
+ Scheduler();
+};
+}
+#endif /* __ARM_COMPUTE_SCHEDULER_H__ */
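
A sketch of selecting the runtime scheduler through this new class (which types are actually available depends on how the library was built):

    #include "arm_compute/runtime/Scheduler.h"

    using arm_compute::Scheduler;

    void pick_scheduler()
    {
        // Prefer OpenMP when available, otherwise fall back to C++11 threads.
        if(Scheduler::is_available(Scheduler::Type::OMP))
        {
            Scheduler::set(Scheduler::Type::OMP);
        }
        else
        {
            Scheduler::set(Scheduler::Type::CPP);
        }
        Scheduler::get().set_num_threads(4); // 0 would mean one thread per core
    }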
diff --git a/arm_compute/runtime/SingleThreadScheduler.h b/arm_compute/runtime/SingleThreadScheduler.h
new file mode 100644
index 0000000..a6e1def
--- /dev/null
+++ b/arm_compute/runtime/SingleThreadScheduler.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_SINGLETHREADSCHEDULER_H__
+#define __ARM_COMPUTE_SINGLETHREADSCHEDULER_H__
+
+#include "arm_compute/runtime/IScheduler.h"
+
+namespace arm_compute
+{
+/** Scheduler that runs all kernels sequentially, in the same thread as the caller. */
+class SingleThreadScheduler : public IScheduler
+{
+public:
+ /** Sets the number of threads the scheduler will use to run the kernels.
+ *
+ * @param[in] num_threads This is ignored for this scheduler as the number of threads is always one.
+ */
+ void set_num_threads(unsigned int num_threads) override;
+ /** Returns the number of threads that the SingleThreadScheduler has, which is always 1.
+ *
+ * @return Number of threads available in SingleThreadScheduler.
+ */
+ unsigned int num_threads() const override;
+ /** Access the scheduler singleton
+ *
+ * @return The scheduler
+ */
+ static SingleThreadScheduler &get();
+ /** Runs the kernel in the same thread as the caller synchronously.
+ *
+ * @param[in] kernel Kernel to execute.
+ * @param[in] split_dimension Dimension along which to split the kernel's execution window.
+ */
+ void schedule(ICPPKernel *kernel, unsigned int split_dimension) override;
+
+private:
+ /** Constructor. */
+ SingleThreadScheduler() = default;
+};
+}
+#endif /* __ARM_COMPUTE_SINGLETHREADSCHEDULER_H__ */
diff --git a/arm_compute/runtime/SubTensor.h b/arm_compute/runtime/SubTensor.h
new file mode 100644
index 0000000..bdb229d
--- /dev/null
+++ b/arm_compute/runtime/SubTensor.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_SUBTENSOR_H__
+#define __ARM_COMPUTE_SUBTENSOR_H__
+
+#include "arm_compute/core/SubTensorInfo.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensorInfo;
+
+/** Basic implementation of the sub-tensor interface */
+class SubTensor : public ITensor
+{
+public:
+ /** Constructor
+ *
+ * @param[in] parent Parent tensor
+ * @param[in] tensor_shape Shape of the subtensor
+ * @param[in] coords Coordinates of the first subtensor element inside the parent tensor.
+ */
+ SubTensor(ITensor *parent, const TensorShape &tensor_shape, const Coordinates &coords);
+ /** Default destructor: the sub-tensor does not own the parent's memory, so nothing is freed */
+ ~SubTensor() = default;
+ /** Prevent instances of this class from being copy constructed */
+ SubTensor(const SubTensor &) = delete;
+ /** Prevent instances of this class from being copied */
+ SubTensor &operator=(const SubTensor &) = delete;
+ /** Allow instances of this class to be move constructed */
+ SubTensor(SubTensor &&) = default;
+ /** Allow instances of this class to be moved */
+ SubTensor &operator=(SubTensor &&) = default;
+ /** Return the parent tensor of the subtensor
+ *
+ * @return Parent tensor
+ */
+ ITensor *parent();
+
+ // Inherited methods overridden:
+ ITensorInfo *info() const override;
+ ITensorInfo *info() override;
+ uint8_t *buffer() const override;
+
+private:
+ ITensor *_parent;
+ mutable SubTensorInfo _info;
+};
+}
+#endif /*__ARM_COMPUTE_SUBTENSOR_H__ */
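
A usage sketch (shapes are illustrative): a SubTensor is a view into its parent's buffer, so no memory is copied or owned:

    #include "arm_compute/runtime/SubTensor.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void example_sub_tensor()
    {
        Tensor parent;
        parent.allocator()->init(TensorInfo(TensorShape(64U, 64U, 16U), 1, DataType::F32));
        parent.allocator()->allocate();

        // View of the first 8 feature maps of 'parent'.
        SubTensor view(&parent, TensorShape(64U, 64U, 8U), Coordinates(0, 0, 0));
        ITensor *p = view.parent(); // points back at 'parent'
        (void)p;
    }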
diff --git a/arm_compute/runtime/Tensor.h b/arm_compute/runtime/Tensor.h
index e491635..1fe73a2 100644
--- a/arm_compute/runtime/Tensor.h
+++ b/arm_compute/runtime/Tensor.h
@@ -31,7 +31,7 @@
namespace arm_compute
{
-class TensorInfo;
+class ITensorInfo;
/** Basic implementation of the tensor interface */
class Tensor : public ITensor
@@ -52,9 +52,9 @@
TensorAllocator *allocator();
// Inherited methods overridden:
- TensorInfo *info() const override;
- TensorInfo *info() override;
- uint8_t *buffer() const override;
+ ITensorInfo *info() const override;
+ ITensorInfo *info() override;
+ uint8_t *buffer() const override;
private:
mutable TensorAllocator _allocator; /**< Instance of the basic CPU allocator.*/
diff --git a/arm_compute/runtime/Utils.h b/arm_compute/runtime/Utils.h
new file mode 100644
index 0000000..2f037a0
--- /dev/null
+++ b/arm_compute/runtime/Utils.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_RUNTIME_UTILS_H__
+#define __ARM_COMPUTE_RUNTIME_UTILS_H__
+
+#include "arm_compute/runtime/Scheduler.h"
+
+#include <string>
+
+namespace arm_compute
+{
+/** Convert a Scheduler::Type into a string.
+ *
+ * @param[in] t @ref Scheduler::Type to be translated to string.
+ *
+ * @return The string describing the scheduler type.
+ */
+const std::string &string_from_scheduler_type(Scheduler::Type t);
+}
+#endif /* __ARM_COMPUTE_RUNTIME_UTILS_H__ */