arm_compute v19.05
diff --git a/utils/CommonGraphOptions.cpp b/utils/CommonGraphOptions.cpp
index 4247b2d..e2ca98a 100644
--- a/utils/CommonGraphOptions.cpp
+++ b/utils/CommonGraphOptions.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -83,6 +83,7 @@
     os << "Data type : " << common_params.data_type << std::endl;
     os << "Data layout : " << common_params.data_layout << std::endl;
     os << "Tuner enabled? : " << (common_params.enable_tuner ? true_str : false_str) << std::endl;
+    os << "Tuner mode : " << common_params.tuner_mode << std::endl;
     os << "Tuner file : " << common_params.tuner_file << std::endl;
     os << "Fast math enabled? : " << (common_params.fast_math_hint == FastMathHint::Enabled ? true_str : false_str) << std::endl;
     if(!common_params.data_path.empty())
@@ -117,6 +118,7 @@
       data_type(),
       data_layout(),
       enable_tuner(parser.add_option<ToggleOption>("enable-tuner")),
+      tuner_mode(),
       fast_math_hint(parser.add_option<ToggleOption>("fast-math")),
       data_path(parser.add_option<SimpleOption<std::string>>("data")),
       image(parser.add_option<SimpleOption<std::string>>("image")),
@@ -146,9 +148,17 @@
         DataLayout::NCHW,
     };
 
+    const std::set<CLTunerMode> supported_tuner_modes
+    {
+        CLTunerMode::EXHAUSTIVE,
+        CLTunerMode::NORMAL,
+        CLTunerMode::RAPID
+    };
+
     target      = parser.add_option<EnumOption<Target>>("target", supported_targets, Target::NEON);
     data_type   = parser.add_option<EnumOption<DataType>>("type", supported_data_types, DataType::F32);
     data_layout = parser.add_option<EnumOption<DataLayout>>("layout", supported_data_layouts);
+    tuner_mode  = parser.add_option<EnumOption<CLTunerMode>>("tuner-mode", supported_tuner_modes, CLTunerMode::NORMAL);
 
     help->set_help("Show this help message");
     threads->set_help("Number of threads to use");
@@ -156,6 +166,7 @@
     data_type->set_help("Data type to use");
     data_layout->set_help("Data layout to use");
     enable_tuner->set_help("Enable OpenCL dynamic tuner");
+    tuner_mode->set_help("Configures the time taken by the tuner to tune. Slow tuner produces the most performant LWS configuration");
     fast_math_hint->set_help("Enable fast math");
     data_path->set_help("Path where graph parameters reside");
     image->set_help("Input image for the graph");
@@ -181,6 +192,7 @@
         common_params.data_layout = options.data_layout->value();
     }
     common_params.enable_tuner           = options.enable_tuner->is_set() ? options.enable_tuner->value() : false;
+    common_params.tuner_mode             = options.tuner_mode->value();
     common_params.fast_math_hint         = options.fast_math_hint->is_set() ? fast_math_hint_value : FastMathHint::Disabled;
     common_params.data_path              = options.data_path->value();
     common_params.image                  = options.image->value();
diff --git a/utils/CommonGraphOptions.h b/utils/CommonGraphOptions.h
index 921889d..826cca1 100644
--- a/utils/CommonGraphOptions.h
+++ b/utils/CommonGraphOptions.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,6 +29,7 @@
 
 #include "arm_compute/graph/TypeLoader.h"
 #include "arm_compute/graph/TypePrinter.h"
+#include "arm_compute/runtime/CL/CLTunerTypes.h"
 
 namespace arm_compute
 {
@@ -93,6 +94,7 @@
     arm_compute::DataType            data_type{ DataType::F32 };
     arm_compute::DataLayout          data_layout{ DataLayout::NHWC };
     bool                             enable_tuner{ false };
+    arm_compute::CLTunerMode         tuner_mode{ CLTunerMode::NORMAL };
     arm_compute::graph::FastMathHint fast_math_hint{ arm_compute::graph::FastMathHint::Disabled };
     std::string                      data_path{};
     std::string                      image{};
@@ -147,6 +149,7 @@
     EnumOption<arm_compute::DataType>      *data_type;        /**< Graph data type */
     EnumOption<arm_compute::DataLayout>    *data_layout;      /**< Graph data layout */
     ToggleOption                           *enable_tuner;     /**< Enable tuner */
+    EnumOption<arm_compute::CLTunerMode>   *tuner_mode;       /**< Tuner mode */
     ToggleOption                           *fast_math_hint;   /**< Fast math hint */
     SimpleOption<std::string>              *data_path;        /**< Trainable parameters path */
     SimpleOption<std::string>              *image;            /**< Image */
diff --git a/utils/GraphUtils.cpp b/utils/GraphUtils.cpp
index ab2c753..6be289a 100644
--- a/utils/GraphUtils.cpp
+++ b/utils/GraphUtils.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -77,8 +77,8 @@
     });
 }
 
-CaffePreproccessor::CaffePreproccessor(std::array<float, 3> mean, float scale, bool bgr)
-    : _mean(mean), _scale(scale), _bgr(bgr)
+CaffePreproccessor::CaffePreproccessor(std::array<float, 3> mean, bool bgr, float scale)
+    : _mean(mean), _bgr(bgr), _scale(scale)
 {
     if(_bgr)
     {
@@ -153,24 +153,29 @@
 }
 
 template <typename T>
-void NumPyAccessor::access_numpy_tensor(ITensor &tensor)
+void NumPyAccessor::access_numpy_tensor(ITensor &tensor, T tolerance)
 {
     const int num_elements          = tensor.info()->tensor_shape().total_size();
-    int       num_mismatches        = utils::compare_tensor<T>(tensor, _npy_tensor);
+    int       num_mismatches        = utils::compare_tensor<T>(tensor, _npy_tensor, tolerance);
     float     percentage_mismatches = static_cast<float>(num_mismatches) / num_elements;
 
     _output_stream << "Results: " << 100.f - (percentage_mismatches * 100) << " % matches with the provided output[" << _filename << "]." << std::endl;
+    _output_stream << "         " << num_elements - num_mismatches << " out of " << num_elements << " matches with the provided output[" << _filename << "]." << std::endl
+                   << std::endl;
 }
 
 bool NumPyAccessor::access_tensor(ITensor &tensor)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&tensor, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&tensor, 1, DataType::F32, DataType::QASYMM8);
     ARM_COMPUTE_ERROR_ON(_npy_tensor.info()->dimension(0) != tensor.info()->dimension(0));
 
     switch(tensor.info()->data_type())
     {
+        case DataType::QASYMM8:
+            access_numpy_tensor<qasymm8_t>(tensor, 0);
+            break;
         case DataType::F32:
-            access_numpy_tensor<float>(tensor);
+            access_numpy_tensor<float>(tensor, 0.0001f);
             break;
         default:
             ARM_COMPUTE_ERROR("NOT SUPPORTED!");
@@ -604,6 +609,7 @@
 {
     switch(tensor.info()->data_type())
     {
+        case DataType::QASYMM8:
         case DataType::U8:
         {
             std::uniform_int_distribution<uint8_t> distribution_u8(_lower.get<uint8_t>(), _upper.get<uint8_t>());
@@ -654,7 +660,7 @@
         }
         case DataType::F16:
         {
-            std::uniform_real_distribution<float> distribution_f16(_lower.get<float>(), _upper.get<float>());
+            std::uniform_real_distribution<float> distribution_f16(_lower.get<half>(), _upper.get<half>());
             fill<half>(tensor, distribution_f16);
             break;
         }
diff --git a/utils/GraphUtils.h b/utils/GraphUtils.h
index 131378e..88221c7 100644
--- a/utils/GraphUtils.h
+++ b/utils/GraphUtils.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_GRAPH_UTILS_H__
-#define __ARM_COMPUTE_GRAPH_UTILS_H__
+#ifndef __ARM_COMPUTE_UTILS_GRAPH_UTILS_H__
+#define __ARM_COMPUTE_UTILS_GRAPH_UTILS_H__
 
 #include "arm_compute/core/PixelValue.h"
 #include "arm_compute/core/Utils.h"
@@ -62,17 +62,17 @@
 public:
     /** Default Constructor
      *
-     * @param mean Mean array in RGB ordering
-     * @param scale Scale value
-     * @param bgr  Boolean specifying if the preprocessing should assume BGR format
+     * @param[in] mean  Mean array in RGB ordering
+     * @param[in] bgr   Boolean specifying if the preprocessing should assume BGR format
+     * @param[in] scale Scale value
      */
-    CaffePreproccessor(std::array<float, 3> mean = std::array<float, 3> { { 0, 0, 0 } }, float scale = 1.f, bool bgr = true);
+    CaffePreproccessor(std::array<float, 3> mean = std::array<float, 3> { { 0, 0, 0 } }, bool bgr = true, float scale = 1.f);
     void preprocess(ITensor &tensor) override;
 
 private:
     std::array<float, 3> _mean;
-    float _scale;
     bool  _bgr;
+    float _scale;
 };
 
 /** TF preproccessor */
@@ -160,7 +160,7 @@
 
 private:
     template <typename T>
-    void access_numpy_tensor(ITensor &tensor);
+    void access_numpy_tensor(ITensor &tensor, T tolerance);
 
     Tensor            _npy_tensor;
     const std::string _filename;
@@ -601,4 +601,4 @@
 } // namespace graph_utils
 } // namespace arm_compute
 
-#endif /* __ARM_COMPUTE_GRAPH_UTILS_H__ */
+#endif /* __ARM_COMPUTE_UTILS_GRAPH_UTILS_H__ */
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index f2cf606..a71e036 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -34,6 +34,7 @@
 #include "arm_compute/core/Strides.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTunerTypes.h"
 
 #include <ostream>
 #include <sstream>
@@ -978,7 +979,6 @@
     return str.str();
 }
 
-//FIXME: Check why this doesn't work and the TensorShape and Coordinates overload are needed
 /** Formatted output of the Dimensions type.
  *
  * @param[in] dimensions Type to output.
@@ -1183,6 +1183,46 @@
     return os;
 }
 
+/** Formatted output of the PaddingMode type.
+ *
+ * @param[out] os   Output stream.
+ * @param[in]  mode Type to output.
+ *
+ * @return Modified output stream.
+ */
+inline ::std::ostream &operator<<(::std::ostream &os, const PaddingMode &mode)
+{
+    switch(mode)
+    {
+        case PaddingMode::CONSTANT:
+            os << "CONSTANT";
+            break;
+        case PaddingMode::REFLECT:
+            os << "REFLECT";
+            break;
+        case PaddingMode::SYMMETRIC:
+            os << "SYMMETRIC";
+            break;
+        default:
+            ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+    }
+
+    return os;
+}
+
+/** Convert a PaddingMode value to a string.
+ *
+ * @param[in] mode Type to output.
+ *
+ * @return Formatted string.
+ */
+inline std::string to_string(const PaddingMode &mode)
+{
+    std::stringstream str;
+    str << mode;
+    return str.str();
+}
+
 /** Formatted output of the PadStrideInfo type.
  *
  * @param[out] os              Output stream.
@@ -2179,6 +2219,49 @@
     return support::cpp11::to_string(val);
 }
 
+/** Convert a CLTunerMode value to a string
+ *
+ * @param[in] val CLTunerMode value to be converted
+ *
+ * @return String representing the corresponding CLTunerMode.
+ */
+inline std::string to_string(const CLTunerMode val)
+{
+    switch(val)
+    {
+        case CLTunerMode::EXHAUSTIVE:
+        {
+            return std::string("Exhaustive");
+        }
+        case CLTunerMode::NORMAL:
+        {
+            return std::string("Normal");
+        }
+        case CLTunerMode::RAPID:
+        {
+            return std::string("Rapid");
+        }
+        default:
+        {
+            ARM_COMPUTE_ERROR("Invalid tuner mode.");
+            return std::string("UNDEFINED");
+        }
+    }
+}
+/** [Print CLTunerMode type] **/
+/** Formatted output of the CLTunerMode type.
+ *
+ * @param[out] os  Output stream.
+ * @param[in]  val CLTunerMode to output.
+ *
+ * @return Modified output stream.
+ */
+inline ::std::ostream &operator<<(::std::ostream &os, const CLTunerMode &val)
+{
+    os << to_string(val);
+    return os;
+}
+
 } // namespace arm_compute
 
 #endif /* __ARM_COMPUTE_TYPE_PRINTER_H__ */
diff --git a/utils/Utils.h b/utils/Utils.h
index 04ccc3e..afd90a1 100644
--- a/utils/Utils.h
+++ b/utils/Utils.h
@@ -774,7 +774,7 @@
 template <typename T>
 void init_sgemm_output(T &dst, T &src0, T &src1, arm_compute::DataType dt)
 {
-    dst.allocator()->init(TensorInfo(TensorShape(src1.info()->dimension(0), src0.info()->dimension(1)), 1, dt));
+    dst.allocator()->init(TensorInfo(TensorShape(src1.info()->dimension(0), src0.info()->dimension(1), src0.info()->dimension(2)), 1, dt));
 }
 /** This function returns the amount of memory free reading from /proc/meminfo
  *
@@ -782,15 +782,16 @@
  */
 uint64_t get_mem_free_from_meminfo();
 
-/** Compare to tensor
+/** Compare two tensors
  *
- * @param[in] tensor1 First tensor to be compared.
- * @param[in] tensor2 Second tensor to be compared.
+ * @param[in] tensor1   First tensor to be compared.
+ * @param[in] tensor2   Second tensor to be compared.
+ * @param[in] tolerance Tolerance used for the comparison.
  *
  * @return The number of mismatches
  */
 template <typename T>
-int compare_tensor(ITensor &tensor1, ITensor &tensor2)
+int compare_tensor(ITensor &tensor1, ITensor &tensor2, T tolerance)
 {
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(&tensor1, &tensor2);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(&tensor1, &tensor2);
@@ -807,7 +808,7 @@
 
     execute_window_loop(window, [&](const Coordinates & id)
     {
-        if(std::abs(*reinterpret_cast<T *>(itensor1.ptr()) - *reinterpret_cast<T *>(itensor2.ptr())) > 0.0001)
+        if(std::abs(*reinterpret_cast<T *>(itensor1.ptr()) - *reinterpret_cast<T *>(itensor2.ptr())) > tolerance)
         {
             ++num_mismatches;
         }
diff --git a/utils/command_line/CommandLineParser.h b/utils/command_line/CommandLineParser.h
index 06c4bf5..f834af8 100644
--- a/utils/command_line/CommandLineParser.h
+++ b/utils/command_line/CommandLineParser.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -225,7 +225,7 @@
 
     for(const auto &option : _positional_options)
     {
-        //FIXME: Print help string as well
+        // TODO(COMPMID-2079): Print help string as well
         std::cout << option->name() << "\n";
     }
 }