arm_compute v19.02

Change-Id: I853a3ecf38f206da13c1b03640c8adf73c20477c
diff --git a/tests/validation/CPP/NonMaximalSuppression.cpp b/tests/validation/CPP/NonMaximalSuppression.cpp
new file mode 100644
index 0000000..6cd7b52
--- /dev/null
+++ b/tests/validation/CPP/NonMaximalSuppression.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/functions/CPPDetectionOutputLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/NonMaxSuppressionFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+const auto max_output_boxes_dataset = framework::dataset::make("MaxOutputBoxes", 1, 10); // Maximum-output-box counts built from (1, 10); NOTE(review): presumably a range, confirm whether 10 is included
+const auto score_threshold_dataset  = framework::dataset::make("ScoreThreshold", { 0.1f, 0.5f, 0.f, 1.f }); // Score thresholds under test, including the 0 and 1 extremes
+const auto nms_threshold_dataset    = framework::dataset::make("NMSThreshold", { 0.1f, 0.5f, 0.f, 1.f }); // NMS thresholds under test, including the 0 and 1 extremes
+const auto NMSParametersSmall       = datasets::Small2DNonMaxSuppressionShapes() * max_output_boxes_dataset * score_threshold_dataset * nms_threshold_dataset; // Parameters consumed by the PRECOMMIT RunSmall case
+const auto NMSParametersBig         = datasets::Large2DNonMaxSuppressionShapes() * max_output_boxes_dataset * score_threshold_dataset * nms_threshold_dataset; // Parameters consumed by the NIGHTLY RunLarge case
+
+} // namespace
+
+TEST_SUITE(CPP)
+TEST_SUITE(NMS)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+                                                framework::dataset::make("BoundingBox",{
+                                                                                        TensorInfo(TensorShape(4U, 100U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(1U, 4U, 2U), 1, DataType::F32),    // invalid shape
+                                                                                        TensorInfo(TensorShape(4U, 2U), 1, DataType::S32),    // invalid data type
+                                                                                        TensorInfo(TensorShape(4U, 3U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(4U, 66U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(4U, 100U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(4U, 100U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(4U, 100U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(4U, 100U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(4U, 100U), 1, DataType::F32),
+                                                                                    }),
+                                                framework::dataset::make("Scores", {
+                                                                                        TensorInfo(TensorShape(100U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(37U, 2U, 13U, 27U), 1, DataType::F32), // invalid shape
+                                                                                        TensorInfo(TensorShape(4U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(3U), 1, DataType::U8),  // invalid data type
+                                                                                        TensorInfo(TensorShape(66U), 1, DataType::F32),  // valid: one score per bounding box (66)
+                                                                                        TensorInfo(TensorShape(100U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(100U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(100U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(100U), 1, DataType::F32),
+                                                                                        TensorInfo(TensorShape(100U), 1, DataType::F32),
+                                                                                    })),
+                                                framework::dataset::make("Indices", {
+                                                                                        TensorInfo(TensorShape(100U), 1, DataType::S32),
+                                                                                        TensorInfo(TensorShape(100U), 1, DataType::S32),
+                                                                                        TensorInfo(TensorShape(4U), 1, DataType::S32),
+                                                                                        TensorInfo(TensorShape(3U), 1, DataType::S32),
+                                                                                        TensorInfo(TensorShape(200U), 1, DataType::S32), // indices bigger than max bbs, OK because max_output is 66
+                                                                                        TensorInfo(TensorShape(100U), 1, DataType::F32), // invalid data type
+                                                                                        TensorInfo(TensorShape(100U), 1, DataType::S32),
+                                                                                        TensorInfo(TensorShape(100U), 1, DataType::S32),
+                                                                                        TensorInfo(TensorShape(100U), 1, DataType::S32),
+                                                                                        TensorInfo(TensorShape(100U), 1, DataType::S32),
+
+                                                                                    })),
+                                                framework::dataset::make("max_output", {
+                                                                                        10U, 2U,4U, 3U,66U, 1U,
+                                                                                        0U, /* invalid, must be greater than 0 */
+                                                                                        10000U, /* OK, clamped to indices' size */
+                                                                                        100U,
+                                                                                        10U,
+                                                                                     })),
+                                                framework::dataset::make("score_threshold", {
+                                                                                        0.1f, 0.4f, 0.2f,0.8f,0.3f, 0.01f, 0.5f, 0.45f,
+                                                                                        -1.f, /* invalid value, must be in [0,1] */
+                                                                                        0.5f,
+                                                                                     })),
+                                                framework::dataset::make("nms_threshold", {
+                                                                                        0.3f, 0.7f, 0.1f,0.13f,0.2f, 0.97f, 0.76f, 0.87f, 0.1f,
+                                                                                        10.f, /* invalid value, must be in [0,1]*/
+                                                                                     })),
+                                                framework::dataset::make("Expected", {
+                                                                                        true, false, false, false, true, false, false,true, false, false
+                                                                                     })),
+
+                                            bbox_info, scores_info, indices_info, max_out, score_threshold, nms_threshold, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(CPPNonMaximumSuppression::validate(&bbox_info.clone()->set_is_resizable(false),
+                                                               &scores_info.clone()->set_is_resizable(false),
+                                                               &indices_info.clone()->set_is_resizable(false),
+                                max_out,score_threshold,nms_threshold)) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+using CPPNonMaxSuppressionFixture = NMSValidationFixture<Tensor, Accessor, CPPNonMaximumSuppression>; // NMS fixture instantiated for Tensor/Accessor and the CPPNonMaximumSuppression function
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CPPNonMaxSuppressionFixture, framework::DatasetMode::PRECOMMIT, NMSParametersSmall)
+{
+    // Check the fixture's _target (wrapped in an Accessor) against its _reference
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CPPNonMaxSuppressionFixture, framework::DatasetMode::NIGHTLY, NMSParametersBig)
+{
+    // Check the fixture's _target (wrapped in an Accessor) against its _reference
+    validate(Accessor(_target), _reference);
+}
+
+TEST_SUITE_END() // NMS
+TEST_SUITE_END() // CPP
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CPP/Permute.cpp b/tests/validation/CPP/Permute.cpp
index 0a97041..2ba10ec 100644
--- a/tests/validation/CPP/Permute.cpp
+++ b/tests/validation/CPP/Permute.cpp
@@ -42,10 +42,19 @@
 {
 namespace
 {
-const auto PermuteParametersSmall = combine(concat(concat(datasets::Small2DShapes(), datasets::Small3DShapes()), datasets::Small4DShapes()),
-                                            framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U), PermutationVector(3U, 2U, 0U, 1U) }));
-const auto PermuteParametersLarge = combine(datasets::Large4DShapes(),
-                                            framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U), PermutationVector(3U, 2U, 0U, 1U) }));
+const auto PermuteVectors = framework::dataset::make("PermutationVector",
+{
+    PermutationVector(2U, 0U, 1U),
+    PermutationVector(1U, 2U, 0U),
+    PermutationVector(0U, 1U, 2U),
+    PermutationVector(0U, 2U, 1U),
+    PermutationVector(1U, 0U, 2U),
+    PermutationVector(2U, 1U, 0U),
+});
+const auto PermuteInputLayout     = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC });
+const auto PermuteParametersSmall = concat(concat(datasets::Small2DShapes(), datasets::Small3DShapes()), datasets::Small4DShapes()) * PermuteInputLayout * PermuteVectors;
+const auto PermuteParametersLarge = datasets::Large4DShapes() * PermuteInputLayout * PermuteVectors;
+
 } // namespace
 TEST_SUITE(CPP)
 TEST_SUITE(Permute)
@@ -77,25 +86,32 @@
 using CPPPermuteFixture = PermuteValidationFixture<Tensor, Accessor, CPPPermute, T>;
 
 TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CPPPermuteFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(PermuteParametersSmall, framework::dataset::make("DataType", DataType::U8)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CPPPermuteFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       PermuteParametersSmall * framework::dataset::make("DataType", DataType::U8))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CPPPermuteFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(PermuteParametersLarge, framework::dataset::make("DataType", DataType::U8)))
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CPPPermuteFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       PermuteParametersLarge * framework::dataset::make("DataType", DataType::U8))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
+
 TEST_SUITE_END()
 
 TEST_SUITE(U16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CPPPermuteFixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(PermuteParametersSmall, framework::dataset::make("DataType", DataType::U16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CPPPermuteFixture<uint16_t>, framework::DatasetMode::PRECOMMIT,
+                       PermuteParametersSmall * framework::dataset::make("DataType", DataType::U16))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CPPPermuteFixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(PermuteParametersLarge, framework::dataset::make("DataType", DataType::U16)))
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CPPPermuteFixture<uint16_t>, framework::DatasetMode::NIGHTLY,
+                       PermuteParametersLarge * framework::dataset::make("DataType", DataType::U16))
 {
     // Validate output
     validate(Accessor(_target), _reference);
@@ -103,12 +119,15 @@
 TEST_SUITE_END()
 
 TEST_SUITE(U32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CPPPermuteFixture<uint32_t>, framework::DatasetMode::PRECOMMIT, combine(PermuteParametersSmall, framework::dataset::make("DataType", DataType::U32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CPPPermuteFixture<uint32_t>, framework::DatasetMode::PRECOMMIT,
+                       PermuteParametersSmall * framework::dataset::make("DataType", DataType::U32))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CPPPermuteFixture<uint32_t>, framework::DatasetMode::NIGHTLY, combine(PermuteParametersLarge, framework::dataset::make("DataType", DataType::U32)))
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CPPPermuteFixture<uint32_t>, framework::DatasetMode::NIGHTLY,
+                       PermuteParametersLarge * framework::dataset::make("DataType", DataType::U32))
 {
     // Validate output
     validate(Accessor(_target), _reference);
diff --git a/tests/validation/CPP/TopKV.cpp b/tests/validation/CPP/TopKV.cpp
new file mode 100644
index 0000000..0217819
--- /dev/null
+++ b/tests/validation/CPP/TopKV.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/functions/CPPTopKV.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/PermuteFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+template <typename U, typename T> // U: any type exposing data(); T: element type of the source vector
+inline void fill_tensor(U &&tensor, const std::vector<T> &v) // Copies the raw bytes of v into the buffer returned by tensor.data()
+{
+    std::memcpy(tensor.data(), v.data(), sizeof(T) * v.size()); // No size check: the destination must hold at least v.size() elements of T
+}
+} // namespace
+
+TEST_SUITE(CPP)
+TEST_SUITE(TopKV)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+        framework::dataset::make("PredictionsInfo", { TensorInfo(TensorShape(20, 10), 1, DataType::F32),
+                                                TensorInfo(TensorShape(10, 20), 1, DataType::F16),  // Mismatching batch_size
+                                                TensorInfo(TensorShape(20, 10), 1, DataType::S8), // Unsupported data type
+                                                TensorInfo(TensorShape(10, 10, 10), 1, DataType::F32), // Wrong predictions dimensions
+                                                TensorInfo(TensorShape(20, 10), 1, DataType::F32)}), // Wrong output dimension
+        framework::dataset::make("TargetsInfo",{ TensorInfo(TensorShape(10), 1, DataType::U32),
+                                                TensorInfo(TensorShape(10), 1, DataType::U32),
+                                                TensorInfo(TensorShape(10), 1, DataType::U32),
+                                                TensorInfo(TensorShape(10), 1, DataType::U32),
+                                                TensorInfo(TensorShape(10), 1, DataType::U32)})),
+        framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(10), 1, DataType::U8),
+                                                TensorInfo(TensorShape(10), 1, DataType::U8),
+                                                TensorInfo(TensorShape(10), 1, DataType::U8),
+                                                TensorInfo(TensorShape(10), 1, DataType::U8),
+                                                TensorInfo(TensorShape(1), 1, DataType::U8)})),
+
+        framework::dataset::make("k",{ 0, 1, 2, 3, 4 })),
+        framework::dataset::make("Expected", {true, false, false, false, false })),
+        prediction_info, targets_info, output_info, k, expected)
+{
+    const Status status = CPPTopKV::validate(&prediction_info.clone()->set_is_resizable(false),&targets_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), k);
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_CASE(Float, framework::DatasetMode::ALL) // Runs CPPTopKV on fixed F32 predictions and compares with a hard-coded expected output
+{
+    const unsigned int k = 5; // k argument passed to CPPTopKV::configure
+
+    Tensor predictions = create_tensor<Tensor>(TensorShape(10, 20), DataType::F32); // 200 values; the data below is written as 20 rows of 10
+    Tensor targets     = create_tensor<Tensor>(TensorShape(20), DataType::U32); // One target value per row of predictions
+
+    predictions.allocator()->allocate();
+    targets.allocator()->allocate();
+
+    // Fill the predictions with pre-generated values (one source line per row of 10)
+    fill_tensor(Accessor(predictions), std::vector<float>
+    {
+        0.8147, 0.6557, 0.4387, 0.7513, 0.3517, 0.1622, 0.1067, 0.8530, 0.7803, 0.5470,
+        0.9058, 0.0357, 0.3816, 0.2551, 0.8308, 0.7943, 0.9619, 0.6221, 0.3897, 0.2963,
+        0.1270, 0.8491, 0.7655, 0.5060, 0.5853, 0.3112, 0.0046, 0.3510, 0.2417, 0.7447,
+        0.9134, 0.9340, 0.7952, 0.6991, 0.5497, 0.5285, 0.7749, 0.5132, 0.4039, 0.1890,
+        0.6324, 0.6787, 0.1869, 0.8909, 0.9172, 0.1656, 0.8173, 0.4018, 0.0965, 0.6868,
+        0.0975, 0.7577, 0.4898, 0.9593, 0.2858, 0.6020, 0.8687, 0.0760, 0.1320, 0.1835,
+        0.2785, 0.7431, 0.4456, 0.5472, 0.7572, 0.2630, 0.0844, 0.2399, 0.9421, 0.3685,
+        0.5469, 0.3922, 0.6463, 0.1386, 0.7537, 0.6541, 0.3998, 0.1233, 0.9561, 0.6256,
+        0.9575, 0.6555, 0.7094, 0.1493, 0.3804, 0.6892, 0.2599, 0.1839, 0.5752, 0.7802,
+        0.9649, 0.1712, 0.7547, 0.2575, 0.5678, 0.7482, 0.8001, 0.2400, 0.0598, 0.0811,
+        0.1576, 0.7060, 0.2760, 0.8407, 0.0759, 0.4505, 0.4314, 0.4173, 0.2348, 0.9294,
+        0.9706, 0.0318, 0.6797, 0.2543, 0.0540, 0.0838, 0.9106, 0.0497, 0.3532, 0.7757,
+        0.9572, 0.2769, 0.6551, 0.8143, 0.5308, 0.2290, 0.1818, 0.9027, 0.8212, 0.4868,
+        0.4854, 0.0462, 0.1626, 0.2435, 0.7792, 0.9133, 0.2638, 0.9448, 0.0154, 0.4359,
+        0.8003, 0.0971, 0.1190, 0.9293, 0.9340, 0.1524, 0.1455, 0.4909, 0.0430, 0.4468,
+        0.1419, 0.8235, 0.4984, 0.3500, 0.1299, 0.8258, 0.1361, 0.4893, 0.1690, 0.3063,
+        0.4218, 0.6948, 0.9597, 0.1966, 0.5688, 0.5383, 0.8693, 0.3377, 0.6491, 0.5085,
+        0.9157, 0.3171, 0.3404, 0.2511, 0.4694, 0.9961, 0.5797, 0.9001, 0.7317, 0.5108,
+        0.7922, 0.9502, 0.5853, 0.6160, 0.0119, 0.0782, 0.5499, 0.3692, 0.6477, 0.8176,
+        0.9595, 0.0344, 0.2238, 0.4733, 0.3371, 0.4427, 0.1450, 0.1112, 0.4509, 0.7948
+    });
+
+    fill_tensor(Accessor(targets), std::vector<int> { 1, 5, 7, 2, 8, 1, 2, 1, 2, 4, 3, 9, 4, 1, 9, 9, 4, 1, 2, 4 }); // Raw byte copy of int values into the U32 tensor; relies on int being 32 bits wide
+
+    // Determine the output through the CPP kernel
+    Tensor   output; // No shape/type set here; NOTE(review): presumably configure() initialises it, confirm
+    CPPTopKV topkv;
+    topkv.configure(&predictions, &targets, &output, k);
+
+    output.allocator()->allocate();
+
+    // Run the kernel
+    topkv.run();
+
+    // Validate against the hard-coded expected values (one U8 entry per target)
+    SimpleTensor<uint8_t> expected_output(TensorShape(20), DataType::U8);
+    fill_tensor(expected_output, std::vector<uint8_t> { 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0 });
+    validate(Accessor(output), expected_output);
+}
+
+TEST_CASE(Quantized, framework::DatasetMode::ALL) // Runs CPPTopKV on fixed QASYMM8 predictions and compares with a hard-coded expected output
+{
+    const unsigned int k = 5; // k argument passed to CPPTopKV::configure
+
+    Tensor predictions = create_tensor<Tensor>(TensorShape(10, 20), DataType::QASYMM8, 1, QuantizationInfo()); // 200 values with a default-constructed QuantizationInfo
+    Tensor targets     = create_tensor<Tensor>(TensorShape(20), DataType::U32); // One target value per row of predictions
+
+    predictions.allocator()->allocate();
+    targets.allocator()->allocate();
+
+    // Fill the predictions with pre-generated raw 8-bit values (one source line per row of 10)
+    fill_tensor(Accessor(predictions), std::vector<uint8_t>
+    {
+        133, 235, 69, 118, 140, 179, 189, 203, 137, 157,
+        242, 1, 196, 170, 166, 25, 102, 244, 24, 254,
+        164, 119, 49, 198, 140, 135, 175, 84, 29, 136,
+        246, 109, 74, 90, 185, 136, 181, 172, 35, 123,
+        62, 118, 24, 170, 134, 221, 114, 113, 174, 206,
+        174, 198, 148, 107, 255, 125, 6, 214, 127, 59,
+        75, 83, 175, 216, 56, 101, 85, 197, 49, 128,
+        172, 201, 140, 214, 28, 172, 109, 43, 127, 231,
+        178, 121, 109, 66, 29, 190, 70, 221, 38, 148,
+        18, 10, 165, 158, 17, 134, 51, 254, 15, 217,
+        66, 46, 166, 150, 104, 90, 211, 132, 218, 190,
+        58, 185, 174, 139, 115, 39, 111, 227, 144, 151,
+        171, 122, 163, 223, 94, 151, 228, 151, 238, 64,
+        217, 40, 242, 68, 196, 68, 101, 40, 179, 171,
+        89, 88, 54, 82, 161, 12, 197, 52, 150, 22,
+        200, 156, 182, 31, 198, 194, 102, 105, 209, 161,
+        173, 50, 61, 241, 239, 63, 207, 192, 226, 170,
+        2, 190, 31, 166, 250, 114, 194, 212, 254, 187,
+        155, 63, 156, 123, 50, 177, 97, 203, 1, 229,
+        100, 235, 116, 164, 36, 92, 56, 82, 222, 252
+    });
+
+    fill_tensor(Accessor(targets), std::vector<int> { 1, 5, 7, 2, 8, 1, 2, 1, 2, 4, 3, 9, 4, 1, 9, 9, 4, 1, 2, 4 }); // Raw byte copy of int values into the U32 tensor; relies on int being 32 bits wide
+
+    // Determine the output through the CPP kernel
+    Tensor   output; // No shape/type set here; NOTE(review): presumably configure() initialises it, confirm
+    CPPTopKV topkv;
+    topkv.configure(&predictions, &targets, &output, k);
+
+    output.allocator()->allocate();
+
+    // Run the kernel
+    topkv.run();
+
+    // Validate against the hard-coded expected values (one U8 entry per target)
+    SimpleTensor<uint8_t> expected_output(TensorShape(20), DataType::U8);
+    fill_tensor(expected_output, std::vector<uint8_t> { 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0 });
+    validate(Accessor(output), expected_output);
+}
+
+TEST_SUITE_END() // TopKV
+TEST_SUITE_END() // CPP
+} // namespace validation
+} // namespace test
+} // namespace arm_compute