Remove F16 VRELU microkernels

F16 VRELU microkernels are redundant because MIN/MAX instructions are fast on
platforms that support FP16 computations.

PiperOrigin-RevId: 420173643
diff --git a/BUILD.bazel b/BUILD.bazel
index 75e907a..bdb542e 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -4347,8 +4347,6 @@
     "src/f16-vhswish/gen/vhswish-neonfp16arith-x16.c",
     "src/f16-vmulcaddc/gen/c8-minmax-neonfp16arith-2x.c",
     "src/f16-vmulcaddc/gen/c16-minmax-neonfp16arith-2x.c",
-    "src/f16-vrelu/gen/vrelu-neonfp16arith-x8.c",
-    "src/f16-vrelu/gen/vrelu-neonfp16arith-x16.c",
 ]
 
 PROD_NEONDOT_MICROKERNEL_SRCS = [
@@ -9289,15 +9287,6 @@
 )
 
 xnnpack_benchmark(
-    name = "f16_vrelu_bench",
-    srcs = [
-        "bench/f16-vrelu.cc",
-        "src/xnnpack/AlignedAllocator.h",
-    ] + MICROKERNEL_BENCHMARK_HDRS,
-    deps = MICROKERNEL_BENCHMARK_DEPS,
-)
-
-xnnpack_benchmark(
     name = "f16_f32_vcvt_bench",
     srcs = [
         "bench/f16-f32-vcvt.cc",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f413ff0..a6bb979 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3113,9 +3113,7 @@
   src/f16-vhswish/gen/vhswish-neonfp16arith-x8.c
   src/f16-vhswish/gen/vhswish-neonfp16arith-x16.c
   src/f16-vmulcaddc/gen/c8-minmax-neonfp16arith-2x.c
-  src/f16-vmulcaddc/gen/c16-minmax-neonfp16arith-2x.c
-  src/f16-vrelu/gen/vrelu-neonfp16arith-x8.c
-  src/f16-vrelu/gen/vrelu-neonfp16arith-x16.c)
+  src/f16-vmulcaddc/gen/c16-minmax-neonfp16arith-2x.c)
 
 SET(PROD_NEONDOT_MICROKERNEL_SRCS
   src/qc8-gemm/gen/1x8c4-minmax-fp32-neondot.c
@@ -8349,14 +8347,6 @@
   TARGET_INCLUDE_DIRECTORIES(f16-igemm-bench PRIVATE . include src)
   TARGET_LINK_LIBRARIES(f16-igemm-bench PRIVATE benchmark bench-utils cpuinfo fp16 pthreadpool)
 
-  ADD_EXECUTABLE(f16-vrelu-bench bench/f16-vrelu.cc $<TARGET_OBJECTS:all_microkernels>)
-  SET_TARGET_PROPERTIES(f16-vrelu-bench PROPERTIES
-    CXX_STANDARD 11
-    CXX_STANDARD_REQUIRED YES
-    CXX_EXTENSIONS YES)
-  TARGET_INCLUDE_DIRECTORIES(f16-vrelu-bench PRIVATE . include src)
-  TARGET_LINK_LIBRARIES(f16-vrelu-bench PRIVATE benchmark bench-utils cpuinfo fp16 pthreadpool)
-
   ADD_EXECUTABLE(f16-f32-vcvt-bench bench/f16-f32-vcvt.cc $<TARGET_OBJECTS:all_microkernels>)
   SET_TARGET_PROPERTIES(f16-f32-vcvt-bench PROPERTIES
     CXX_STANDARD 11
diff --git a/bench/f16-vrelu.cc b/bench/f16-vrelu.cc
deleted file mode 100644
index 86f7586..0000000
--- a/bench/f16-vrelu.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <algorithm>
-#include <cmath>
-#include <functional>
-#include <random>
-#include <vector>
-
-#include <benchmark/benchmark.h>
-#include <fp16/fp16.h>
-#include "bench/utils.h"
-#include <xnnpack/AlignedAllocator.h>
-#include <xnnpack/common.h>
-#include <xnnpack/params.h>
-#include <xnnpack/vunary.h>
-
-
-static void f16_vrelu(
-  benchmark::State& state,
-  xnn_f16_vrelu_ukernel_function f16_vrelu,
-  benchmark::utils::IsaCheckFunction isa_check = nullptr)
-{
-  if (isa_check && !isa_check(state)) {
-    return;
-  }
-
-  const size_t elements = state.range(0);
-
-  std::random_device random_device;
-  auto rng = std::mt19937(random_device());
-  auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
-  auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
-
-  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> x(elements);
-  std::generate(x.begin(), x.end(), std::ref(f16rng));
-  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> y(elements);
-  std::generate(x.begin(), x.end(), std::ref(f16rng));
-
-  for (auto _ : state) {
-    f16_vrelu(elements * sizeof(uint16_t), x.data(), y.data(), NULL);
-  }
-
-  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
-  if (cpu_frequency != 0) {
-    state.counters["cpufreq"] = cpu_frequency;
-  }
-
-  const size_t elements_per_iteration = elements;
-  state.counters["elements"] =
-    benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
-
-  const size_t bytes_per_iteration = 2 * elements * sizeof(uint16_t);
-  state.counters["bytes"] =
-    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
-}
-
-#if XNN_ARCH_ARM64
-  BENCHMARK_CAPTURE(f16_vrelu, neonfp16arith_x8, xnn_f16_vrelu_ukernel__neonfp16arith_x8, benchmark::utils::CheckNEONFP16ARITH)
-    ->RangeMultiplier(10)
-    ->Range(1000, 100000000)
-    ->UseRealTime();
-  BENCHMARK_CAPTURE(f16_vrelu, neonfp16arith_x16, xnn_f16_vrelu_ukernel__neonfp16arith_x16, benchmark::utils::CheckNEONFP16ARITH)
-    ->RangeMultiplier(10)
-    ->Range(1000, 100000000)
-    ->UseRealTime();
-#endif  // XNN_ARCH_ARM64
-
-
-#ifndef XNNPACK_BENCHMARK_NO_MAIN
-BENCHMARK_MAIN();
-#endif
diff --git a/scripts/generate-f16-vrelu.sh b/scripts/generate-f16-vrelu.sh
deleted file mode 100755
index fc763a9..0000000
--- a/scripts/generate-f16-vrelu.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/sh
-# Copyright 2020 Google LLC
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-################################### ARM NEON ##################################
-tools/xngen src/f16-vrelu/neonfp16arith.c.in -D BATCH_TILE=8  -o src/f16-vrelu/gen/vrelu-neonfp16arith-x8.c &
-tools/xngen src/f16-vrelu/neonfp16arith.c.in -D BATCH_TILE=16 -o src/f16-vrelu/gen/vrelu-neonfp16arith-x16.c &
-
-################################## Unit tests #################################
-tools/generate-vunary-test.py --spec test/f16-vrelu.yaml --output test/f16-vrelu.cc &
-
-wait
diff --git a/src/f16-vrelu/gen/vrelu-neonfp16arith-x16.c b/src/f16-vrelu/gen/vrelu-neonfp16arith-x16.c
deleted file mode 100644
index 79be38d..0000000
--- a/src/f16-vrelu/gen/vrelu-neonfp16arith-x16.c
+++ /dev/null
@@ -1,66 +0,0 @@
-// Auto-generated file. Do not edit!
-//   Template: src/f16-vrelu/neonfp16arith.c.in
-//   Generator: tools/xngen
-//
-// Copyright 2020 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <assert.h>
-
-#include <arm_neon.h>
-
-#include <xnnpack/common.h>
-#include <xnnpack/vunary.h>
-
-
-void xnn_f16_vrelu_ukernel__neonfp16arith_x16(
-    size_t n,
-    const void* restrict x_ptr,
-    void* restrict y_ptr,
-    const union xnn_f16_relu_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
-{
-  assert(n != 0);
-  assert(n % sizeof(__fp16) == 0);
-  assert(x_ptr != NULL);
-  assert(y_ptr != NULL);
-
-  const __fp16* x = (const __fp16*) x_ptr;
-  __fp16* y = (__fp16*) y_ptr;
-
-  const float16x8_t vzero = vmovq_n_f16(0);
-
-  for (; n >= 16 * sizeof(__fp16); n -= 16 * sizeof(__fp16)) {
-    float16x8_t vacc01234567 = vld1q_f16(x); x += 8;
-    float16x8_t vacc89ABCDEF = vld1q_f16(x); x += 8;
-
-    vacc01234567 = vmaxq_f16(vacc01234567, vzero);
-    vacc89ABCDEF = vmaxq_f16(vacc89ABCDEF, vzero);
-
-    vst1q_f16(y, vacc01234567); y += 8;
-    vst1q_f16(y, vacc89ABCDEF); y += 8;
-  }
-  for (; n >= 8 * sizeof(__fp16); n -= 8 * sizeof(__fp16)) {
-    float16x8_t vacc = vld1q_f16(x); x += 8;
-    vacc = vmaxq_f16(vacc, vzero);
-    vst1q_f16(y, vacc); y += 8;
-  }
-  if XNN_UNLIKELY(n != 0) {
-    float16x8_t vacc = vld1q_f16(x);
-    vacc = vmaxq_f16(vacc, vzero);
-
-    float16x4_t vacc_lo = vget_low_f16(vacc);
-    if (n & (4 * sizeof(__fp16))) {
-      vst1_f16(y, vacc_lo); y += 4;
-      vacc_lo = vget_high_f16(vacc);
-    }
-    if (n & (2 * sizeof(__fp16))) {
-      vst1_lane_u32((void*) y, vreinterpret_u32_f16(vacc_lo), 0); y += 2;
-      vacc_lo = vext_f16(vacc_lo, vacc_lo, 2);
-    }
-    if (n & (1 * sizeof(__fp16))) {
-      vst1_lane_f16(y, vacc_lo, 0);
-    }
-  }
-}
diff --git a/src/f16-vrelu/gen/vrelu-neonfp16arith-x8.c b/src/f16-vrelu/gen/vrelu-neonfp16arith-x8.c
deleted file mode 100644
index 106d557..0000000
--- a/src/f16-vrelu/gen/vrelu-neonfp16arith-x8.c
+++ /dev/null
@@ -1,58 +0,0 @@
-// Auto-generated file. Do not edit!
-//   Template: src/f16-vrelu/neonfp16arith.c.in
-//   Generator: tools/xngen
-//
-// Copyright 2020 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <assert.h>
-
-#include <arm_neon.h>
-
-#include <xnnpack/common.h>
-#include <xnnpack/vunary.h>
-
-
-void xnn_f16_vrelu_ukernel__neonfp16arith_x8(
-    size_t n,
-    const void* restrict x_ptr,
-    void* restrict y_ptr,
-    const union xnn_f16_relu_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
-{
-  assert(n != 0);
-  assert(n % sizeof(__fp16) == 0);
-  assert(x_ptr != NULL);
-  assert(y_ptr != NULL);
-
-  const __fp16* x = (const __fp16*) x_ptr;
-  __fp16* y = (__fp16*) y_ptr;
-
-  const float16x8_t vzero = vmovq_n_f16(0);
-
-  for (; n >= 8 * sizeof(__fp16); n -= 8 * sizeof(__fp16)) {
-    float16x8_t vacc01234567 = vld1q_f16(x); x += 8;
-
-    vacc01234567 = vmaxq_f16(vacc01234567, vzero);
-
-    vst1q_f16(y, vacc01234567); y += 8;
-  }
-  if XNN_UNLIKELY(n != 0) {
-    float16x8_t vacc = vld1q_f16(x);
-    vacc = vmaxq_f16(vacc, vzero);
-
-    float16x4_t vacc_lo = vget_low_f16(vacc);
-    if (n & (4 * sizeof(__fp16))) {
-      vst1_f16(y, vacc_lo); y += 4;
-      vacc_lo = vget_high_f16(vacc);
-    }
-    if (n & (2 * sizeof(__fp16))) {
-      vst1_lane_u32((void*) y, vreinterpret_u32_f16(vacc_lo), 0); y += 2;
-      vacc_lo = vext_f16(vacc_lo, vacc_lo, 2);
-    }
-    if (n & (1 * sizeof(__fp16))) {
-      vst1_lane_f16(y, vacc_lo, 0);
-    }
-  }
-}
diff --git a/src/f16-vrelu/neonfp16arith.c.in b/src/f16-vrelu/neonfp16arith.c.in
deleted file mode 100644
index 8be9477..0000000
--- a/src/f16-vrelu/neonfp16arith.c.in
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-$assert BATCH_TILE % 8 == 0
-$assert BATCH_TILE >= 8
-$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-#include <assert.h>
-
-#include <arm_neon.h>
-
-#include <xnnpack/common.h>
-#include <xnnpack/vunary.h>
-
-
-void xnn_f16_vrelu_ukernel__neonfp16arith_x${BATCH_TILE}(
-    size_t n,
-    const void* restrict x_ptr,
-    void* restrict y_ptr,
-    const union xnn_f16_relu_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
-{
-  assert(n != 0);
-  assert(n % sizeof(__fp16) == 0);
-  assert(x_ptr != NULL);
-  assert(y_ptr != NULL);
-
-  const __fp16* x = (const __fp16*) x_ptr;
-  __fp16* y = (__fp16*) y_ptr;
-
-  const float16x8_t vzero = vmovq_n_f16(0);
-
-  for (; n >= ${BATCH_TILE} * sizeof(__fp16); n -= ${BATCH_TILE} * sizeof(__fp16)) {
-    $for N in range(0, BATCH_TILE, 8):
-      float16x8_t vacc${ABC[N:N+8]} = vld1q_f16(x); x += 8;
-
-    $for N in range(0, BATCH_TILE, 8):
-      vacc${ABC[N:N+8]} = vmaxq_f16(vacc${ABC[N:N+8]}, vzero);
-
-    $for N in range(0, BATCH_TILE, 8):
-      vst1q_f16(y, vacc${ABC[N:N+8]}); y += 8;
-  }
-  $if BATCH_TILE > 8:
-    for (; n >= 8 * sizeof(__fp16); n -= 8 * sizeof(__fp16)) {
-      float16x8_t vacc = vld1q_f16(x); x += 8;
-      vacc = vmaxq_f16(vacc, vzero);
-      vst1q_f16(y, vacc); y += 8;
-    }
-  if XNN_UNLIKELY(n != 0) {
-    float16x8_t vacc = vld1q_f16(x);
-    vacc = vmaxq_f16(vacc, vzero);
-
-    float16x4_t vacc_lo = vget_low_f16(vacc);
-    if (n & (4 * sizeof(__fp16))) {
-      vst1_f16(y, vacc_lo); y += 4;
-      vacc_lo = vget_high_f16(vacc);
-    }
-    if (n & (2 * sizeof(__fp16))) {
-      vst1_lane_u32((void*) y, vreinterpret_u32_f16(vacc_lo), 0); y += 2;
-      vacc_lo = vext_f16(vacc_lo, vacc_lo, 2);
-    }
-    if (n & (1 * sizeof(__fp16))) {
-      vst1_lane_f16(y, vacc_lo, 0);
-    }
-  }
-}
diff --git a/test/f16-vrelu.cc b/test/f16-vrelu.cc
deleted file mode 100644
index f45c686..0000000
--- a/test/f16-vrelu.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-//
-// Auto-generated file. Do not edit!
-//   Specification: test/f16-vrelu.yaml
-//   Generator: tools/generate-vunary-test.py
-
-
-#include <gtest/gtest.h>
-
-#include <xnnpack/common.h>
-#include <xnnpack/isa-checks.h>
-
-#include <xnnpack/vunary.h>
-#include "vunary-microkernel-tester.h"
-
-
-#if XNN_ARCH_ARM64
-  TEST(F16_VRELU__NEONFP16ARITH_X8, batch_eq_8) {
-    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
-    VUnaryMicrokernelTester()
-      .batch_size(8)
-      .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x8, VUnaryMicrokernelTester::OpType::ReLU);
-  }
-
-  TEST(F16_VRELU__NEONFP16ARITH_X8, batch_div_8) {
-    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
-    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
-      VUnaryMicrokernelTester()
-        .batch_size(batch_size)
-        .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x8, VUnaryMicrokernelTester::OpType::ReLU);
-    }
-  }
-
-  TEST(F16_VRELU__NEONFP16ARITH_X8, batch_lt_8) {
-    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
-    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
-      VUnaryMicrokernelTester()
-        .batch_size(batch_size)
-        .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x8, VUnaryMicrokernelTester::OpType::ReLU);
-    }
-  }
-
-  TEST(F16_VRELU__NEONFP16ARITH_X8, batch_gt_8) {
-    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
-    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
-      VUnaryMicrokernelTester()
-        .batch_size(batch_size)
-        .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x8, VUnaryMicrokernelTester::OpType::ReLU);
-    }
-  }
-
-  TEST(F16_VRELU__NEONFP16ARITH_X8, inplace) {
-    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
-    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
-      VUnaryMicrokernelTester()
-        .batch_size(batch_size)
-        .inplace(true)
-        .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x8, VUnaryMicrokernelTester::OpType::ReLU);
-    }
-  }
-#endif  // XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM64
-  TEST(F16_VRELU__NEONFP16ARITH_X16, batch_eq_16) {
-    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
-    VUnaryMicrokernelTester()
-      .batch_size(16)
-      .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x16, VUnaryMicrokernelTester::OpType::ReLU);
-  }
-
-  TEST(F16_VRELU__NEONFP16ARITH_X16, batch_div_16) {
-    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
-    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
-      VUnaryMicrokernelTester()
-        .batch_size(batch_size)
-        .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x16, VUnaryMicrokernelTester::OpType::ReLU);
-    }
-  }
-
-  TEST(F16_VRELU__NEONFP16ARITH_X16, batch_lt_16) {
-    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
-    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
-      VUnaryMicrokernelTester()
-        .batch_size(batch_size)
-        .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x16, VUnaryMicrokernelTester::OpType::ReLU);
-    }
-  }
-
-  TEST(F16_VRELU__NEONFP16ARITH_X16, batch_gt_16) {
-    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
-    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
-      VUnaryMicrokernelTester()
-        .batch_size(batch_size)
-        .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x16, VUnaryMicrokernelTester::OpType::ReLU);
-    }
-  }
-
-  TEST(F16_VRELU__NEONFP16ARITH_X16, inplace) {
-    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
-    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
-      VUnaryMicrokernelTester()
-        .batch_size(batch_size)
-        .inplace(true)
-        .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x16, VUnaryMicrokernelTester::OpType::ReLU);
-    }
-  }
-#endif  // XNN_ARCH_ARM64
diff --git a/test/f16-vrelu.yaml b/test/f16-vrelu.yaml
deleted file mode 100644
index ad5cc0f..0000000
--- a/test/f16-vrelu.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-- name: xnn_f16_vrelu_ukernel__neonfp16arith_x8
-  arch:
-    - aarch64
-- name: xnn_f16_vrelu_ukernel__neonfp16arith_x16
-  arch:
-    - aarch64
diff --git a/test/vunary-microkernel-tester.h b/test/vunary-microkernel-tester.h
index bc5fc8c..aa55b23 100644
--- a/test/vunary-microkernel-tester.h
+++ b/test/vunary-microkernel-tester.h
@@ -575,58 +575,6 @@
     Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
   }
 
-  void Test(xnn_f16_vunary_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
-    std::random_device random_device;
-    auto rng = std::mt19937(random_device());
-    auto distribution = std::uniform_real_distribution<float>(-125.0f, 125.0f);
-    auto f32rng = std::bind(distribution, std::ref(rng));
-    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
-
-    std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
-    std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
-    std::vector<float> y_ref(batch_size());
-    for (size_t iteration = 0; iteration < iterations(); iteration++) {
-      std::generate(x.begin(), x.end(), std::ref(f16rng));
-      if (inplace()) {
-        std::generate(y.begin(), y.end(), std::ref(f16rng));
-      } else {
-        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
-      }
-      const uint16_t* x_data = inplace() ? y.data() : x.data();
-
-      // Compute reference results.
-      for (size_t i = 0; i < batch_size(); i++) {
-        switch (op_type) {
-          case OpType::ReLU:
-            y_ref[i] = std::max(fp16_ieee_to_fp32_value(x_data[i]), 0.0f);
-            break;
-          default:
-            GTEST_FAIL() << "Unexpected op type";
-        }
-      }
-
-      // Prepare parameters.
-      union {
-        union xnn_f16_minmax_params minmax;
-      } params;
-      switch (op_type) {
-        case OpType::ReLU:
-          break;
-        default:
-          GTEST_FAIL() << "Unexpected op type";
-      }
-
-      // Call optimized micro-kernel.
-      vunary(batch_size() * sizeof(uint16_t), x_data, y.data(), &params);
-
-      // Verify results.
-      for (size_t i = 0; i < batch_size(); i++) {
-        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
-          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << fp16_ieee_to_fp32_value(x[i]);
-      }
-    }
-  }
-
   void Test(xnn_f16_vclamp_ukernel_function vclamp, xnn_init_f16_minmax_params_fn init_params) const {
     std::random_device random_device;
     auto rng = std::mt19937(random_device());