Remove F16 VRELU microkernels
F16 VRELU microkernels are redundant because MIN/MAX instructions are fast on
platforms that support FP16 computations.
PiperOrigin-RevId: 420173643
diff --git a/BUILD.bazel b/BUILD.bazel
index 75e907a..bdb542e 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -4347,8 +4347,6 @@
"src/f16-vhswish/gen/vhswish-neonfp16arith-x16.c",
"src/f16-vmulcaddc/gen/c8-minmax-neonfp16arith-2x.c",
"src/f16-vmulcaddc/gen/c16-minmax-neonfp16arith-2x.c",
- "src/f16-vrelu/gen/vrelu-neonfp16arith-x8.c",
- "src/f16-vrelu/gen/vrelu-neonfp16arith-x16.c",
]
PROD_NEONDOT_MICROKERNEL_SRCS = [
@@ -9289,15 +9287,6 @@
)
xnnpack_benchmark(
- name = "f16_vrelu_bench",
- srcs = [
- "bench/f16-vrelu.cc",
- "src/xnnpack/AlignedAllocator.h",
- ] + MICROKERNEL_BENCHMARK_HDRS,
- deps = MICROKERNEL_BENCHMARK_DEPS,
-)
-
-xnnpack_benchmark(
name = "f16_f32_vcvt_bench",
srcs = [
"bench/f16-f32-vcvt.cc",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f413ff0..a6bb979 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3113,9 +3113,7 @@
src/f16-vhswish/gen/vhswish-neonfp16arith-x8.c
src/f16-vhswish/gen/vhswish-neonfp16arith-x16.c
src/f16-vmulcaddc/gen/c8-minmax-neonfp16arith-2x.c
- src/f16-vmulcaddc/gen/c16-minmax-neonfp16arith-2x.c
- src/f16-vrelu/gen/vrelu-neonfp16arith-x8.c
- src/f16-vrelu/gen/vrelu-neonfp16arith-x16.c)
+ src/f16-vmulcaddc/gen/c16-minmax-neonfp16arith-2x.c)
SET(PROD_NEONDOT_MICROKERNEL_SRCS
src/qc8-gemm/gen/1x8c4-minmax-fp32-neondot.c
@@ -8349,14 +8347,6 @@
TARGET_INCLUDE_DIRECTORIES(f16-igemm-bench PRIVATE . include src)
TARGET_LINK_LIBRARIES(f16-igemm-bench PRIVATE benchmark bench-utils cpuinfo fp16 pthreadpool)
- ADD_EXECUTABLE(f16-vrelu-bench bench/f16-vrelu.cc $<TARGET_OBJECTS:all_microkernels>)
- SET_TARGET_PROPERTIES(f16-vrelu-bench PROPERTIES
- CXX_STANDARD 11
- CXX_STANDARD_REQUIRED YES
- CXX_EXTENSIONS YES)
- TARGET_INCLUDE_DIRECTORIES(f16-vrelu-bench PRIVATE . include src)
- TARGET_LINK_LIBRARIES(f16-vrelu-bench PRIVATE benchmark bench-utils cpuinfo fp16 pthreadpool)
-
ADD_EXECUTABLE(f16-f32-vcvt-bench bench/f16-f32-vcvt.cc $<TARGET_OBJECTS:all_microkernels>)
SET_TARGET_PROPERTIES(f16-f32-vcvt-bench PROPERTIES
CXX_STANDARD 11
diff --git a/bench/f16-vrelu.cc b/bench/f16-vrelu.cc
deleted file mode 100644
index 86f7586..0000000
--- a/bench/f16-vrelu.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <algorithm>
-#include <cmath>
-#include <functional>
-#include <random>
-#include <vector>
-
-#include <benchmark/benchmark.h>
-#include <fp16/fp16.h>
-#include "bench/utils.h"
-#include <xnnpack/AlignedAllocator.h>
-#include <xnnpack/common.h>
-#include <xnnpack/params.h>
-#include <xnnpack/vunary.h>
-
-
-static void f16_vrelu(
- benchmark::State& state,
- xnn_f16_vrelu_ukernel_function f16_vrelu,
- benchmark::utils::IsaCheckFunction isa_check = nullptr)
-{
- if (isa_check && !isa_check(state)) {
- return;
- }
-
- const size_t elements = state.range(0);
-
- std::random_device random_device;
- auto rng = std::mt19937(random_device());
- auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
- auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
-
- std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> x(elements);
- std::generate(x.begin(), x.end(), std::ref(f16rng));
- std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> y(elements);
- std::generate(x.begin(), x.end(), std::ref(f16rng));
-
- for (auto _ : state) {
- f16_vrelu(elements * sizeof(uint16_t), x.data(), y.data(), NULL);
- }
-
- const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
- if (cpu_frequency != 0) {
- state.counters["cpufreq"] = cpu_frequency;
- }
-
- const size_t elements_per_iteration = elements;
- state.counters["elements"] =
- benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
-
- const size_t bytes_per_iteration = 2 * elements * sizeof(uint16_t);
- state.counters["bytes"] =
- benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
-}
-
-#if XNN_ARCH_ARM64
- BENCHMARK_CAPTURE(f16_vrelu, neonfp16arith_x8, xnn_f16_vrelu_ukernel__neonfp16arith_x8, benchmark::utils::CheckNEONFP16ARITH)
- ->RangeMultiplier(10)
- ->Range(1000, 100000000)
- ->UseRealTime();
- BENCHMARK_CAPTURE(f16_vrelu, neonfp16arith_x16, xnn_f16_vrelu_ukernel__neonfp16arith_x16, benchmark::utils::CheckNEONFP16ARITH)
- ->RangeMultiplier(10)
- ->Range(1000, 100000000)
- ->UseRealTime();
-#endif // XNN_ARCH_ARM64
-
-
-#ifndef XNNPACK_BENCHMARK_NO_MAIN
-BENCHMARK_MAIN();
-#endif
diff --git a/scripts/generate-f16-vrelu.sh b/scripts/generate-f16-vrelu.sh
deleted file mode 100755
index fc763a9..0000000
--- a/scripts/generate-f16-vrelu.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/sh
-# Copyright 2020 Google LLC
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-################################### ARM NEON ##################################
-tools/xngen src/f16-vrelu/neonfp16arith.c.in -D BATCH_TILE=8 -o src/f16-vrelu/gen/vrelu-neonfp16arith-x8.c &
-tools/xngen src/f16-vrelu/neonfp16arith.c.in -D BATCH_TILE=16 -o src/f16-vrelu/gen/vrelu-neonfp16arith-x16.c &
-
-################################## Unit tests #################################
-tools/generate-vunary-test.py --spec test/f16-vrelu.yaml --output test/f16-vrelu.cc &
-
-wait
diff --git a/src/f16-vrelu/gen/vrelu-neonfp16arith-x16.c b/src/f16-vrelu/gen/vrelu-neonfp16arith-x16.c
deleted file mode 100644
index 79be38d..0000000
--- a/src/f16-vrelu/gen/vrelu-neonfp16arith-x16.c
+++ /dev/null
@@ -1,66 +0,0 @@
-// Auto-generated file. Do not edit!
-// Template: src/f16-vrelu/neonfp16arith.c.in
-// Generator: tools/xngen
-//
-// Copyright 2020 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <assert.h>
-
-#include <arm_neon.h>
-
-#include <xnnpack/common.h>
-#include <xnnpack/vunary.h>
-
-
-void xnn_f16_vrelu_ukernel__neonfp16arith_x16(
- size_t n,
- const void* restrict x_ptr,
- void* restrict y_ptr,
- const union xnn_f16_relu_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
-{
- assert(n != 0);
- assert(n % sizeof(__fp16) == 0);
- assert(x_ptr != NULL);
- assert(y_ptr != NULL);
-
- const __fp16* x = (const __fp16*) x_ptr;
- __fp16* y = (__fp16*) y_ptr;
-
- const float16x8_t vzero = vmovq_n_f16(0);
-
- for (; n >= 16 * sizeof(__fp16); n -= 16 * sizeof(__fp16)) {
- float16x8_t vacc01234567 = vld1q_f16(x); x += 8;
- float16x8_t vacc89ABCDEF = vld1q_f16(x); x += 8;
-
- vacc01234567 = vmaxq_f16(vacc01234567, vzero);
- vacc89ABCDEF = vmaxq_f16(vacc89ABCDEF, vzero);
-
- vst1q_f16(y, vacc01234567); y += 8;
- vst1q_f16(y, vacc89ABCDEF); y += 8;
- }
- for (; n >= 8 * sizeof(__fp16); n -= 8 * sizeof(__fp16)) {
- float16x8_t vacc = vld1q_f16(x); x += 8;
- vacc = vmaxq_f16(vacc, vzero);
- vst1q_f16(y, vacc); y += 8;
- }
- if XNN_UNLIKELY(n != 0) {
- float16x8_t vacc = vld1q_f16(x);
- vacc = vmaxq_f16(vacc, vzero);
-
- float16x4_t vacc_lo = vget_low_f16(vacc);
- if (n & (4 * sizeof(__fp16))) {
- vst1_f16(y, vacc_lo); y += 4;
- vacc_lo = vget_high_f16(vacc);
- }
- if (n & (2 * sizeof(__fp16))) {
- vst1_lane_u32((void*) y, vreinterpret_u32_f16(vacc_lo), 0); y += 2;
- vacc_lo = vext_f16(vacc_lo, vacc_lo, 2);
- }
- if (n & (1 * sizeof(__fp16))) {
- vst1_lane_f16(y, vacc_lo, 0);
- }
- }
-}
diff --git a/src/f16-vrelu/gen/vrelu-neonfp16arith-x8.c b/src/f16-vrelu/gen/vrelu-neonfp16arith-x8.c
deleted file mode 100644
index 106d557..0000000
--- a/src/f16-vrelu/gen/vrelu-neonfp16arith-x8.c
+++ /dev/null
@@ -1,58 +0,0 @@
-// Auto-generated file. Do not edit!
-// Template: src/f16-vrelu/neonfp16arith.c.in
-// Generator: tools/xngen
-//
-// Copyright 2020 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <assert.h>
-
-#include <arm_neon.h>
-
-#include <xnnpack/common.h>
-#include <xnnpack/vunary.h>
-
-
-void xnn_f16_vrelu_ukernel__neonfp16arith_x8(
- size_t n,
- const void* restrict x_ptr,
- void* restrict y_ptr,
- const union xnn_f16_relu_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
-{
- assert(n != 0);
- assert(n % sizeof(__fp16) == 0);
- assert(x_ptr != NULL);
- assert(y_ptr != NULL);
-
- const __fp16* x = (const __fp16*) x_ptr;
- __fp16* y = (__fp16*) y_ptr;
-
- const float16x8_t vzero = vmovq_n_f16(0);
-
- for (; n >= 8 * sizeof(__fp16); n -= 8 * sizeof(__fp16)) {
- float16x8_t vacc01234567 = vld1q_f16(x); x += 8;
-
- vacc01234567 = vmaxq_f16(vacc01234567, vzero);
-
- vst1q_f16(y, vacc01234567); y += 8;
- }
- if XNN_UNLIKELY(n != 0) {
- float16x8_t vacc = vld1q_f16(x);
- vacc = vmaxq_f16(vacc, vzero);
-
- float16x4_t vacc_lo = vget_low_f16(vacc);
- if (n & (4 * sizeof(__fp16))) {
- vst1_f16(y, vacc_lo); y += 4;
- vacc_lo = vget_high_f16(vacc);
- }
- if (n & (2 * sizeof(__fp16))) {
- vst1_lane_u32((void*) y, vreinterpret_u32_f16(vacc_lo), 0); y += 2;
- vacc_lo = vext_f16(vacc_lo, vacc_lo, 2);
- }
- if (n & (1 * sizeof(__fp16))) {
- vst1_lane_f16(y, vacc_lo, 0);
- }
- }
-}
diff --git a/src/f16-vrelu/neonfp16arith.c.in b/src/f16-vrelu/neonfp16arith.c.in
deleted file mode 100644
index 8be9477..0000000
--- a/src/f16-vrelu/neonfp16arith.c.in
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-$assert BATCH_TILE % 8 == 0
-$assert BATCH_TILE >= 8
-$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-#include <assert.h>
-
-#include <arm_neon.h>
-
-#include <xnnpack/common.h>
-#include <xnnpack/vunary.h>
-
-
-void xnn_f16_vrelu_ukernel__neonfp16arith_x${BATCH_TILE}(
- size_t n,
- const void* restrict x_ptr,
- void* restrict y_ptr,
- const union xnn_f16_relu_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
-{
- assert(n != 0);
- assert(n % sizeof(__fp16) == 0);
- assert(x_ptr != NULL);
- assert(y_ptr != NULL);
-
- const __fp16* x = (const __fp16*) x_ptr;
- __fp16* y = (__fp16*) y_ptr;
-
- const float16x8_t vzero = vmovq_n_f16(0);
-
- for (; n >= ${BATCH_TILE} * sizeof(__fp16); n -= ${BATCH_TILE} * sizeof(__fp16)) {
- $for N in range(0, BATCH_TILE, 8):
- float16x8_t vacc${ABC[N:N+8]} = vld1q_f16(x); x += 8;
-
- $for N in range(0, BATCH_TILE, 8):
- vacc${ABC[N:N+8]} = vmaxq_f16(vacc${ABC[N:N+8]}, vzero);
-
- $for N in range(0, BATCH_TILE, 8):
- vst1q_f16(y, vacc${ABC[N:N+8]}); y += 8;
- }
- $if BATCH_TILE > 8:
- for (; n >= 8 * sizeof(__fp16); n -= 8 * sizeof(__fp16)) {
- float16x8_t vacc = vld1q_f16(x); x += 8;
- vacc = vmaxq_f16(vacc, vzero);
- vst1q_f16(y, vacc); y += 8;
- }
- if XNN_UNLIKELY(n != 0) {
- float16x8_t vacc = vld1q_f16(x);
- vacc = vmaxq_f16(vacc, vzero);
-
- float16x4_t vacc_lo = vget_low_f16(vacc);
- if (n & (4 * sizeof(__fp16))) {
- vst1_f16(y, vacc_lo); y += 4;
- vacc_lo = vget_high_f16(vacc);
- }
- if (n & (2 * sizeof(__fp16))) {
- vst1_lane_u32((void*) y, vreinterpret_u32_f16(vacc_lo), 0); y += 2;
- vacc_lo = vext_f16(vacc_lo, vacc_lo, 2);
- }
- if (n & (1 * sizeof(__fp16))) {
- vst1_lane_f16(y, vacc_lo, 0);
- }
- }
-}
diff --git a/test/f16-vrelu.cc b/test/f16-vrelu.cc
deleted file mode 100644
index f45c686..0000000
--- a/test/f16-vrelu.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-//
-// Auto-generated file. Do not edit!
-// Specification: test/f16-vrelu.yaml
-// Generator: tools/generate-vunary-test.py
-
-
-#include <gtest/gtest.h>
-
-#include <xnnpack/common.h>
-#include <xnnpack/isa-checks.h>
-
-#include <xnnpack/vunary.h>
-#include "vunary-microkernel-tester.h"
-
-
-#if XNN_ARCH_ARM64
- TEST(F16_VRELU__NEONFP16ARITH_X8, batch_eq_8) {
- TEST_REQUIRES_ARM_NEON_FP16_ARITH;
- VUnaryMicrokernelTester()
- .batch_size(8)
- .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x8, VUnaryMicrokernelTester::OpType::ReLU);
- }
-
- TEST(F16_VRELU__NEONFP16ARITH_X8, batch_div_8) {
- TEST_REQUIRES_ARM_NEON_FP16_ARITH;
- for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
- VUnaryMicrokernelTester()
- .batch_size(batch_size)
- .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x8, VUnaryMicrokernelTester::OpType::ReLU);
- }
- }
-
- TEST(F16_VRELU__NEONFP16ARITH_X8, batch_lt_8) {
- TEST_REQUIRES_ARM_NEON_FP16_ARITH;
- for (size_t batch_size = 1; batch_size < 8; batch_size++) {
- VUnaryMicrokernelTester()
- .batch_size(batch_size)
- .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x8, VUnaryMicrokernelTester::OpType::ReLU);
- }
- }
-
- TEST(F16_VRELU__NEONFP16ARITH_X8, batch_gt_8) {
- TEST_REQUIRES_ARM_NEON_FP16_ARITH;
- for (size_t batch_size = 9; batch_size < 16; batch_size++) {
- VUnaryMicrokernelTester()
- .batch_size(batch_size)
- .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x8, VUnaryMicrokernelTester::OpType::ReLU);
- }
- }
-
- TEST(F16_VRELU__NEONFP16ARITH_X8, inplace) {
- TEST_REQUIRES_ARM_NEON_FP16_ARITH;
- for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
- VUnaryMicrokernelTester()
- .batch_size(batch_size)
- .inplace(true)
- .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x8, VUnaryMicrokernelTester::OpType::ReLU);
- }
- }
-#endif // XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM64
- TEST(F16_VRELU__NEONFP16ARITH_X16, batch_eq_16) {
- TEST_REQUIRES_ARM_NEON_FP16_ARITH;
- VUnaryMicrokernelTester()
- .batch_size(16)
- .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x16, VUnaryMicrokernelTester::OpType::ReLU);
- }
-
- TEST(F16_VRELU__NEONFP16ARITH_X16, batch_div_16) {
- TEST_REQUIRES_ARM_NEON_FP16_ARITH;
- for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
- VUnaryMicrokernelTester()
- .batch_size(batch_size)
- .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x16, VUnaryMicrokernelTester::OpType::ReLU);
- }
- }
-
- TEST(F16_VRELU__NEONFP16ARITH_X16, batch_lt_16) {
- TEST_REQUIRES_ARM_NEON_FP16_ARITH;
- for (size_t batch_size = 1; batch_size < 16; batch_size++) {
- VUnaryMicrokernelTester()
- .batch_size(batch_size)
- .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x16, VUnaryMicrokernelTester::OpType::ReLU);
- }
- }
-
- TEST(F16_VRELU__NEONFP16ARITH_X16, batch_gt_16) {
- TEST_REQUIRES_ARM_NEON_FP16_ARITH;
- for (size_t batch_size = 17; batch_size < 32; batch_size++) {
- VUnaryMicrokernelTester()
- .batch_size(batch_size)
- .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x16, VUnaryMicrokernelTester::OpType::ReLU);
- }
- }
-
- TEST(F16_VRELU__NEONFP16ARITH_X16, inplace) {
- TEST_REQUIRES_ARM_NEON_FP16_ARITH;
- for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
- VUnaryMicrokernelTester()
- .batch_size(batch_size)
- .inplace(true)
- .Test(xnn_f16_vrelu_ukernel__neonfp16arith_x16, VUnaryMicrokernelTester::OpType::ReLU);
- }
- }
-#endif // XNN_ARCH_ARM64
diff --git a/test/f16-vrelu.yaml b/test/f16-vrelu.yaml
deleted file mode 100644
index ad5cc0f..0000000
--- a/test/f16-vrelu.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-- name: xnn_f16_vrelu_ukernel__neonfp16arith_x8
- arch:
- - aarch64
-- name: xnn_f16_vrelu_ukernel__neonfp16arith_x16
- arch:
- - aarch64
diff --git a/test/vunary-microkernel-tester.h b/test/vunary-microkernel-tester.h
index bc5fc8c..aa55b23 100644
--- a/test/vunary-microkernel-tester.h
+++ b/test/vunary-microkernel-tester.h
@@ -575,58 +575,6 @@
Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
}
- void Test(xnn_f16_vunary_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
- std::random_device random_device;
- auto rng = std::mt19937(random_device());
- auto distribution = std::uniform_real_distribution<float>(-125.0f, 125.0f);
- auto f32rng = std::bind(distribution, std::ref(rng));
- auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
-
- std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
- std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
- std::vector<float> y_ref(batch_size());
- for (size_t iteration = 0; iteration < iterations(); iteration++) {
- std::generate(x.begin(), x.end(), std::ref(f16rng));
- if (inplace()) {
- std::generate(y.begin(), y.end(), std::ref(f16rng));
- } else {
- std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
- }
- const uint16_t* x_data = inplace() ? y.data() : x.data();
-
- // Compute reference results.
- for (size_t i = 0; i < batch_size(); i++) {
- switch (op_type) {
- case OpType::ReLU:
- y_ref[i] = std::max(fp16_ieee_to_fp32_value(x_data[i]), 0.0f);
- break;
- default:
- GTEST_FAIL() << "Unexpected op type";
- }
- }
-
- // Prepare parameters.
- union {
- union xnn_f16_minmax_params minmax;
- } params;
- switch (op_type) {
- case OpType::ReLU:
- break;
- default:
- GTEST_FAIL() << "Unexpected op type";
- }
-
- // Call optimized micro-kernel.
- vunary(batch_size() * sizeof(uint16_t), x_data, y.data(), ¶ms);
-
- // Verify results.
- for (size_t i = 0; i < batch_size(); i++) {
- ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
- << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << fp16_ieee_to_fp32_value(x[i]);
- }
- }
- }
-
void Test(xnn_f16_vclamp_ukernel_function vclamp, xnn_init_f16_minmax_params_fn init_params) const {
std::random_device random_device;
auto rng = std::mt19937(random_device());