ROUNDZ evaluation stubs
PiperOrigin-RevId: 311256662
diff --git a/BUILD.bazel b/BUILD.bazel
index 96fc30e..13f52ca 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -236,6 +236,8 @@
"src/math/expminus-scalar-lut64-p2.c",
"src/math/expminus-scalar-p5.c",
"src/math/roundne-scalar-addsub.c",
+ "src/math/roundz-scalar-addsub.c",
+ "src/math/roundz-scalar-cvt.c",
"src/math/sigmoid-scalar-lut2048-p1-div.c",
"src/math/sigmoid-scalar-lut64-p2-div.c",
"src/math/sigmoid-scalar-p5-div.c",
@@ -498,6 +500,7 @@
"src/f32-sigmoid/gen/psimd-p5-div-x20.c",
"src/f32-sigmoid/gen/psimd-p5-div-x24.c",
"src/math/roundne-psimd-addsub.c",
+ "src/math/roundz-psimd-addsub.c",
"src/math/sigmoid-psimd-p5-div.c",
]
@@ -673,6 +676,8 @@
"src/x8-zip/x4-neon.c",
"src/x8-zip/xm-neon.c",
"src/math/roundne-neon-addsub.c",
+ "src/math/roundz-neon-addsub.c",
+ "src/math/roundz-neon-cvt.c",
"src/math/sigmoid-neon-frac-p9-p10-nr1recps.c",
"src/math/sigmoid-neon-rr1-lut2048-p1-nr2recps.c",
"src/math/sigmoid-neon-rr1-lut64-p2-nr2recps.c",
@@ -917,6 +922,7 @@
NEONV8_UKERNELS = [
"src/math/roundne-neonv8.c",
+ "src/math/roundz-neonv8.c",
]
AARCH64_NEONFP16ARITH_UKERNELS = [
@@ -1073,6 +1079,7 @@
"src/f32-vmulcaddc/gen/c8-minmax-sse-2x.c",
"src/x32-packx/x4-sse.c",
"src/math/roundne-sse-addsub.c",
+ "src/math/roundz-sse-addsub.c",
]
SSE2_UKERNELS = [
@@ -1124,6 +1131,7 @@
"src/math/exp-sse2-p5.c",
"src/math/expminus-sse2-p5.c",
"src/math/roundne-sse2-cvt.c",
+ "src/math/roundz-sse2-cvt.c",
"src/math/sigmoid-sse2-p5-div.c",
"src/requantization/precise-sse2.c",
"src/requantization/fp32-sse2.c",
@@ -1147,6 +1155,7 @@
"src/f32-sigmoid/gen/sse41-p5-div-x20.c",
"src/f32-sigmoid/gen/sse41-p5-div-x24.c",
"src/math/roundne-sse41.c",
+ "src/math/roundz-sse41.c",
"src/requantization/precise-sse4.c",
"src/requantization/q31-sse4.c",
"src/requantization/gemmlowp-sse4.c",
@@ -2603,6 +2612,19 @@
deps = MICROKERNEL_TEST_DEPS,
)
+xnnpack_unit_test(
+ name = "f32_roundz_eval",
+ srcs = [
+ "eval/f32-roundz.cc",
+ "src/xnnpack/AlignedAllocator.h",
+ "src/xnnpack/math-stubs.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ tags = [
+ "notap",
+ ],
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
xnnpack_benchmark(
name = "f32_sigmoid_eval",
srcs = [
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5aee75c..a54578b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -348,6 +348,8 @@
src/math/expminus-scalar-lut64-p2.c
src/math/expminus-scalar-p5.c
src/math/roundne-scalar-addsub.c
+ src/math/roundz-scalar-addsub.c
+ src/math/roundz-scalar-cvt.c
src/math/sigmoid-scalar-lut2048-p1-div.c
src/math/sigmoid-scalar-lut64-p2-div.c
src/math/sigmoid-scalar-p5-div.c
@@ -504,6 +506,7 @@
src/f32-sigmoid/gen/psimd-p5-div-x20.c
src/f32-sigmoid/gen/psimd-p5-div-x24.c
src/math/roundne-psimd-addsub.c
+ src/math/roundz-psimd-addsub.c
src/math/sigmoid-psimd-p5-div.c)
SET(XNNPACK_NEON_MICROKERNEL_SRCS
@@ -677,6 +680,8 @@
src/x8-zip/x4-neon.c
src/x8-zip/xm-neon.c
src/math/roundne-neon-addsub.c
+ src/math/roundz-neon-addsub.c
+ src/math/roundz-neon-cvt.c
src/math/sigmoid-neon-frac-p9-p10-nr1recps.c
src/math/sigmoid-neon-rr1-lut2048-p1-nr2recps.c
src/math/sigmoid-neon-rr1-lut64-p2-nr2recps.c
@@ -840,7 +845,8 @@
src/math/sigmoid-neonfma-rr2-p5-nr2recps.c)
SET(XNNPACK_NEONV8_MICROKERNEL_SRCS
- src/math/roundne-neonv8.c)
+ src/math/roundne-neonv8.c
+ src/math/roundz-neonv8.c)
SET(XNNPACK_AARCH64_NEONFMA_MICROKERNEL_SRCS
src/f32-vbinary/gen/vdiv-minmax-neon-x4.c
@@ -1071,7 +1077,8 @@
src/f32-vmulcaddc/gen/c4-minmax-sse-2x.c
src/f32-vmulcaddc/gen/c8-minmax-sse-2x.c
src/x32-packx/x4-sse.c
- src/math/roundne-sse-addsub.c)
+ src/math/roundne-sse-addsub.c
+ src/math/roundz-sse-addsub.c)
SET(XNNPACK_SSE2_MICROKERNEL_SRCS
src/f32-argmaxpool/4x-sse2-c4.c
@@ -1122,6 +1129,7 @@
src/math/exp-sse2-p5.c
src/math/expminus-sse2-p5.c
src/math/roundne-sse2-cvt.c
+ src/math/roundz-sse2-cvt.c
src/math/sigmoid-sse2-p5-div.c
src/requantization/precise-sse2.c
src/requantization/fp32-sse2.c
@@ -1143,6 +1151,7 @@
src/f32-sigmoid/gen/sse41-p5-div-x20.c
src/f32-sigmoid/gen/sse41-p5-div-x24.c
src/math/roundne-sse41.c
+ src/math/roundz-sse41.c
src/requantization/precise-sse4.c
src/requantization/q31-sse4.c
src/requantization/gemmlowp-sse4.c)
@@ -2895,6 +2904,14 @@
TARGET_INCLUDE_DIRECTORIES(f32-roundne-eval PRIVATE src)
TARGET_LINK_LIBRARIES(f32-roundne-eval PRIVATE XNNPACK fp16 gtest gtest_main)
+ ADD_EXECUTABLE(f32-roundz-eval eval/f32-roundz.cc)
+ SET_TARGET_PROPERTIES(f32-roundz-eval PROPERTIES
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS NO)
+ TARGET_INCLUDE_DIRECTORIES(f32-roundz-eval PRIVATE src)
+ TARGET_LINK_LIBRARIES(f32-roundz-eval PRIVATE XNNPACK fp16 gtest gtest_main)
+
ADD_EXECUTABLE(f32-sigmoid-eval eval/f32-sigmoid.cc)
SET_TARGET_PROPERTIES(f32-sigmoid-eval PROPERTIES
CXX_STANDARD 11
diff --git a/eval/f32-roundz.cc b/eval/f32-roundz.cc
new file mode 100644
index 0000000..dd288b3
--- /dev/null
+++ b/eval/f32-roundz.cc
@@ -0,0 +1,1874 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iomanip>
+#include <ios>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include <fp16.h>
+
+#include <xnnpack/AlignedAllocator.h>
+#include <xnnpack/common.h>
+#include <xnnpack/math-stubs.h>
+
+
+constexpr int kBlockSize = 1024;
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ TEST(ROUNDZ__SSE_ADDSUB, positive_normal) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE_ADDSUB, negative_normal) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE_ADDSUB, positive_integral) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE_ADDSUB, negative_integral) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE_ADDSUB, positive_infinity) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x7F800000)));
+ xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+ }
+
+ TEST(ROUNDZ__SSE_ADDSUB, negative_infinity) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0xFF800000)));
+ xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+ }
+
+ TEST(ROUNDZ__SSE_ADDSUB, positive_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE_ADDSUB, negative_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
+ }
+ xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE_ADDSUB, positive_snan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE_ADDSUB, negative_snan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE_ADDSUB, positive_snan_to_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE_ADDSUB, negative_snan_to_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ TEST(ROUNDZ__SSE2_CVT, positive_normal) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE2_CVT, negative_normal) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE2_CVT, positive_integral) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE2_CVT, negative_integral) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE2_CVT, positive_infinity) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x7F800000)));
+ xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+ }
+
+ TEST(ROUNDZ__SSE2_CVT, negative_infinity) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0xFF800000)));
+ xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+ }
+
+ TEST(ROUNDZ__SSE2_CVT, positive_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE2_CVT, negative_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
+ }
+ xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE2_CVT, positive_snan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE2_CVT, negative_snan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE2_CVT, DISABLED_positive_snan_to_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE2_CVT, DISABLED_negative_snan_to_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ TEST(ROUNDZ__SSE41, positive_normal) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE41, negative_normal) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE41, positive_integral) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE41, negative_integral) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ TEST(ROUNDZ__SSE41, positive_infinity) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x7F800000)));
+ xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+ }
+
+// Verifies that round-towards-zero returns -infinity unchanged.
+TEST(ROUNDZ__SSE41, negative_infinity) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  // Bug fix: passing UINT32_C(0xFF800000) straight to std::fill implicitly
+  // converted the *integer* to a large positive float instead of producing
+  // the -infinity bit pattern, so the test never exercised infinity.  Route
+  // the bits through fp32_from_bits(), as done everywhere else in this file.
+  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0xFF800000)));
+  xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+  // All lanes hold the same value, so checking lane 0 is sufficient.
+  const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+}
+
+// Quiet-NaN inputs (bit patterns 0x7FC00000..0x7FFFFFFF): the kernel must
+// produce bit-identical output to std::trunc — i.e. propagate the NaN
+// (assumes std::trunc preserves the qNaN payload on this platform).
+TEST(ROUNDZ__SSE41, positive_qnan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Same as positive_qnan, with the sign bit set on every input.
+TEST(ROUNDZ__SSE41, negative_qnan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
+    }
+    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Signaling-NaN inputs (0x7F800001..0x7FBFFFFF): output must be a NaN.  Both
+// sides are masked with 0xFFBFFFFF, which clears bit 22 (the quiet bit), so
+// the test accepts either the original sNaN or its quieted qNaN form.
+TEST(ROUNDZ__SSE41, positive_snan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      // The std::max clamp replaces bit pattern 0x7F800000 (infinity) with
+      // the smallest sNaN 0x7F800001 so every lane holds a NaN.
+      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Same as positive_snan, with the sign bit set on every input.
+TEST(ROUNDZ__SSE41, negative_snan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Stricter variant of positive_snan: exact (unmasked) bit comparison against
+// std::trunc, i.e. the kernel must quiet signaling NaNs exactly the way the
+// reference does on this platform.
+TEST(ROUNDZ__SSE41, positive_snan_to_qnan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Same as positive_snan_to_qnan, with the sign bit set on every input.
+TEST(ROUNDZ__SSE41, negative_snan_to_qnan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+// Exhaustive bit-level sweeps of the NEON add/sub roundz kernel against
+// std::trunc.  Sweeps every non-negative pattern below 2**24
+// (0x00000000..0x4B7FFFFF): zero, subnormals, and all positive values small
+// enough to carry fractional bits.
+TEST(ROUNDZ__NEON_ADDSUB, positive_normal) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      // Bit-exact comparison so signed zeroes cannot compare incorrectly.
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Mirror of positive_normal over the negative half of the bit space
+// (0x80000000..0xCB7FFFFF), i.e. -0.0f down to just above -2**24.
+TEST(ROUNDZ__NEON_ADDSUB, negative_normal) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Positive values >= 2**24 (0x4B800000..0x7F7FFFFF) have no fractional bits;
+// truncation must return them unchanged.
+TEST(ROUNDZ__NEON_ADDSUB, positive_integral) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Mirror of positive_integral for negative values <= -2**24
+// (0xCB800000..0xFF7FFFFF).
+TEST(ROUNDZ__NEON_ADDSUB, negative_integral) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Verifies that round-towards-zero returns +infinity unchanged.
+TEST(ROUNDZ__NEON_ADDSUB, positive_infinity) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  // Bug fix: passing UINT32_C(0x7F800000) straight to std::fill implicitly
+  // converted the *integer* to the float 2139095040.0f instead of producing
+  // the +infinity bit pattern, so the test never exercised infinity.  Route
+  // the bits through fp32_from_bits(), as done everywhere else in this file.
+  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x7F800000)));
+  xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+  // All lanes hold the same value, so checking lane 0 is sufficient.
+  const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+}
+
+// Verifies that round-towards-zero returns -infinity unchanged.
+TEST(ROUNDZ__NEON_ADDSUB, negative_infinity) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  // Bug fix: passing UINT32_C(0xFF800000) straight to std::fill implicitly
+  // converted the *integer* to a large positive float instead of producing
+  // the -infinity bit pattern, so the test never exercised infinity.  Route
+  // the bits through fp32_from_bits(), as done everywhere else in this file.
+  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0xFF800000)));
+  xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+  // All lanes hold the same value, so checking lane 0 is sufficient.
+  const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+}
+
+// Quiet-NaN inputs (bit patterns 0x7FC00000..0x7FFFFFFF): the kernel must
+// produce bit-identical output to std::trunc — i.e. propagate the NaN
+// (assumes std::trunc preserves the qNaN payload on this platform).
+TEST(ROUNDZ__NEON_ADDSUB, positive_qnan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Same as positive_qnan, with the sign bit set on every input.
+TEST(ROUNDZ__NEON_ADDSUB, negative_qnan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
+    }
+    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Signaling-NaN inputs (0x7F800001..0x7FBFFFFF): output must be a NaN.  Both
+// sides are masked with 0xFFBFFFFF, which clears bit 22 (the quiet bit), so
+// the test accepts either the original sNaN or its quieted qNaN form.
+TEST(ROUNDZ__NEON_ADDSUB, positive_snan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      // The std::max clamp replaces bit pattern 0x7F800000 (infinity) with
+      // the smallest sNaN 0x7F800001 so every lane holds a NaN.
+      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Same as positive_snan, with the sign bit set on every input.
+TEST(ROUNDZ__NEON_ADDSUB, negative_snan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Stricter variant of positive_snan: exact (unmasked) bit comparison against
+// std::trunc, i.e. the kernel must quiet signaling NaNs exactly the way the
+// reference does on this platform.
+TEST(ROUNDZ__NEON_ADDSUB, positive_snan_to_qnan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Same as positive_snan_to_qnan, with the sign bit set on every input.
+TEST(ROUNDZ__NEON_ADDSUB, negative_snan_to_qnan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+// Exhaustive bit-level sweeps of the NEON convert-based roundz kernel against
+// std::trunc.  Sweeps every non-negative pattern below 2**24
+// (0x00000000..0x4B7FFFFF): zero, subnormals, and all positive values small
+// enough to carry fractional bits.
+TEST(ROUNDZ__NEON_CVT, positive_normal) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      // Bit-exact comparison so signed zeroes cannot compare incorrectly.
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Mirror of positive_normal over the negative half of the bit space
+// (0x80000000..0xCB7FFFFF), i.e. -0.0f down to just above -2**24.
+TEST(ROUNDZ__NEON_CVT, negative_normal) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Positive values >= 2**24 (0x4B800000..0x7F7FFFFF) have no fractional bits;
+// truncation must return them unchanged.
+TEST(ROUNDZ__NEON_CVT, positive_integral) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Mirror of positive_integral for negative values <= -2**24
+// (0xCB800000..0xFF7FFFFF).
+TEST(ROUNDZ__NEON_CVT, negative_integral) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Verifies that round-towards-zero returns +infinity unchanged.
+TEST(ROUNDZ__NEON_CVT, positive_infinity) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  // Bug fix: passing UINT32_C(0x7F800000) straight to std::fill implicitly
+  // converted the *integer* to the float 2139095040.0f instead of producing
+  // the +infinity bit pattern, so the test never exercised infinity.  Route
+  // the bits through fp32_from_bits(), as done everywhere else in this file.
+  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x7F800000)));
+  xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+  // All lanes hold the same value, so checking lane 0 is sufficient.
+  const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+}
+
+// Verifies that round-towards-zero returns -infinity unchanged.
+TEST(ROUNDZ__NEON_CVT, negative_infinity) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  // Bug fix: passing UINT32_C(0xFF800000) straight to std::fill implicitly
+  // converted the *integer* to a large positive float instead of producing
+  // the -infinity bit pattern, so the test never exercised infinity.  Route
+  // the bits through fp32_from_bits(), as done everywhere else in this file.
+  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0xFF800000)));
+  xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+  // All lanes hold the same value, so checking lane 0 is sufficient.
+  const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+}
+
+// Quiet-NaN inputs (bit patterns 0x7FC00000..0x7FFFFFFF): the kernel must
+// produce bit-identical output to std::trunc — i.e. propagate the NaN
+// (assumes std::trunc preserves the qNaN payload on this platform).
+TEST(ROUNDZ__NEON_CVT, positive_qnan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Same as positive_qnan, with the sign bit set on every input.
+TEST(ROUNDZ__NEON_CVT, negative_qnan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
+    }
+    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Signaling-NaN inputs (0x7F800001..0x7FBFFFFF): output must be a NaN.  Both
+// sides are masked with 0xFFBFFFFF, which clears bit 22 (the quiet bit), so
+// the test accepts either the original sNaN or its quieted qNaN form.
+TEST(ROUNDZ__NEON_CVT, positive_snan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      // The std::max clamp replaces bit pattern 0x7F800000 (infinity) with
+      // the smallest sNaN 0x7F800001 so every lane holds a NaN.
+      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Same as positive_snan, with the sign bit set on every input.
+TEST(ROUNDZ__NEON_CVT, negative_snan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Stricter variant of positive_snan: exact (unmasked) bit comparison against
+// std::trunc, i.e. the kernel must quiet signaling NaNs exactly the way the
+// reference does on this platform.
+TEST(ROUNDZ__NEON_CVT, positive_snan_to_qnan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Same as positive_snan_to_qnan, with the sign bit set on every input.
+TEST(ROUNDZ__NEON_CVT, negative_snan_to_qnan) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+// Exhaustive bit-level sweeps of the ARMv8 NEON (hardware rounding) roundz
+// kernel against std::trunc.  Sweeps every non-negative pattern below 2**24
+// (0x00000000..0x4B7FFFFF): zero, subnormals, and all positive values small
+// enough to carry fractional bits.
+TEST(ROUNDZ__NEONV8, positive_normal) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      // Bit-exact comparison so signed zeroes cannot compare incorrectly.
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Mirror of positive_normal over the negative half of the bit space
+// (0x80000000..0xCB7FFFFF), i.e. -0.0f down to just above -2**24.
+TEST(ROUNDZ__NEONV8, negative_normal) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Positive values >= 2**24 (0x4B800000..0x7F7FFFFF) have no fractional bits;
+// truncation must return them unchanged.
+TEST(ROUNDZ__NEONV8, positive_integral) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+// Mirror of positive_integral for negative values <= -2**24
+// (0xCB800000..0xFF7FFFFF).
+TEST(ROUNDZ__NEONV8, negative_integral) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      inputs[i] = fp32_from_bits(n + i);
+    }
+    xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t i = 0; i < kBlockSize; i++) {
+      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+    }
+  }
+}
+
+ // Checks that +infinity is passed through unchanged (trunc(+inf) == +inf).
+ TEST(ROUNDZ__NEONV8, positive_infinity) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ // Fill with the float whose bit pattern is 0x7F800000 (+inf). Passing the
+ // raw integer to std::fill would implicitly convert it to 2139095040.0f,
+ // so the kernel would never actually see an infinite input.
+ std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x7F800000)));
+ xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+ }
+
+ // Checks that -infinity is passed through unchanged (trunc(-inf) == -inf).
+ TEST(ROUNDZ__NEONV8, negative_infinity) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ // Fill with the float whose bit pattern is 0xFF800000 (-inf). Passing the
+ // raw integer to std::fill would implicitly convert it to a large finite
+ // float, so the kernel would never actually see an infinite input.
+ std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0xFF800000)));
+ xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+ }
+
+ // Exhaustively checks positive quiet NaNs (0x7FC00000-0x7FFFFFFF); the
+ // result must be bit-identical to std::trunc, i.e. the NaN must be
+ // propagated with its payload intact.
+ TEST(ROUNDZ__NEONV8, positive_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Same as positive_qnan, with the sign bit (0x80000000) ORed in to cover
+ // negative quiet NaNs.
+ TEST(ROUNDZ__NEONV8, negative_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
+ }
+ xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Exhaustively checks positive signaling NaNs (0x7F800001-0x7FBFFFFF; the
+ // std::max clamp skips the +inf pattern 0x7F800000). The comparison masks
+ // out bit 22 (0x00400000, the quiet bit), so the test passes whether or not
+ // the implementation quietens the sNaN, as long as the payload survives.
+ TEST(ROUNDZ__NEONV8, positive_snan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Negative counterpart of positive_snan (sign bit ORed in), with the same
+ // quiet-bit-insensitive comparison.
+ TEST(ROUNDZ__NEONV8, negative_snan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Stricter variant of positive_snan: requires bit-exact agreement with
+ // std::trunc on signaling-NaN inputs, i.e. the implementation must quieten
+ // the sNaN exactly the way the reference does.
+ TEST(ROUNDZ__NEONV8, positive_snan_to_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Negative counterpart of positive_snan_to_qnan: bit-exact sNaN handling
+ // with the sign bit set.
+ TEST(ROUNDZ__NEONV8, negative_snan_to_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
+ // Exhaustively checks positive finite bit patterns below 2^24
+ // (0x00000000-0x4B7FFFFF, including zero and subnormals) against std::trunc.
+ TEST(ROUNDZ__PSIMD_ADDSUB, positive_normal) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__psimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Negative counterpart: bit patterns 0x80000000-0xCB7FFFFF (magnitude below
+ // 2^24, so a fractional part is possible).
+ TEST(ROUNDZ__PSIMD_ADDSUB, negative_normal) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__psimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Positive finite values >= 2^24 (0x4B800000-0x7F7FFFFF) are already
+ // integral and must be returned unchanged.
+ TEST(ROUNDZ__PSIMD_ADDSUB, positive_integral) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__psimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Negative finite values of magnitude >= 2^24 (0xCB800000-0xFF7FFFFF) must
+ // also pass through unchanged.
+ TEST(ROUNDZ__PSIMD_ADDSUB, negative_integral) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__psimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Checks that +infinity is passed through unchanged (trunc(+inf) == +inf).
+ TEST(ROUNDZ__PSIMD_ADDSUB, positive_infinity) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ // Fill with the float whose bit pattern is 0x7F800000 (+inf). Passing the
+ // raw integer to std::fill would implicitly convert it to 2139095040.0f,
+ // so the kernel would never actually see an infinite input.
+ std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x7F800000)));
+ xnn_math_f32_roundz__psimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+ }
+
+ // Checks that -infinity is passed through unchanged (trunc(-inf) == -inf).
+ TEST(ROUNDZ__PSIMD_ADDSUB, negative_infinity) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ // Fill with the float whose bit pattern is 0xFF800000 (-inf). Passing the
+ // raw integer to std::fill would implicitly convert it to a large finite
+ // float, so the kernel would never actually see an infinite input.
+ std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0xFF800000)));
+ xnn_math_f32_roundz__psimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+ }
+
+ // Exhaustively checks positive quiet NaNs (0x7FC00000-0x7FFFFFFF); the NaN
+ // must be propagated bit-identically to std::trunc.
+ TEST(ROUNDZ__PSIMD_ADDSUB, positive_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__psimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Same as positive_qnan with the sign bit ORed in (negative quiet NaNs).
+ TEST(ROUNDZ__PSIMD_ADDSUB, negative_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
+ }
+ xnn_math_f32_roundz__psimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Positive signaling NaNs (0x7F800001-0x7FBFFFFF; std::max skips the +inf
+ // pattern). Bit 22 (the quiet bit) is masked out of the comparison, so the
+ // test tolerates either quietened or raw sNaN results.
+ TEST(ROUNDZ__PSIMD_ADDSUB, positive_snan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__psimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Negative counterpart of positive_snan (sign bit ORed in), same
+ // quiet-bit-insensitive comparison.
+ TEST(ROUNDZ__PSIMD_ADDSUB, negative_snan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__psimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Stricter sNaN check: requires bit-exact agreement with std::trunc,
+ // including any quietening of the signaling NaN.
+ TEST(ROUNDZ__PSIMD_ADDSUB, positive_snan_to_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__psimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+
+ // Negative counterpart of positive_snan_to_qnan (sign bit set).
+ TEST(ROUNDZ__PSIMD_ADDSUB, negative_snan_to_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__psimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+ }
+#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
+
+// Exhaustively checks positive finite bit patterns below 2^24
+// (0x00000000-0x4B7FFFFF, including zero and subnormals) against std::trunc.
+TEST(ROUNDZ__SCALAR_ADDSUB, positive_normal) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+}
+
+// Negative counterpart: bit patterns 0x80000000-0xCB7FFFFF (magnitude below
+// 2^24, so a fractional part is possible).
+TEST(ROUNDZ__SCALAR_ADDSUB, negative_normal) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+}
+
+// Positive finite values >= 2^24 (0x4B800000-0x7F7FFFFF) are already integral
+// and must be returned unchanged.
+TEST(ROUNDZ__SCALAR_ADDSUB, positive_integral) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+}
+
+// Negative finite values of magnitude >= 2^24 (0xCB800000-0xFF7FFFFF) must
+// also pass through unchanged.
+TEST(ROUNDZ__SCALAR_ADDSUB, negative_integral) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+}
+
+// Checks that +infinity is passed through unchanged (trunc(+inf) == +inf).
+TEST(ROUNDZ__SCALAR_ADDSUB, positive_infinity) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ // Fill with the float whose bit pattern is 0x7F800000 (+inf). Passing the
+ // raw integer to std::fill would implicitly convert it to 2139095040.0f,
+ // so the kernel would never actually see an infinite input.
+ std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x7F800000)));
+ xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+}
+
+// Checks that -infinity is passed through unchanged (trunc(-inf) == -inf).
+TEST(ROUNDZ__SCALAR_ADDSUB, negative_infinity) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ // Fill with the float whose bit pattern is 0xFF800000 (-inf). Passing the
+ // raw integer to std::fill would implicitly convert it to a large finite
+ // float, so the kernel would never actually see an infinite input.
+ std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0xFF800000)));
+ xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+}
+
+// Exhaustively checks positive quiet NaNs (0x7FC00000-0x7FFFFFFF); the NaN
+// must be propagated bit-identically to std::trunc.
+TEST(ROUNDZ__SCALAR_ADDSUB, positive_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(n + i);
+ }
+ xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+}
+
+// Same as positive_qnan with the sign bit ORed in (negative quiet NaNs).
+TEST(ROUNDZ__SCALAR_ADDSUB, negative_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
+ }
+ xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+}
+
+// Positive signaling NaNs (0x7F800001-0x7FBFFFFF; std::max skips the +inf
+// pattern). Bit 22 (the quiet bit) is masked out of the comparison, so the
+// test tolerates either quietened or raw sNaN results.
+TEST(ROUNDZ__SCALAR_ADDSUB, positive_snan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+}
+
+// Negative counterpart of positive_snan (sign bit ORed in), same
+// quiet-bit-insensitive comparison.
+TEST(ROUNDZ__SCALAR_ADDSUB, negative_snan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+}
+
+// Stricter sNaN check: requires bit-exact agreement with std::trunc,
+// including any quietening of the signaling NaN.
+TEST(ROUNDZ__SCALAR_ADDSUB, positive_snan_to_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+}
+
+// Negative counterpart of positive_snan_to_qnan (sign bit set).
+TEST(ROUNDZ__SCALAR_ADDSUB, negative_snan_to_qnan) {
+ std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+ std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
+ }
+ xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
+ ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
+ << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
+ << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+ << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
+ }
+ }
+}
+
+TEST(ROUNDZ__SCALAR_CVT, positive_normal) {
+  // Sweep every positive bit pattern with magnitude below 2**24 (0x4B800000), where truncation can change the value.
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t base = UINT32_C(0x00000000); base < UINT32_C(0x4B800000); base += kBlockSize) {
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      inputs[idx] = fp32_from_bits(base + idx);
+    }
+    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      const uint32_t expected = fp32_to_bits(std::trunc(inputs[idx]));
+      ASSERT_EQ(expected, fp32_to_bits(outputs[idx]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[idx])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[idx]);
+    }
+  }
+}
+
+TEST(ROUNDZ__SCALAR_CVT, negative_normal) {
+  // Sweep every negative bit pattern with magnitude below 2**24 (sign bit set, 0x80000000-0xCB7FFFFF).
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t base = UINT32_C(0x80000000); base < UINT32_C(0xCB800000); base += kBlockSize) {
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      inputs[idx] = fp32_from_bits(base + idx);
+    }
+    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      const uint32_t expected = fp32_to_bits(std::trunc(inputs[idx]));
+      ASSERT_EQ(expected, fp32_to_bits(outputs[idx]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[idx])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[idx]);
+    }
+  }
+}
+
+TEST(ROUNDZ__SCALAR_CVT, positive_integral) {
+  // Sweep positive values in [2**24, +infinity): every such float is already an integer, so trunc(x) == x.
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t base = UINT32_C(0x4B800000); base < UINT32_C(0x7F800000); base += kBlockSize) {
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      inputs[idx] = fp32_from_bits(base + idx);
+    }
+    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      const uint32_t expected = fp32_to_bits(std::trunc(inputs[idx]));
+      ASSERT_EQ(expected, fp32_to_bits(outputs[idx]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[idx])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[idx]);
+    }
+  }
+}
+
+TEST(ROUNDZ__SCALAR_CVT, negative_integral) {
+  // Sweep negative values in (-infinity, -2**24]: every such float is already an integer, so trunc(x) == x.
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t base = UINT32_C(0xCB800000); base < UINT32_C(0xFF800000); base += kBlockSize) {
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      inputs[idx] = fp32_from_bits(base + idx);
+    }
+    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      const uint32_t expected = fp32_to_bits(std::trunc(inputs[idx]));
+      ASSERT_EQ(expected, fp32_to_bits(outputs[idx]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[idx])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[idx]);
+    }
+  }
+}
+
+TEST(ROUNDZ__SCALAR_CVT, positive_infinity) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  // Note: fp32_from_bits is required here. Passing the raw UINT32_C(0x7F800000) constant to std::fill would
+  // implicitly convert the integer to 2139095040.0f instead of +infinity, and the test would not exercise infinity.
+  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x7F800000)));
+  xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+  const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+}
+
+TEST(ROUNDZ__SCALAR_CVT, negative_infinity) {
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  // Note: fp32_from_bits is required here. Passing the raw UINT32_C(0xFF800000) constant to std::fill would
+  // implicitly convert the integer to a large positive float instead of -infinity, silently skipping the case.
+  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0xFF800000)));
+  xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+  const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
+  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
+    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
+    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
+    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
+}
+
+TEST(ROUNDZ__SCALAR_CVT, positive_qnan) {
+  // Sweep all positive quiet NaN bit patterns (0x7FC00000-0x7FFFFFFF); the kernel must match std::trunc exactly.
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      inputs[idx] = fp32_from_bits(base + idx);
+    }
+    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      const uint32_t expected = fp32_to_bits(std::trunc(inputs[idx]));
+      ASSERT_EQ(expected, fp32_to_bits(outputs[idx]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[idx])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[idx]);
+    }
+  }
+}
+
+TEST(ROUNDZ__SCALAR_CVT, negative_qnan) {
+  // Sweep all quiet NaN bit patterns with the sign bit set; the kernel must match std::trunc exactly.
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      inputs[idx] = fp32_from_bits(UINT32_C(0x80000000) | (base + idx));
+    }
+    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      const uint32_t expected = fp32_to_bits(std::trunc(inputs[idx]));
+      ASSERT_EQ(expected, fp32_to_bits(outputs[idx]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[idx])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[idx]);
+    }
+  }
+}
+
+TEST(ROUNDZ__SCALAR_CVT, positive_snan) {
+  // Sweep positive signaling NaN patterns (clamped to 0x7F800001 to skip infinity). The 0xFFBFFFFF mask clears
+  // bit 22 (the quiet bit) on both sides, so the kernel may either quiet the NaN or pass it through.
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      inputs[idx] = fp32_from_bits(std::max<uint32_t>(base + idx, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      const uint32_t expected = fp32_to_bits(std::trunc(inputs[idx]));
+      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[idx]) & UINT32_C(0xFFBFFFFF))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[idx])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[idx]);
+    }
+  }
+}
+
+TEST(ROUNDZ__SCALAR_CVT, negative_snan) {
+  // Sweep negative signaling NaN patterns. The 0xFFBFFFFF mask clears bit 22 (the quiet bit) on both sides,
+  // so the kernel may either quiet the NaN or pass it through.
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      inputs[idx] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(base + idx, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      const uint32_t expected = fp32_to_bits(std::trunc(inputs[idx]));
+      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[idx]) & UINT32_C(0xFFBFFFFF))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[idx])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[idx]);
+    }
+  }
+}
+
+TEST(ROUNDZ__SCALAR_CVT, DISABLED_positive_snan_to_qnan) {
+  // Disabled: the scalar CVT kernel passes NaNs through unmodified (the abs(x) < 2**23 check is false for NaN),
+  // so signaling NaNs are not quieted and the exact-equality check against std::trunc would fail.
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      inputs[idx] = fp32_from_bits(std::max<uint32_t>(base + idx, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      const uint32_t expected = fp32_to_bits(std::trunc(inputs[idx]));
+      ASSERT_EQ(expected, fp32_to_bits(outputs[idx]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[idx])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[idx]);
+    }
+  }
+}
+
+TEST(ROUNDZ__SCALAR_CVT, DISABLED_negative_snan_to_qnan) {
+  // Disabled: the scalar CVT kernel passes NaNs through unmodified (the abs(x) < 2**23 check is false for NaN),
+  // so signaling NaNs are not quieted and the exact-equality check against std::trunc would fail.
+  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
+  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
+  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      inputs[idx] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(base + idx, UINT32_C(0x7F800001)));
+    }
+    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
+    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
+      const uint32_t expected = fp32_to_bits(std::trunc(inputs[idx]));
+      ASSERT_EQ(expected, fp32_to_bits(outputs[idx]))
+        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[idx])
+        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
+        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[idx]);
+    }
+  }
+}
diff --git a/src/math/roundz-neon-addsub.c b/src/math/roundz-neon-addsub.c
new file mode 100644
index 0000000..c60ab00
--- /dev/null
+++ b/src/math/roundz-neon-addsub.c
@@ -0,0 +1,66 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+
+#include <arm_neon.h>
+
+#include <xnnpack/math-stubs.h>
+
+
+void xnn_math_f32_roundz__neon_addsub(
+    size_t n,
+    const float* input,
+    float* output)
+{
+  assert(n % (4 * sizeof(float)) == 0);
+
+  // Addition of this number to a floating-point number x causes rounding of the result to an integer. Then this magic
+  // number is subtracted back from the result to get original x rounded to integer. This trick works only for
+  // 0 <= x < 2**24, but all numbers in 2**23 <= x < 2**24 range are integers, so we can further restrict it to
+  // 0 <= x < 2**23. Then the upper bound of the validity interval is conveniently the same as the magic number.
+  const float32x4_t vmagic_number = vmovq_n_f32(0x1.000000p+23f);
+  // Unit constant to decrement absolute values rounded "wrong way" (i.e. away from zero) in the round-to-nearest-even
+  // operation.
+  const float32x4_t vone = vmovq_n_f32(1.0f);
+
+  for (; n != 0; n -= 4 * sizeof(float)) {
+    const float32x4_t vx = vld1q_f32(input); input += 4;
+
+    // The rounding trick works only for x >= 0, so we compute absolute value of x, round it, and restore the sign in
+    // the end. This method works for round-towards-zero because it is an odd function.
+    const float32x4_t vabsx = vabsq_f32(vx);
+    // Compute bitmask for selection between the value rounded with addition-subtraction trick and the abs(x) value.
+    // We use the result of the addition-subtraction trick only on its validity interval, i.e. 0 <= abs(x) < 2**23.
+    // Note: we do vcaltq_f32(vmagic_number, vx) instead of vcltq_f32(vmagic_number, vabsx) to reduce dependency chain.
+    const uint32x4_t vrndmask = vcaltq_f32(vmagic_number, vx);
+
+    // Addition-subtraction trick with the magic number to cause rounding to the nearest-even integer for abs(x).
+    // Note: the result is valid only for 0 <= abs(x) < 2**23.
+    // Note: addition-subtraction implicitly converts SNaN inputs to QNaNs.
+    const float32x4_t vrndabsx = vsubq_f32(vaddq_f32(vabsx, vmagic_number), vmagic_number);
+    // Extract bitmask for the sign of x.
+    // The bitmask is 0x00000000 when x is positive (including +0) and 0x80000000 when x is negative (including -0).
+    const uint32x4_t vsignx = veorq_u32(vreinterpretq_u32_f32(vabsx), vreinterpretq_u32_f32(vx));
+
+    // Compute adjustment to be subtracted from the rounded-to-nearest-even abs(x) value.
+    // Adjustment is one if the rounded value is greater than the abs(x) value and zero otherwise (including NaN input).
+    // Note: vone must be reinterpreted to uint32x4_t for the bitwise AND; vcgtq_f32 already returns a uint32x4_t mask.
+    const float32x4_t vadjustment =
+      vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vone), vcgtq_f32(vrndabsx, vabsx)));
+    // Adjust abs(x) rounded to nearest-even via the addition-subtraction trick to get abs(x) rounded down.
+    // Note: subtraction implicitly converts SNaN inputs to QNaNs.
+    const float32x4_t vflrabsx = vsubq_f32(vrndabsx, vadjustment);
+
+    // Combine abs(x) rounded down via addition-subtraction trick with adjustment and the input x value.
+    // For 0.0 <= x < 2**23, the result is abs(x) rounded via addition-subtraction trick.
+    // For -2**23 < x <= -0.0, the result is abs(x) rounded via addition-subtraction trick with the sign of x.
+    // For NaN inputs, the result is x converted to QNaN as a side-effect of addition-subtraction and adjustment.
+    // For abs(x) >= 2**23, the result is x itself.
+    const float32x4_t vy = vbslq_f32(vorrq_u32(vrndmask, vsignx), vx, vflrabsx);
+
+    vst1q_f32(output, vy); output += 4;
+  }
+}
diff --git a/src/math/roundz-neon-cvt.c b/src/math/roundz-neon-cvt.c
new file mode 100644
index 0000000..99a7a7c
--- /dev/null
+++ b/src/math/roundz-neon-cvt.c
@@ -0,0 +1,49 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+
+#include <arm_neon.h>
+
+#include <xnnpack/math-stubs.h>
+
+
+void xnn_math_f32_roundz__neon_cvt(
+    size_t n,
+    const float* input,
+    float* output)
+{
+  assert(n % (4 * sizeof(float)) == 0);
+
+  // Threshold of non-integral values in single-precision floating-point representation.
+  // All inputs above this threshold (by absolute value) are integer numbers.
+  const float32x4_t vintegral_threshold = vmovq_n_f32(0x1.000000p+23f);
+  // Mask for the sign of a single-precision floating-point number.
+  const uint32x4_t vsign_mask = vmovq_n_u32(UINT32_C(0x80000000));
+
+  for (; n != 0; n -= 4 * sizeof(float)) {
+    const float32x4_t vx = vld1q_f32(input); input += 4;
+
+    // Convert floating-point value x to integer, with rounding towards zero, and then back to floating-point.
+    // Note: the result is valid only for abs(x) < 2**31, but we further restrict its use to 2**23.
+    const float32x4_t vrndx = vcvtq_f32_s32(vcvtq_s32_f32(vx));
+    // Extract the sign of the input x (not of vrndx: the FP->INT->FP round trip already loses the sign of -0.0 and
+    // of fractional negatives that truncate to zero, e.g. -0.5 -> +0.0).
+    // We need the sign to preserve negative zero value, which would otherwise get lost in FP->INT->FP conversion.
+    const uint32x4_t vsignx = vandq_u32(vreinterpretq_u32_f32(vx), vsign_mask);
+
+    // Compute bitmask for non-integral input.
+    // The bitmask is set to all ones when x is potentially non-integral, and we round it using FP->INT->FP conversion.
+    const uint32x4_t vrndmask = vcaltq_f32(vx, vintegral_threshold);
+
+    // Combine x rounded towards zero via FP->INT->FP conversion and the input x value. Clearing the sign bit in the
+    // selection mask makes the sign lane always come from x, restoring the sign of negative zero.
+    // For 0.0 <= x < 2**23, the result is x rounded via FP->INT->FP conversion.
+    // For -2**23 < x <= -0.0, the result is abs(x) rounded via FP->INT->FP conversion with the sign of x.
+    // For abs(x) >= 2**23 or NaN inputs, the result is x itself.
+    const float32x4_t vy = vbslq_f32(vbicq_u32(vrndmask, vsignx), vrndx, vx);
+
+    vst1q_f32(output, vy); output += 4;
+  }
+}
diff --git a/src/math/roundz-neonv8.c b/src/math/roundz-neonv8.c
new file mode 100644
index 0000000..92f31d5
--- /dev/null
+++ b/src/math/roundz-neonv8.c
@@ -0,0 +1,28 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+
+#include <arm_neon.h>
+
+#include <xnnpack/math-stubs.h>
+
+
+void xnn_math_f32_roundz__neonv8(
+    size_t n,
+    const float* input,
+    float* output)
+{
+  assert(n % (4 * sizeof(float)) == 0);
+
+  // ARMv8 NEON provides a direct round-towards-zero instruction, exposed as the vrndq_f32 intrinsic.
+  for (; n != 0; n -= 4 * sizeof(float)) {
+    const float32x4_t vx = vld1q_f32(input);
+    input += 4;
+
+    vst1q_f32(output, vrndq_f32(vx));
+    output += 4;
+  }
+}
diff --git a/src/math/roundz-psimd-addsub.c b/src/math/roundz-psimd-addsub.c
new file mode 100644
index 0000000..14312e9
--- /dev/null
+++ b/src/math/roundz-psimd-addsub.c
@@ -0,0 +1,66 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <psimd.h>
+
+#include <xnnpack/math-stubs.h>
+
+
+void xnn_math_f32_roundz__psimd_addsub(
+    size_t n,
+    const float* input,
+    float* output)
+{
+  // Evaluation stub: writes output[i] := round-towards-zero(input[i]) for n / sizeof(float) elements, 4 lanes at
+  // a time, via the magic-number addition-subtraction technique (psimd has no direct truncation operation).
+  assert(n % (4 * sizeof(float)) == 0);
+
+  // Mask for the sign bit of a floating-point number.
+  const psimd_s32 vsign_mask = psimd_splat_s32(INT32_C(0x80000000));
+  // Addition of this number to a floating-point number x cause rounding of the result to an integer. Then this magic
+  // number is subtracted back from the result to get original x rounded to integer. This trick works only for
+  // 0 <= x < 2**24, but all numbers in 2**23 <= x < 2**24 range are integers, so we can further restrict it to
+  // 0 <= x < 2**23. Then the upper bound of the validity interval is conveniently the same as the magic number.
+  const psimd_f32 vmagic_number = psimd_splat_f32(0x1.000000p+23f);
+  // Unit constant to decrement absolute values rounded "wrong way" (i.e. away from zero) in the round-to-nearest-even
+  // operation.
+  const psimd_f32 vone = psimd_splat_f32(1.0f);
+
+  for (; n != 0; n -= 4 * sizeof(float)) {
+    const psimd_f32 vx = psimd_load_f32(input);
+    input += 4;
+
+    // The rounding trick works only for x >= 0, so we compute absolute value of x, round it, and restore the sign in
+    // the end. This method works for round-toward-zero because it is an odd function.
+    const psimd_f32 vabsx = psimd_andnotmask_f32(vsign_mask, vx);
+
+    // Compute bitmask for the bits we want to copy from x. Other bits will be copied from the rounded abs(x).
+    // If abs(x) < 2**23 or x is NaN, we want the sign bit from x and the rest from the rounded abs(x).
+    // Otherwise (abs(x) >= 2**23), we want all bits from x.
+    // Note: the >= comparison produces a per-lane all-ones/all-zeros integer mask, and is false for NaN lanes.
+    const psimd_s32 vrndmask = vsign_mask | (vabsx >= vmagic_number);
+    // Addition-subtraction trick with the magic number to cause rounding to integer for abs(x).
+    // Note: the result is valid only for 0 <= abs(x) < 2**23.
+    // Note: addition-subtraction implicitly converts SNaN inputs to QNaNs.
+    const psimd_f32 vrndabsx = psimd_sub_f32(psimd_add_f32(vabsx, vmagic_number), vmagic_number);
+
+    // Compute adjustment to be subtracted from the rounded-to-nearest-even abs(x) value.
+    // Adjustment is one if the rounded value is greater than the abs(x) value and zero otherwise (including NaN input).
+    const psimd_f32 vadjustment = psimd_andmask_f32(vrndabsx > vabsx, vone);
+    // Adjust abs(x) rounded to nearest-even via the addition-subtraction trick to get abs(x) rounded down.
+    // Note: subtraction implicitly converts SNaN inputs to QNaNs.
+    const psimd_f32 vflrabsx = psimd_sub_f32(vrndabsx, vadjustment);
+
+    // Combine abs(x) rounded down via addition-subtraction trick with adjustment and the input x value.
+    // For abs(x) < 2**23, the result is abs(x) rounded via addition-subtraction trick with the sign of x.
+    // For NaN inputs, the result is x converted to QNaN as a side-effect of addition-subtraction and adjustment.
+    // For abs(x) >= 2**23, the result is x itself.
+    const psimd_f32 vy = psimd_blend_f32(vrndmask, vx, vflrabsx);
+
+    psimd_store_f32(output, vy);
+    output += 4;
+  }
+}
diff --git a/src/math/roundz-scalar-addsub.c b/src/math/roundz-scalar-addsub.c
new file mode 100644
index 0000000..f036d0f
--- /dev/null
+++ b/src/math/roundz-scalar-addsub.c
@@ -0,0 +1,55 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+#include <math.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math-stubs.h>
+
+
+void xnn_math_f32_roundz__scalar_addsub(
+    size_t n,
+    const float* input,
+    float* output)
+{
+  // Evaluation stub: writes output[i] := round-towards-zero(input[i]) (i.e. trunc) for n / sizeof(float) elements,
+  // using the magic-number addition-subtraction technique instead of a library truncation call.
+  assert(n % sizeof(float) == 0);
+
+  // Addition of this number to a floating-point number x cause rounding of the result to an integer. Then this magic
+  // number is subtracted back from the result to get original x rounded to integer. This trick works only for
+  // 0 <= x < 2**24, but all numbers in 2**23 <= x < 2**24 range are integers, so we can further restrict it to
+  // 0 <= x < 2**23. Then the upper bound of the validity interval is conveniently the same as the magic number.
+  const float vmagic_number = 0x1.000000p+23f;
+  // Unit constant to decrement absolute values rounded "wrong way" (i.e. away from zero) in the round-to-nearest-even
+  // operation.
+  const float vone = 1.0f;
+
+  for (; n != 0; n -= sizeof(float)) {
+    const float vx = *input++;
+
+    // The rounding trick works only for x >= 0, so we compute absolute value of x, round it, and restore the sign in
+    // the end. This method works for round-towards-zero because it is an odd function.
+    const float vabsx = fabsf(vx);
+    // Addition-subtraction trick with the magic number to cause rounding to the nearest-even integer for abs(x).
+    // Note: the result is valid only for 0 <= abs(x) < 2**23.
+    // Note: addition-subtraction implicitly converts SNaN inputs to QNaNs.
+    const float vrndabsx = (vabsx + vmagic_number) - vmagic_number;
+
+    // Adjust abs(x) rounded to nearest-even via the addition-subtraction trick to get abs(x) rounded down.
+    // Note: subtraction implicitly converts SNaN inputs to QNaNs.
+    // XNN_UNPREDICTABLE marks the branch as data-dependent (presumably steering compilers to branchless code --
+    // see xnnpack/common.h for the exact semantics).
+    const float vflrabsx = XNN_UNPREDICTABLE(vrndabsx <= vabsx) ? vrndabsx : vrndabsx - vone;
+
+    // Select between the abs(x) rounded down using addition-subtraction trick with adjustment and the abs(x) value.
+    // For abs(x) < 2**23, the result is abs(x) rounded via addition-subtraction trick.
+    // For abs(x) >= 2**23, the result is abs(x) itself (already an integer).
+    // For NaN inputs, the result is abs(x) converted to QNaN as a side-effect of addition-subtraction.
+    const float vabsy = XNN_UNPREDICTABLE(vabsx >= vmagic_number) ? vabsx : vflrabsx;
+    // Restore the sign of the rounded value.
+    const float vy = copysignf(vabsy, vx);
+
+    *output++ = vy;
+  }
+}
diff --git a/src/math/roundz-scalar-cvt.c b/src/math/roundz-scalar-cvt.c
new file mode 100644
index 0000000..2694600
--- /dev/null
+++ b/src/math/roundz-scalar-cvt.c
@@ -0,0 +1,42 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math-stubs.h>
+
+
+void xnn_math_f32_roundz__scalar_cvt(
+    size_t n,
+    const float* input,
+    float* output)
+{
+  // Evaluation stub: writes output[i] := round-towards-zero(input[i]) (i.e. trunc) for n / sizeof(float) elements,
+  // using an FP->INT->FP round trip (float-to-int conversion truncates in C).
+  assert(n % sizeof(float) == 0);
+
+  // Threshold of non-integral values in single-precision floating-point representation.
+  // All inputs above this threshold (by absolute value) are integer numbers.
+  const float vintegral_threshold = 0x1.000000p+23f;
+
+  for (; n != 0; n -= sizeof(float)) {
+    const float vx = *input++;
+
+    // Convert floating-point value x to integer, with rounding towards zero, and then back to floating-point.
+    // Note: the result is valid only for abs(x) < 2**31, but we further restrict its use to 2**23.
+    // NOTE(review): the cast is evaluated even for abs(x) >= 2**31 and NaN, where C leaves the float-to-int
+    // conversion undefined; the result is discarded by the selection below, but confirm all target compilers
+    // produce a benign value here rather than trapping.
+    const float vrndx = (float) (int32_t) vx;
+    // Compute abs(x) to check if the FP->INT->FP conversion result is valid.
+    const float vabsx = fabsf(vx);
+
+    // Select between the x rounded via FP->INT->FP conversion and the original x value.
+    // The comparison is false for NaN, so NaN inputs are passed through unmodified.
+    const float vprey = XNN_UNPREDICTABLE(vabsx < vintegral_threshold) ? vrndx : vx;
+    // Restore the sign of -0.0f lost in the FP->INT->FP conversion.
+    const float vy = copysignf(vprey, vx);
+
+    *output++ = vy;
+  }
+}
diff --git a/src/math/roundz-sse-addsub.c b/src/math/roundz-sse-addsub.c
new file mode 100644
index 0000000..af1a0be
--- /dev/null
+++ b/src/math/roundz-sse-addsub.c
@@ -0,0 +1,66 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+
+#include <xmmintrin.h>
+
+#include <xnnpack/math.h>
+#include <xnnpack/math-stubs.h>
+
+
+void xnn_math_f32_roundz__sse_addsub(
+    size_t n,
+    const float* input,
+    float* output)
+{
+  // Evaluation stub: writes output[i] := round-towards-zero(input[i]) for n / sizeof(float) elements, 4 lanes at
+  // a time, via the magic-number addition-subtraction technique (SSE1 has no truncation instruction).
+  assert(n % (4 * sizeof(float)) == 0);
+
+  // Mask for all bits of a floating-point number except the sign bit.
+  // NOTE(review): math_nonsign_mask_f32() presumably yields the 0x7FFFFFFF bit pattern as a float -- confirm in
+  // xnnpack/math.h.
+  const __m128 vnonsign_mask = _mm_set1_ps(math_nonsign_mask_f32());
+  // Addition of this number to a floating-point number x cause rounding of the result to an integer. Then this magic
+  // number is subtracted back from the result to get original x rounded to integer. This trick works only for
+  // 0 <= x < 2**24, but all numbers in 2**23 <= x < 2**24 range are integers, so we can further restrict it to
+  // 0 <= x < 2**23. Then the upper bound of the validity interval is conveniently the same as the magic number.
+  const __m128 vmagic_number = _mm_set1_ps(0x1.000000p+23f);
+  // Unit constant to decrement absolute values rounded "wrong way" (i.e. away from zero) in the round-to-nearest-even
+  // operation.
+  const __m128 vone = _mm_set1_ps(1.0f);
+
+  for (; n != 0; n -= 4 * sizeof(float)) {
+    const __m128 vx = _mm_load_ps(input);
+    input += 4;
+
+    // The rounding trick works only for x >= 0, so we compute absolute value of x, round it, and restore the sign in
+    // the end. This method works for round-towards-zero because it is an odd function.
+    const __m128 vabsx = _mm_and_ps(vx, vnonsign_mask);
+
+    // Compute bitmask for the bits we want to copy from the rounded abs(x). Other bits will be copied from x.
+    // If abs(x) >= 2**23, we want all bits from x.
+    // If abs(x) < 2**23 or x is NaN, we want all but the sign bit from the rounded abs(x) and the sign bit from x.
+    const __m128 vrndmask = _mm_andnot_ps(_mm_cmpge_ps(vabsx, vmagic_number), vnonsign_mask);
+    // Addition-subtraction trick with the magic number to cause rounding to the nearest-even integer for abs(x).
+    // Note: the result is valid only for 0 <= abs(x) < 2**23.
+    // Note: addition-subtraction implicitly converts SNaN inputs to QNaNs.
+    const __m128 vrndabsx = _mm_sub_ps(_mm_add_ps(vabsx, vmagic_number), vmagic_number);
+
+    // Compute adjustment to be subtracted from the rounded-to-nearest-even abs(x) value.
+    // Adjustment is one if the rounded value is greater than the abs(x) value and zero otherwise (including NaN input).
+    const __m128 vadjustment = _mm_and_ps(vone, _mm_cmpgt_ps(vrndabsx, vabsx));
+    // Adjust abs(x) rounded to nearest-even via the addition-subtraction trick to get abs(x) rounded down.
+    // Note: subtraction implicitly converts SNaN inputs to QNaNs.
+    const __m128 vflrabsx = _mm_sub_ps(vrndabsx, vadjustment);
+
+    // Combine abs(x) rounded down via addition-subtraction trick with adjustment and the input x value.
+    // For abs(x) < 2**23, the result is abs(x) rounded via addition-subtraction trick with the sign of x.
+    // For NaN inputs, the result is x converted to QNaN as a side-effect of addition-subtraction and adjustment.
+    // For abs(x) >= 2**23, the result is x itself.
+    const __m128 vy = _mm_or_ps(_mm_and_ps(vflrabsx, vrndmask), _mm_andnot_ps(vrndmask, vx));
+
+    _mm_store_ps(output, vy);
+    output += 4;
+  }
+}
diff --git a/src/math/roundz-sse2-cvt.c b/src/math/roundz-sse2-cvt.c
new file mode 100644
index 0000000..a4a3bde
--- /dev/null
+++ b/src/math/roundz-sse2-cvt.c
@@ -0,0 +1,54 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+
+#include <emmintrin.h>
+
+#include <xnnpack/math-stubs.h>
+
+
+void xnn_math_f32_roundz__sse2_cvt(
+    size_t n,
+    const float* input,
+    float* output)
+{
+  // Evaluation stub: writes output[i] := round-towards-zero(input[i]) for n / sizeof(float) elements, 4 lanes at
+  // a time, using the truncating CVTTPS2DQ conversion.
+  assert(n % (4 * sizeof(float)) == 0);
+
+  // This magic number with a bit representation 0x80000000 serves two purposes:
+  // 1. Extract the sign of a floating-point number.
+  // 2. Check if the input to CVTTPS2DQ (_mm_cvttps_epi32) is out-of-range, which results in 0x80000000 output.
+  const __m128 vmagic = _mm_set1_ps(-0.0f);
+
+  for (; n != 0; n -= 4 * sizeof(float)) {
+    const __m128 vx = _mm_load_ps(input);
+    input += 4;
+
+    // Extract the sign of the input.
+    // We need the sign to preserve negative zero value, which would otherwise get lost in FP->INT->FP conversion.
+    const __m128 vsignx = _mm_and_ps(vx, vmagic);
+    // Convert floating-point value x to integer, with rounding towards zero.
+    // If x is beyond [-2**31, 2**31-1] range or x is NaN, the result is -2**31 (0x80000000).
+    const __m128i vintx = _mm_cvttps_epi32(vx);
+
+    // Compute bitmask for out-of-range conversion input.
+    // The bitmask is set to all ones when x is out-of-range for CVTTPS2DQ, and also when x == -2**31. The latter case
+    // is ok, because this x is already an integer, and can be passed to output as is.
+    // Note: NaN inputs also produce 0x80000000 and therefore take the pass-through path below.
+    const __m128 vrndmask = _mm_castsi128_ps(_mm_cmpeq_epi32(vintx, _mm_castps_si128(vmagic)));
+
+    // Convert integer back to floating-point.
+    // We binary OR the result with the sign of x to restore the sign of negative zero.
+    const __m128 vrndx = _mm_or_ps(_mm_cvtepi32_ps(vintx), vsignx);
+
+    // Combine x rounded via conversion to integer and the initial x value.
+    // For -2**31 < x < 2**31, the result is x rounded via conversion to integer.
+    // Otherwise (including NaN inputs), the result is x itself.
+    const __m128 vy = _mm_or_ps(_mm_and_ps(vx, vrndmask), _mm_andnot_ps(vrndmask, vrndx));
+
+    _mm_store_ps(output, vy);
+    output += 4;
+  }
+}
diff --git a/src/math/roundz-sse41.c b/src/math/roundz-sse41.c
new file mode 100644
index 0000000..78cbefe
--- /dev/null
+++ b/src/math/roundz-sse41.c
@@ -0,0 +1,30 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+
+#include <smmintrin.h>
+
+#include <xnnpack/math-stubs.h>
+
+
+void xnn_math_f32_roundz__sse41(
+    size_t n,
+    const float* input,
+    float* output)
+{
+  assert(n % (4 * sizeof(float)) == 0);
+
+  // SSE4.1 ROUNDPS performs round-towards-zero directly when invoked with the _MM_FROUND_TO_ZERO control,
+  // suppressing precision exceptions via _MM_FROUND_NO_EXC.
+  for (; n != 0; n -= 4 * sizeof(float)) {
+    const __m128 vx = _mm_load_ps(input);
+    input += 4;
+
+    _mm_store_ps(output, _mm_round_ps(vx, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC));
+    output += 4;
+  }
+}
diff --git a/src/xnnpack/math-stubs.h b/src/xnnpack/math-stubs.h
index 34773f3..a13dac5 100644
--- a/src/xnnpack/math-stubs.h
+++ b/src/xnnpack/math-stubs.h
@@ -44,6 +44,16 @@
DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_roundne__psimd_addsub)
DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_roundne__scalar_addsub)
+DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_roundz__neon_addsub)
+DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_roundz__neon_cvt)
+DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_roundz__neonv8)
+DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_roundz__sse_addsub)
+DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_roundz__sse2_cvt)
+DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_roundz__sse41)
+DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_roundz__psimd_addsub)
+DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_roundz__scalar_addsub)
+DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_roundz__scalar_cvt)
+
DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_exp__neonfma_lut64_p2)
DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_exp__neonfma_p5)
DECLARE_F32_UNARY_MATH_FUNCTION(xnn_math_f32_exp__sse2_p5)