Add ROUNDZ evaluation stubs
PiperOrigin-RevId: 311256662
diff --git a/BUILD.bazel b/BUILD.bazel
index 96fc30e..13f52ca 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -236,6 +236,8 @@
"src/math/expminus-scalar-lut64-p2.c",
"src/math/expminus-scalar-p5.c",
"src/math/roundne-scalar-addsub.c",
+ "src/math/roundz-scalar-addsub.c",
+ "src/math/roundz-scalar-cvt.c",
"src/math/sigmoid-scalar-lut2048-p1-div.c",
"src/math/sigmoid-scalar-lut64-p2-div.c",
"src/math/sigmoid-scalar-p5-div.c",
@@ -498,6 +500,7 @@
"src/f32-sigmoid/gen/psimd-p5-div-x20.c",
"src/f32-sigmoid/gen/psimd-p5-div-x24.c",
"src/math/roundne-psimd-addsub.c",
+ "src/math/roundz-psimd-addsub.c",
"src/math/sigmoid-psimd-p5-div.c",
]
@@ -673,6 +676,8 @@
"src/x8-zip/x4-neon.c",
"src/x8-zip/xm-neon.c",
"src/math/roundne-neon-addsub.c",
+ "src/math/roundz-neon-addsub.c",
+ "src/math/roundz-neon-cvt.c",
"src/math/sigmoid-neon-frac-p9-p10-nr1recps.c",
"src/math/sigmoid-neon-rr1-lut2048-p1-nr2recps.c",
"src/math/sigmoid-neon-rr1-lut64-p2-nr2recps.c",
@@ -917,6 +922,7 @@
NEONV8_UKERNELS = [
"src/math/roundne-neonv8.c",
+ "src/math/roundz-neonv8.c",
]
AARCH64_NEONFP16ARITH_UKERNELS = [
@@ -1073,6 +1079,7 @@
"src/f32-vmulcaddc/gen/c8-minmax-sse-2x.c",
"src/x32-packx/x4-sse.c",
"src/math/roundne-sse-addsub.c",
+ "src/math/roundz-sse-addsub.c",
]
SSE2_UKERNELS = [
@@ -1124,6 +1131,7 @@
"src/math/exp-sse2-p5.c",
"src/math/expminus-sse2-p5.c",
"src/math/roundne-sse2-cvt.c",
+ "src/math/roundz-sse2-cvt.c",
"src/math/sigmoid-sse2-p5-div.c",
"src/requantization/precise-sse2.c",
"src/requantization/fp32-sse2.c",
@@ -1147,6 +1155,7 @@
"src/f32-sigmoid/gen/sse41-p5-div-x20.c",
"src/f32-sigmoid/gen/sse41-p5-div-x24.c",
"src/math/roundne-sse41.c",
+ "src/math/roundz-sse41.c",
"src/requantization/precise-sse4.c",
"src/requantization/q31-sse4.c",
"src/requantization/gemmlowp-sse4.c",
@@ -2603,6 +2612,19 @@
deps = MICROKERNEL_TEST_DEPS,
)
+xnnpack_unit_test(
+ name = "f32_roundz_eval",
+ srcs = [
+ "eval/f32-roundz.cc",
+ "src/xnnpack/AlignedAllocator.h",
+ "src/xnnpack/math-stubs.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ tags = [
+ "notap",
+ ],
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
xnnpack_benchmark(
name = "f32_sigmoid_eval",
srcs = [