RND microkernels and tests
PiperOrigin-RevId: 315493586
diff --git a/BUILD.bazel b/BUILD.bazel
index 3d7099e..4675784 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -271,6 +271,18 @@
"src/f32-vmulcaddc/gen/c1-minmax-scalar-2x.c",
"src/f32-vmulcaddc/gen/c2-minmax-scalar-2x.c",
"src/f32-vmulcaddc/gen/c4-minmax-scalar-2x.c",
+ "src/f32-vrnd/gen/vrndne-scalar-libm-x1.c",
+ "src/f32-vrnd/gen/vrndne-scalar-libm-x2.c",
+ "src/f32-vrnd/gen/vrndne-scalar-libm-x4.c",
+ "src/f32-vrnd/gen/vrndz-scalar-libm-x1.c",
+ "src/f32-vrnd/gen/vrndz-scalar-libm-x2.c",
+ "src/f32-vrnd/gen/vrndz-scalar-libm-x4.c",
+ "src/f32-vrnd/gen/vrndu-scalar-libm-x1.c",
+ "src/f32-vrnd/gen/vrndu-scalar-libm-x2.c",
+ "src/f32-vrnd/gen/vrndu-scalar-libm-x4.c",
+ "src/f32-vrnd/gen/vrndd-scalar-libm-x1.c",
+ "src/f32-vrnd/gen/vrndd-scalar-libm-x2.c",
+ "src/f32-vrnd/gen/vrndd-scalar-libm-x4.c",
"src/f32-vunary/gen/vabs-scalar-x1.c",
"src/f32-vunary/gen/vabs-scalar-x2.c",
"src/f32-vunary/gen/vabs-scalar-x4.c",
@@ -580,6 +592,14 @@
"src/f32-sigmoid/gen/psimd-p5-div-x16.c",
"src/f32-sigmoid/gen/psimd-p5-div-x20.c",
"src/f32-sigmoid/gen/psimd-p5-div-x24.c",
+ "src/f32-vrnd/gen/vrndne-psimd-x4.c",
+ "src/f32-vrnd/gen/vrndne-psimd-x8.c",
+ "src/f32-vrnd/gen/vrndz-psimd-x4.c",
+ "src/f32-vrnd/gen/vrndz-psimd-x8.c",
+ "src/f32-vrnd/gen/vrndu-psimd-x4.c",
+ "src/f32-vrnd/gen/vrndu-psimd-x8.c",
+ "src/f32-vrnd/gen/vrndd-psimd-x4.c",
+ "src/f32-vrnd/gen/vrndd-psimd-x8.c",
"src/math/roundne-psimd-addsub.c",
"src/math/roundd-psimd-addsub.c",
"src/math/roundu-psimd-addsub.c",
@@ -748,6 +768,14 @@
"src/f32-vbinary/gen/vsubc-minmax-neon-x8.c",
"src/f32-vmulcaddc/gen/c4-minmax-neon-2x.c",
"src/f32-vmulcaddc/gen/c8-minmax-neon-2x.c",
+ "src/f32-vrnd/gen/vrndne-neon-x4.c",
+ "src/f32-vrnd/gen/vrndne-neon-x8.c",
+ "src/f32-vrnd/gen/vrndz-neon-x4.c",
+ "src/f32-vrnd/gen/vrndz-neon-x8.c",
+ "src/f32-vrnd/gen/vrndu-neon-x4.c",
+ "src/f32-vrnd/gen/vrndu-neon-x8.c",
+ "src/f32-vrnd/gen/vrndd-neon-x4.c",
+ "src/f32-vrnd/gen/vrndd-neon-x8.c",
"src/f32-vunary/gen/vabs-neon-x4.c",
"src/f32-vunary/gen/vabs-neon-x8.c",
"src/f32-vunary/gen/vneg-neon-x4.c",
@@ -1035,6 +1063,14 @@
]
NEONV8_UKERNELS = [
+ "src/f32-vrnd/gen/vrndne-neonv8-x4.c",
+ "src/f32-vrnd/gen/vrndne-neonv8-x8.c",
+ "src/f32-vrnd/gen/vrndz-neonv8-x4.c",
+ "src/f32-vrnd/gen/vrndz-neonv8-x8.c",
+ "src/f32-vrnd/gen/vrndu-neonv8-x4.c",
+ "src/f32-vrnd/gen/vrndu-neonv8-x8.c",
+ "src/f32-vrnd/gen/vrndd-neonv8-x4.c",
+ "src/f32-vrnd/gen/vrndd-neonv8-x8.c",
"src/math/roundne-neonv8.c",
"src/math/roundd-neonv8.c",
"src/math/roundu-neonv8.c",
@@ -1256,6 +1292,14 @@
"src/f32-sigmoid/gen/sse2-p5-div-x16.c",
"src/f32-sigmoid/gen/sse2-p5-div-x20.c",
"src/f32-sigmoid/gen/sse2-p5-div-x24.c",
+ "src/f32-vrnd/gen/vrndne-sse2-x4.c",
+ "src/f32-vrnd/gen/vrndne-sse2-x8.c",
+ "src/f32-vrnd/gen/vrndz-sse2-x4.c",
+ "src/f32-vrnd/gen/vrndz-sse2-x8.c",
+ "src/f32-vrnd/gen/vrndu-sse2-x4.c",
+ "src/f32-vrnd/gen/vrndu-sse2-x8.c",
+ "src/f32-vrnd/gen/vrndd-sse2-x4.c",
+ "src/f32-vrnd/gen/vrndd-sse2-x8.c",
"src/q8-avgpool/9p8x-minmax-sse2-c8.c",
"src/q8-avgpool/9x-minmax-sse2-c8.c",
"src/q8-igemm/4x4c2-minmax-sse2.c",
@@ -1305,6 +1349,14 @@
"src/f32-sigmoid/gen/sse41-p5-div-x16.c",
"src/f32-sigmoid/gen/sse41-p5-div-x20.c",
"src/f32-sigmoid/gen/sse41-p5-div-x24.c",
+ "src/f32-vrnd/gen/vrndne-sse41-x4.c",
+ "src/f32-vrnd/gen/vrndne-sse41-x8.c",
+ "src/f32-vrnd/gen/vrndz-sse41-x4.c",
+ "src/f32-vrnd/gen/vrndz-sse41-x8.c",
+ "src/f32-vrnd/gen/vrndu-sse41-x4.c",
+ "src/f32-vrnd/gen/vrndu-sse41-x8.c",
+ "src/f32-vrnd/gen/vrndd-sse41-x4.c",
+ "src/f32-vrnd/gen/vrndd-sse41-x8.c",
"src/math/roundne-sse41.c",
"src/math/roundd-sse41.c",
"src/math/roundu-sse41.c",
@@ -1396,6 +1448,14 @@
"src/f32-vbinary/gen/vsubc-minmax-avx-x8.c",
"src/f32-vbinary/gen/vsubc-minmax-avx-x16.c",
"src/f32-vscale/avx-unroll32.c",
+ "src/f32-vrnd/gen/vrndne-avx-x8.c",
+ "src/f32-vrnd/gen/vrndne-avx-x16.c",
+ "src/f32-vrnd/gen/vrndz-avx-x8.c",
+ "src/f32-vrnd/gen/vrndz-avx-x16.c",
+ "src/f32-vrnd/gen/vrndu-avx-x8.c",
+ "src/f32-vrnd/gen/vrndu-avx-x16.c",
+ "src/f32-vrnd/gen/vrndd-avx-x8.c",
+ "src/f32-vrnd/gen/vrndd-avx-x16.c",
"src/f32-vunary/gen/vabs-avx-x8.c",
"src/f32-vunary/gen/vabs-avx-x16.c",
"src/f32-vunary/gen/vneg-avx-x8.c",
@@ -1700,6 +1760,14 @@
"src/f32-vscaleextexp/gen/avx512f-p5-scalef-x160.c",
"src/f32-vscaleextexp/gen/avx512f-p5-scalef-x176.c",
"src/f32-vscaleextexp/gen/avx512f-p5-scalef-x192.c",
+ "src/f32-vrnd/gen/vrndne-avx512f-x16.c",
+ "src/f32-vrnd/gen/vrndne-avx512f-x32.c",
+ "src/f32-vrnd/gen/vrndz-avx512f-x16.c",
+ "src/f32-vrnd/gen/vrndz-avx512f-x32.c",
+ "src/f32-vrnd/gen/vrndu-avx512f-x16.c",
+ "src/f32-vrnd/gen/vrndu-avx512f-x32.c",
+ "src/f32-vrnd/gen/vrndd-avx512f-x16.c",
+ "src/f32-vrnd/gen/vrndd-avx512f-x32.c",
"src/f32-vunary/gen/vabs-avx512f-x16.c",
"src/f32-vunary/gen/vabs-avx512f-x32.c",
"src/f32-vunary/gen/vneg-avx512f-x16.c",
@@ -4025,6 +4093,42 @@
)
xnnpack_unit_test(
+ name = "f32_vrndne_test",  # Unit-test target; presumably exercises the vrndne-* sources under src/f32-vrnd/gen/ (confirm in the .cc).
+ srcs = [
+ "test/f32-vrndne.cc",  # Test source specific to this target.
+ "test/vunary-microkernel-tester.h",  # Tester header also listed by the vrndz/vrndu/vrndd test targets.
+ ] + MICROKERNEL_TEST_HDRS,  # Header list defined outside this hunk.
+ deps = MICROKERNEL_TEST_DEPS,  # Dependency list defined outside this hunk.
+)
+
+xnnpack_unit_test(
+ name = "f32_vrndz_test",  # Unit-test target; presumably exercises the vrndz-* sources under src/f32-vrnd/gen/ (confirm in the .cc).
+ srcs = [
+ "test/f32-vrndz.cc",  # Test source specific to this target.
+ "test/vunary-microkernel-tester.h",  # Tester header also listed by the vrndne/vrndu/vrndd test targets.
+ ] + MICROKERNEL_TEST_HDRS,  # Header list defined outside this hunk.
+ deps = MICROKERNEL_TEST_DEPS,  # Dependency list defined outside this hunk.
+)
+
+xnnpack_unit_test(
+ name = "f32_vrndu_test",  # Unit-test target; presumably exercises the vrndu-* sources under src/f32-vrnd/gen/ (confirm in the .cc).
+ srcs = [
+ "test/f32-vrndu.cc",  # Test source specific to this target.
+ "test/vunary-microkernel-tester.h",  # Tester header also listed by the vrndne/vrndz/vrndd test targets.
+ ] + MICROKERNEL_TEST_HDRS,  # Header list defined outside this hunk.
+ deps = MICROKERNEL_TEST_DEPS,  # Dependency list defined outside this hunk.
+)
+
+xnnpack_unit_test(
+ name = "f32_vrndd_test",  # Unit-test target; presumably exercises the vrndd-* sources under src/f32-vrnd/gen/ (confirm in the .cc).
+ srcs = [
+ "test/f32-vrndd.cc",  # Test source specific to this target.
+ "test/vunary-microkernel-tester.h",  # Tester header also listed by the vrndne/vrndz/vrndu test targets.
+ ] + MICROKERNEL_TEST_HDRS,  # Header list defined outside this hunk.
+ deps = MICROKERNEL_TEST_DEPS,  # Dependency list defined outside this hunk.
+)
+
+xnnpack_unit_test(
name = "f32_vscale_test",
srcs = [
"test/f32-vscale.cc",