Add f32 rounding (RND) microkernels and tests

PiperOrigin-RevId: 315493586
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5890736..a1725b9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -394,6 +394,18 @@
   src/f32-vmulcaddc/gen/c1-minmax-scalar-2x.c
   src/f32-vmulcaddc/gen/c2-minmax-scalar-2x.c
   src/f32-vmulcaddc/gen/c4-minmax-scalar-2x.c
+  src/f32-vrnd/gen/vrndne-scalar-libm-x1.c
+  src/f32-vrnd/gen/vrndne-scalar-libm-x2.c
+  src/f32-vrnd/gen/vrndne-scalar-libm-x4.c
+  src/f32-vrnd/gen/vrndz-scalar-libm-x1.c
+  src/f32-vrnd/gen/vrndz-scalar-libm-x2.c
+  src/f32-vrnd/gen/vrndz-scalar-libm-x4.c
+  src/f32-vrnd/gen/vrndu-scalar-libm-x1.c
+  src/f32-vrnd/gen/vrndu-scalar-libm-x2.c
+  src/f32-vrnd/gen/vrndu-scalar-libm-x4.c
+  src/f32-vrnd/gen/vrndd-scalar-libm-x1.c
+  src/f32-vrnd/gen/vrndd-scalar-libm-x2.c
+  src/f32-vrnd/gen/vrndd-scalar-libm-x4.c
   src/f32-vunary/gen/vabs-scalar-x1.c
   src/f32-vunary/gen/vabs-scalar-x2.c
   src/f32-vunary/gen/vabs-scalar-x4.c
@@ -589,6 +601,14 @@
   src/f32-sigmoid/gen/psimd-p5-div-x16.c
   src/f32-sigmoid/gen/psimd-p5-div-x20.c
   src/f32-sigmoid/gen/psimd-p5-div-x24.c
+  src/f32-vrnd/gen/vrndne-psimd-x4.c
+  src/f32-vrnd/gen/vrndne-psimd-x8.c
+  src/f32-vrnd/gen/vrndz-psimd-x4.c
+  src/f32-vrnd/gen/vrndz-psimd-x8.c
+  src/f32-vrnd/gen/vrndu-psimd-x4.c
+  src/f32-vrnd/gen/vrndu-psimd-x8.c
+  src/f32-vrnd/gen/vrndd-psimd-x4.c
+  src/f32-vrnd/gen/vrndd-psimd-x8.c
   src/math/roundne-psimd-addsub.c
   src/math/roundd-psimd-addsub.c
   src/math/roundu-psimd-addsub.c
@@ -755,6 +775,14 @@
   src/f32-vbinary/gen/vsubc-minmax-neon-x8.c
   src/f32-vmulcaddc/gen/c4-minmax-neon-2x.c
   src/f32-vmulcaddc/gen/c8-minmax-neon-2x.c
+  src/f32-vrnd/gen/vrndne-neon-x4.c
+  src/f32-vrnd/gen/vrndne-neon-x8.c
+  src/f32-vrnd/gen/vrndz-neon-x4.c
+  src/f32-vrnd/gen/vrndz-neon-x8.c
+  src/f32-vrnd/gen/vrndu-neon-x4.c
+  src/f32-vrnd/gen/vrndu-neon-x8.c
+  src/f32-vrnd/gen/vrndd-neon-x4.c
+  src/f32-vrnd/gen/vrndd-neon-x8.c
   src/f32-vunary/gen/vabs-neon-x4.c
   src/f32-vunary/gen/vabs-neon-x8.c
   src/f32-vunary/gen/vneg-neon-x4.c
@@ -956,6 +984,14 @@
   src/math/sigmoid-neonfma-rr2-p5-nr2recps.c)
 
 SET(XNNPACK_NEONV8_MICROKERNEL_SRCS
+  src/f32-vrnd/gen/vrndne-neonv8-x4.c
+  src/f32-vrnd/gen/vrndne-neonv8-x8.c
+  src/f32-vrnd/gen/vrndz-neonv8-x4.c
+  src/f32-vrnd/gen/vrndz-neonv8-x8.c
+  src/f32-vrnd/gen/vrndu-neonv8-x4.c
+  src/f32-vrnd/gen/vrndu-neonv8-x8.c
+  src/f32-vrnd/gen/vrndd-neonv8-x4.c
+  src/f32-vrnd/gen/vrndd-neonv8-x8.c
   src/math/roundne-neonv8.c
   src/math/roundd-neonv8.c
   src/math/roundu-neonv8.c
@@ -1257,6 +1293,14 @@
   src/f32-sigmoid/gen/sse2-p5-div-x16.c
   src/f32-sigmoid/gen/sse2-p5-div-x20.c
   src/f32-sigmoid/gen/sse2-p5-div-x24.c
+  src/f32-vrnd/gen/vrndne-sse2-x4.c
+  src/f32-vrnd/gen/vrndne-sse2-x8.c
+  src/f32-vrnd/gen/vrndz-sse2-x4.c
+  src/f32-vrnd/gen/vrndz-sse2-x8.c
+  src/f32-vrnd/gen/vrndu-sse2-x4.c
+  src/f32-vrnd/gen/vrndu-sse2-x8.c
+  src/f32-vrnd/gen/vrndd-sse2-x4.c
+  src/f32-vrnd/gen/vrndd-sse2-x8.c
   src/q8-avgpool/9p8x-minmax-sse2-c8.c
   src/q8-avgpool/9x-minmax-sse2-c8.c
   src/q8-igemm/4x4c2-minmax-sse2.c
@@ -1304,6 +1348,14 @@
   src/f32-sigmoid/gen/sse41-p5-div-x16.c
   src/f32-sigmoid/gen/sse41-p5-div-x20.c
   src/f32-sigmoid/gen/sse41-p5-div-x24.c
+  src/f32-vrnd/gen/vrndne-sse41-x4.c
+  src/f32-vrnd/gen/vrndne-sse41-x8.c
+  src/f32-vrnd/gen/vrndz-sse41-x4.c
+  src/f32-vrnd/gen/vrndz-sse41-x8.c
+  src/f32-vrnd/gen/vrndu-sse41-x4.c
+  src/f32-vrnd/gen/vrndu-sse41-x8.c
+  src/f32-vrnd/gen/vrndd-sse41-x4.c
+  src/f32-vrnd/gen/vrndd-sse41-x8.c
   src/math/roundne-sse41.c
   src/math/roundd-sse41.c
   src/math/roundu-sse41.c
@@ -1393,6 +1445,14 @@
   src/f32-vbinary/gen/vsub-minmax-avx-x16.c
   src/f32-vbinary/gen/vsubc-minmax-avx-x8.c
   src/f32-vbinary/gen/vsubc-minmax-avx-x16.c
+  src/f32-vrnd/gen/vrndne-avx-x8.c
+  src/f32-vrnd/gen/vrndne-avx-x16.c
+  src/f32-vrnd/gen/vrndz-avx-x8.c
+  src/f32-vrnd/gen/vrndz-avx-x16.c
+  src/f32-vrnd/gen/vrndu-avx-x8.c
+  src/f32-vrnd/gen/vrndu-avx-x16.c
+  src/f32-vrnd/gen/vrndd-avx-x8.c
+  src/f32-vrnd/gen/vrndd-avx-x16.c
   src/f32-vscale/avx-unroll32.c
   src/f32-vunary/gen/vabs-avx-x8.c
   src/f32-vunary/gen/vabs-avx-x16.c
@@ -1670,6 +1730,14 @@
   src/f32-vbinary/gen/vsub-minmax-avx512f-x32.c
   src/f32-vbinary/gen/vsubc-minmax-avx512f-x16.c
   src/f32-vbinary/gen/vsubc-minmax-avx512f-x32.c
+  src/f32-vrnd/gen/vrndne-avx512f-x16.c
+  src/f32-vrnd/gen/vrndne-avx512f-x32.c
+  src/f32-vrnd/gen/vrndz-avx512f-x16.c
+  src/f32-vrnd/gen/vrndz-avx512f-x32.c
+  src/f32-vrnd/gen/vrndu-avx512f-x16.c
+  src/f32-vrnd/gen/vrndu-avx512f-x32.c
+  src/f32-vrnd/gen/vrndd-avx512f-x16.c
+  src/f32-vrnd/gen/vrndd-avx512f-x32.c
   src/f32-vscale/avx512f-unroll64.c
   src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x16.c
   src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x32.c
@@ -2773,6 +2841,15 @@
   TARGET_LINK_LIBRARIES(f16-vrsubc-minmax-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
   ADD_TEST(f16-vrsubc-minmax-test f16-vrsubc-minmax-test)
 
+  ADD_EXECUTABLE(f32-vabs-test test/f32-vabs.cc)  # Unit-test binary built from test/f32-vabs.cc.
+  SET_TARGET_PROPERTIES(f32-vabs-test PROPERTIES
+    CXX_STANDARD 11  # Request C++11 for this target.
+    CXX_STANDARD_REQUIRED YES  # Make the requested standard a hard requirement (no fallback to an older one).
+    CXX_EXTENSIONS YES)  # Keep compiler-specific extensions enabled (e.g. gnu++11 rather than c++11).
+  TARGET_INCLUDE_DIRECTORIES(f32-vabs-test PRIVATE src test)  # src/ and test/ headers, visible to this target only.
+  TARGET_LINK_LIBRARIES(f32-vabs-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)  # Same link set as the neighbouring test targets.
+  ADD_TEST(f32-vabs-test f32-vabs-test)  # Register with CTest; test name and command are both the target name.
+
   ADD_EXECUTABLE(f32-vadd-minmax-test test/f32-vadd-minmax.cc)
   SET_TARGET_PROPERTIES(f32-vadd-minmax-test PROPERTIES
     CXX_STANDARD 11
@@ -2890,6 +2967,51 @@
   TARGET_LINK_LIBRARIES(f32-vmulcaddc-minmax-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
   ADD_TEST(f32-vmulcaddc-minmax-test f32-vmulcaddc-minmax-test)
 
+  ADD_EXECUTABLE(f32-vneg-test test/f32-vneg.cc)  # Unit-test binary built from test/f32-vneg.cc.
+  SET_TARGET_PROPERTIES(f32-vneg-test PROPERTIES
+    CXX_STANDARD 11  # Request C++11 for this target.
+    CXX_STANDARD_REQUIRED YES  # Make the requested standard a hard requirement (no fallback to an older one).
+    CXX_EXTENSIONS YES)  # Keep compiler-specific extensions enabled (e.g. gnu++11 rather than c++11).
+  TARGET_INCLUDE_DIRECTORIES(f32-vneg-test PRIVATE src test)  # src/ and test/ headers, visible to this target only.
+  TARGET_LINK_LIBRARIES(f32-vneg-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)  # Same link set as the neighbouring test targets.
+  ADD_TEST(f32-vneg-test f32-vneg-test)  # Register with CTest; test name and command are both the target name.
+
+  ADD_EXECUTABLE(f32-vrndne-test test/f32-vrndne.cc)  # Unit-test binary built from test/f32-vrndne.cc.
+  SET_TARGET_PROPERTIES(f32-vrndne-test PROPERTIES
+    CXX_STANDARD 11  # Request C++11 for this target.
+    CXX_STANDARD_REQUIRED YES  # Make the requested standard a hard requirement (no fallback to an older one).
+    CXX_EXTENSIONS YES)  # Keep compiler-specific extensions enabled (e.g. gnu++11 rather than c++11).
+  TARGET_INCLUDE_DIRECTORIES(f32-vrndne-test PRIVATE src test)  # src/ and test/ headers, visible to this target only.
+  TARGET_LINK_LIBRARIES(f32-vrndne-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)  # Same link set as the neighbouring test targets.
+  ADD_TEST(f32-vrndne-test f32-vrndne-test)  # Register with CTest; test name and command are both the target name.
+
+  ADD_EXECUTABLE(f32-vrndz-test test/f32-vrndz.cc)  # Unit-test binary built from test/f32-vrndz.cc.
+  SET_TARGET_PROPERTIES(f32-vrndz-test PROPERTIES
+    CXX_STANDARD 11  # Request C++11 for this target.
+    CXX_STANDARD_REQUIRED YES  # Make the requested standard a hard requirement (no fallback to an older one).
+    CXX_EXTENSIONS YES)  # Keep compiler-specific extensions enabled (e.g. gnu++11 rather than c++11).
+  TARGET_INCLUDE_DIRECTORIES(f32-vrndz-test PRIVATE src test)  # src/ and test/ headers, visible to this target only.
+  TARGET_LINK_LIBRARIES(f32-vrndz-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)  # Same link set as the neighbouring test targets.
+  ADD_TEST(f32-vrndz-test f32-vrndz-test)  # Register with CTest; test name and command are both the target name.
+
+  ADD_EXECUTABLE(f32-vrndu-test test/f32-vrndu.cc)  # Unit-test binary built from test/f32-vrndu.cc.
+  SET_TARGET_PROPERTIES(f32-vrndu-test PROPERTIES
+    CXX_STANDARD 11  # Request C++11 for this target.
+    CXX_STANDARD_REQUIRED YES  # Make the requested standard a hard requirement (no fallback to an older one).
+    CXX_EXTENSIONS YES)  # Keep compiler-specific extensions enabled (e.g. gnu++11 rather than c++11).
+  TARGET_INCLUDE_DIRECTORIES(f32-vrndu-test PRIVATE src test)  # src/ and test/ headers, visible to this target only.
+  TARGET_LINK_LIBRARIES(f32-vrndu-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)  # Same link set as the neighbouring test targets.
+  ADD_TEST(f32-vrndu-test f32-vrndu-test)  # Register with CTest; test name and command are both the target name.
+
+  ADD_EXECUTABLE(f32-vrndd-test test/f32-vrndd.cc)  # Unit-test binary built from test/f32-vrndd.cc.
+  SET_TARGET_PROPERTIES(f32-vrndd-test PROPERTIES
+    CXX_STANDARD 11  # Request C++11 for this target.
+    CXX_STANDARD_REQUIRED YES  # Make the requested standard a hard requirement (no fallback to an older one).
+    CXX_EXTENSIONS YES)  # Keep compiler-specific extensions enabled (e.g. gnu++11 rather than c++11).
+  TARGET_INCLUDE_DIRECTORIES(f32-vrndd-test PRIVATE src test)  # src/ and test/ headers, visible to this target only.
+  TARGET_LINK_LIBRARIES(f32-vrndd-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)  # Same link set as the neighbouring test targets.
+  ADD_TEST(f32-vrndd-test f32-vrndd-test)  # Register with CTest; test name and command are both the target name.
+
   ADD_EXECUTABLE(f32-vscaleexpminusmax-test test/f32-vscaleexpminusmax.cc)
   SET_TARGET_PROPERTIES(f32-vscaleexpminusmax-test PROPERTIES
     CXX_STANDARD 11
@@ -2908,6 +3030,15 @@
   TARGET_LINK_LIBRARIES(f32-vscaleextexp-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
   ADD_TEST(f32-vscaleextexp-test f32-vscaleextexp-test)
 
+  ADD_EXECUTABLE(f32-vsqr-test test/f32-vsqr.cc)  # Unit-test binary built from test/f32-vsqr.cc.
+  SET_TARGET_PROPERTIES(f32-vsqr-test PROPERTIES
+    CXX_STANDARD 11  # Request C++11 for this target.
+    CXX_STANDARD_REQUIRED YES  # Make the requested standard a hard requirement (no fallback to an older one).
+    CXX_EXTENSIONS YES)  # Keep compiler-specific extensions enabled (e.g. gnu++11 rather than c++11).
+  TARGET_INCLUDE_DIRECTORIES(f32-vsqr-test PRIVATE src test)  # src/ and test/ headers, visible to this target only.
+  TARGET_LINK_LIBRARIES(f32-vsqr-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)  # Same link set as the neighbouring test targets.
+  ADD_TEST(f32-vsqr-test f32-vsqr-test)  # Register with CTest; test name and command are both the target name.
+
   ADD_EXECUTABLE(f32-vsqrdiff-test test/f32-vsqrdiff.cc)
   SET_TARGET_PROPERTIES(f32-vsqrdiff-test PROPERTIES
     CXX_STANDARD 11