Add RELU microkernels that clamp negative values to 0, for a specialized clamp operator

PiperOrigin-RevId: 321457971
diff --git a/BUILD.bazel b/BUILD.bazel
index 39b1d4c..3bc7b65 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -208,6 +208,9 @@
     "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4.c",
     "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4-acc2.c",
     "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4-acc4.c",
+    "src/f32-relu/gen/scalar-x1.c",
+    "src/f32-relu/gen/scalar-x2.c",
+    "src/f32-relu/gen/scalar-x4.c",
     "src/f32-rmax/scalar.c",
     "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x1.c",
     "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x2.c",
@@ -456,6 +459,9 @@
     "src/f32-pavgpool/9x-minmax-wasm-c1.c",
     "src/f32-prelu/gen/wasm-2x1.c",
     "src/f32-prelu/gen/wasm-2x4.c",
+    "src/f32-relu/gen/wasm-x1.c",
+    "src/f32-relu/gen/wasm-x2.c",
+    "src/f32-relu/gen/wasm-x4.c",
     "src/f32-vbinary/gen/vadd-minmax-wasm-x1.c",
     "src/f32-vbinary/gen/vadd-minmax-wasm-x2.c",
     "src/f32-vbinary/gen/vadd-minmax-wasm-x4.c",
@@ -662,6 +668,8 @@
     "src/f32-prelu/gen/wasmsimd-bitselect-2x8.c",
     "src/f32-prelu/gen/wasmsimd-minmax-2x4.c",
     "src/f32-prelu/gen/wasmsimd-minmax-2x8.c",
+    "src/f32-relu/gen/wasmsimd-x4.c",
+    "src/f32-relu/gen/wasmsimd-x8.c",
     "src/f32-rmax/wasmsimd-arm.c",
     "src/f32-rmax/wasmsimd-x86.c",
     "src/f32-vbinary/gen/vadd-minmax-wasmsimd-arm-x4.c",
@@ -1040,6 +1048,8 @@
     "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20.c",
     "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20-acc2.c",
     "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20-acc5.c",
+    "src/f32-relu/gen/neon-x4.c",
+    "src/f32-relu/gen/neon-x8.c",
     "src/f32-rmax/neon.c",
     "src/f32-sigmoid/gen/neon-frac-p9-p10-nr1recps-x16.c",
     "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x4.c",
@@ -1448,6 +1458,8 @@
     "src/f16-hswish/gen/neonfp16arith-x8.c",
     "src/f16-prelu/gen/neonfp16arith-2x16.c",
     "src/f16-prelu/gen/neonfp16arith-2x8.c",
+    "src/f16-relu/gen/neonfp16arith-x16.c",
+    "src/f16-relu/gen/neonfp16arith-x8.c",
     "src/f16-vbinary/gen/vaddc-minmax-neonfp16arith-x16.c",
     "src/f16-vbinary/gen/vaddc-minmax-neonfp16arith-x8.c",
     "src/f16-vbinary/gen/vadd-minmax-neonfp16arith-x16.c",
@@ -1566,6 +1578,8 @@
     "src/f32-ppmm/gen/4x8-minmax-sse.c",
     "src/f32-prelu/gen/sse-2x4.c",
     "src/f32-prelu/gen/sse-2x8.c",
+    "src/f32-relu/gen/sse-x4.c",
+    "src/f32-relu/gen/sse-x8.c",
     "src/f32-rmax/sse.c",
     "src/f32-spmm/gen/4x1-minmax-sse.c",
     "src/f32-spmm/gen/8x1-minmax-sse.c",
@@ -1774,6 +1788,8 @@
     "src/f32-igemm/gen/5x16-minmax-avx-broadcast.c",
     "src/f32-prelu/gen/avx-2x8.c",
     "src/f32-prelu/gen/avx-2x16.c",
+    "src/f32-relu/gen/avx-x16.c",
+    "src/f32-relu/gen/avx-x8.c",
     "src/f32-rmax/avx.c",
     "src/f32-vbinary/gen/vadd-minmax-avx-x8.c",
     "src/f32-vbinary/gen/vadd-minmax-avx-x16.c",
@@ -2075,6 +2091,8 @@
     "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc2.c",
     "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc3.c",
     "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc6.c",
+    "src/f32-relu/gen/avx512f-x16.c",
+    "src/f32-relu/gen/avx512f-x32.c",
     "src/f32-rmax/avx512f.c",
     "src/f32-vbinary/gen/vadd-minmax-avx512f-x16.c",
     "src/f32-vbinary/gen/vadd-minmax-avx512f-x32.c",
@@ -4463,6 +4481,15 @@
 )
 
 xnnpack_unit_test(
+    name = "f32_relu_test",
+    srcs = [
+        "test/f32-relu.cc",
+        "test/vunary-microkernel-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
     name = "f32_rmax_test",
     srcs = [
         "test/f32-rmax.cc",