Refactor and open-source vectorized expminus function

- Add AVX2 version with degree-5 polynomial approximation

PiperOrigin-RevId: 275378607
diff --git a/BUILD b/BUILD
index 80c0d9e..49d8dba 100644
--- a/BUILD
+++ b/BUILD
@@ -410,6 +410,7 @@
     "src/math/exp-avx2-p5.c",
     "src/math/exp-avx2-perm-p3.c",
     "src/math/exp-avx2-perm-p4.c",
+    "src/math/expminus-avx2-p5.c",
 ]
 
 AVX512F_UKERNELS = [
@@ -1044,6 +1045,15 @@
     deps = ACCURACY_EVAL_DEPS,
 )
 
+xnnpack_benchmark(
+    name = "f32_expminus_eval",
+    srcs = [
+        "eval/f32-expminus.cc",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + ACCURACY_EVAL_HDRS,
+    deps = ACCURACY_EVAL_DEPS,
+)
+
 ######################### Unit tests for micro-kernels #########################
 
 xnnpack_unit_test(