Add vectorized implementations of the expf function for AVX2 and AVX512F
PiperOrigin-RevId: 274706485
diff --git a/BUILD b/BUILD
index 019bd19..ccd45c0 100644
--- a/BUILD
+++ b/BUILD
@@ -28,6 +28,13 @@
"@pthreadpool",
]
+ACCURACY_EVAL_DEPS = [
+ ":XNNPACK",
+ ":ukernels",
+ "@FP16",
+ "@pthreadpool",
+]
+
MICROKERNEL_TEST_DEPS = [
":ukernels",
":enable_assembly",
@@ -396,8 +403,17 @@
"src/f32-rmax/avx.c",
]
+AVX2_UKERNELS = [
+ "src/math/exp-avx2-p5.c",
+ "src/math/exp-avx2-perm-p3.c",
+ "src/math/exp-avx2-perm-p4.c",
+]
+
AVX512F_UKERNELS = [
"src/f32-rmax/avx512f.c",
+ "src/math/exp-avx512f-p5-scalef.c",
+ "src/math/exp-avx512f-p5.c",
+ "src/math/exp-avx512f-perm-p3.c",
]
AARCH32_ASM_UKERNELS = [
@@ -486,10 +502,15 @@
"src/xnnpack/compute.h",
"src/xnnpack/im2col.h",
"src/xnnpack/indirection.h",
+ "src/xnnpack/math-stubs.h",
"src/xnnpack/operator.h",
"src/xnnpack/pack.h",
- "src/xnnpack/requantization.h",
"src/xnnpack/requantization-stubs.h",
+ "src/xnnpack/requantization.h",
+]
+
+ACCURACY_EVAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
+ "src/xnnpack/math-stubs.h",
]
MICROKERNEL_BENCHMARK_HDRS = INTERNAL_MICROKERNEL_HDRS + [
@@ -612,6 +633,18 @@
)
xnnpack_cc_library(
+ name = "avx2_ukernels",
+ hdrs = INTERNAL_HDRS,
+ copts = xnnpack_std_copts(),
+ x86_copts = [
+ "-mfma",
+ "-mavx2",
+ ],
+ x86_srcs = AVX2_UKERNELS,
+ deps = ["@FP16"],
+)
+
+xnnpack_cc_library(
name = "avx512f_ukernels",
hdrs = INTERNAL_HDRS,
copts = xnnpack_std_copts(),
@@ -650,6 +683,7 @@
":psimd_ukernels",
":sse2_ukernels",
":avx_ukernels",
+ ":avx2_ukernels",
":avx512f_ukernels",
],
)
@@ -996,6 +1030,17 @@
],
)
+#################### Accuracy evaluation for math functions ####################
+
+xnnpack_benchmark(
+ name = "f32_exp_eval",
+ srcs = [
+ "eval/f32-exp.cc",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + ACCURACY_EVAL_HDRS,
+ deps = ACCURACY_EVAL_DEPS,
+)
+
######################### Unit tests for micro-kernels #########################
xnnpack_unit_test(