VSCALE microkernel and SoftMax Three-Pass algorithm with Reloading
- VSCALE microkernel with AVX and AVX512F implementations
- Unit tests for the VSCALE microkernel
- Micro-benchmark for SoftArgMax using Three-Pass algorithm with Reloading
PiperOrigin-RevId: 275939577
diff --git a/BUILD b/BUILD
index d6e7f1a..b171d8f 100644
--- a/BUILD
+++ b/BUILD
@@ -404,6 +404,7 @@
AVX_UKERNELS = [
"src/f32-rmax/avx.c",
+ "src/f32-vscale/avx-unroll32.c",
]
AVX2_UKERNELS = [
@@ -420,6 +421,7 @@
"src/f32-raddexpminusmax/avx512f-p5-scalef-unroll128.c",
"src/f32-raddstoreexpminusmax/avx512f-p5-scalef-unroll128.c",
"src/f32-rmax/avx512f.c",
+ "src/f32-vscale/avx512f-unroll64.c",
"src/f32-vscaleexpminusmax/avx512f-p5-scalef-unroll128.c",
"src/math/exp-avx512f-p5-scalef.c",
"src/math/exp-avx512f-p5.c",
@@ -504,6 +506,7 @@
"src/xnnpack/vadd.h",
"src/xnnpack/vmul.h",
"src/xnnpack/vmulcaddc.h",
+ "src/xnnpack/vscale.h",
"src/xnnpack/vscaleexpminusmax.h",
"src/xnnpack/vsub.h",
"src/xnnpack/zip.h",
@@ -1317,6 +1320,15 @@
)
xnnpack_unit_test(
+ name = "f32_vscale_test",
+ srcs = [
+ "test/f32-vscale.cc",
+ "test/vscale-microkernel-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
name = "f32_vscaleexpminusmax_test",
srcs = [
"test/f32-vscaleexpminusmax.cc",