AVX2 implementation of F32 Sigmoid micro-kernels
PiperOrigin-RevId: 288397166
diff --git a/src/init.c b/src/init.c
index f0bf70f..2f9c728 100644
--- a/src/init.c
+++ b/src/init.c
@@ -901,7 +901,11 @@
} else {
xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse_x8;
}
- xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__sse2_p5_div_x16;
+ if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx2()) {
+ xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x40;
+ } else {
+ xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__sse2_p5_div_x16;
+ }
xnn_params.f32.prelu = (struct prelu_parameters) {
.ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
.row_tile = 2,