SSE2 Sigmoid micro-kernels

- Add SSE2 implementations of the Sigmoid micro-kernels
- Add unit tests and microbenchmarks
- Enable Sigmoid operator on x86/x86-64

PiperOrigin-RevId: 281164623
diff --git a/src/init.c b/src/init.c
index 5b87fe6..f43fcf1 100644
--- a/src/init.c
+++ b/src/init.c
@@ -680,6 +680,7 @@
     };
     xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
     xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
+    xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__sse2_p5_div_x16;
     xnn_params.f32.prelu = (struct prelu_parameters) {
       .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
       .row_tile = 2,