Sigmoid evaluation stubs, micro-kernels, and operator

Only the ARM64 architecture is supported.

PiperOrigin-RevId: 280671182
diff --git a/src/init.c b/src/init.c
index b93c0b0..1bdf220 100644
--- a/src/init.c
+++ b/src/init.c
@@ -41,6 +41,7 @@
 #include <xnnpack/vadd.h>
 #include <xnnpack/vbinop.h>
 #include <xnnpack/vmulcaddc.h>
+#include <xnnpack/vunop.h>
 #include <xnnpack/zip.h>
 
 #ifndef XNN_ENABLE_ASSEMBLY
@@ -455,6 +456,7 @@
     };
     xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
     xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
+    xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__neonfma_p5_x16;
     xnn_params.f32.prelu = (struct prelu_parameters) {
       .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
       .row_tile = 2,