Sigmoid evaluation stubs, micro-kernels, and operator

Only the ARM64 architecture is supported.

PiperOrigin-RevId: 280671182
diff --git a/src/math/exp-neonfma-lut64-p2.c b/src/math/exp-neonfma-lut64-p2.c
index e45bc30..033857e 100644
--- a/src/math/exp-neonfma-lut64-p2.c
+++ b/src/math/exp-neonfma-lut64-p2.c
@@ -28,7 +28,7 @@
     const float* input,
     float* output)
 {
-  assert(n % (16 * sizeof(float)) == 0);
+  assert(n % (4 * sizeof(float)) == 0);
 
   const float32x4_t vmagic_bias = vmovq_n_f32(0x1.800000p23f);
   // The smallest x for which expf(x) is non-zero.