Refactor Sigmoid evaluation stubs
PiperOrigin-RevId: 334257369
diff --git a/src/math/sigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div.c b/src/math/sigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div.c
index b8be24e..5c68c02 100644
--- a/src/math/sigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div.c
+++ b/src/math/sigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div.c
@@ -55,10 +55,10 @@
const __m512 vz = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx), vsign_mask));
// Compute reduced argument n := round(z / log(2), 5).
- // We do it by adding a large number (magic bias), which cause rounding of the result to 5 fractional bits, then
+ // We do it by adding a large number (magic bias), which causes rounding of the result to 5 fractional bits, then
// subtracing the large number back. The addition is combined with multiplication by log2e into a single FMA
// instruction. The trick with adding large number is valid only within certain bounds (|z / log(2)| <= 2**17,
- // i.e. |z| <= 0x1.62E43p+17 = 181704.375), but that is acceptable, because inputs x outside of
+ // i.e. |z| <= 0x1.62E43p+16 = 90852.1875), but that is acceptable, because inputs x outside of
// [-87.336544, 17.328678] (i.e. z outsize [87.336544, 0]) underflow or saturate sigmoidf(x). We fixup the result
// for such inputs at the very end of the algorithm.
__m512 vn = _mm512_fmadd_ps(vz, vlog2e, vmagic_bias);
@@ -66,7 +66,7 @@
// Use the low 5 bits of n (as integer) for table lookup.
const __m512 vl = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn), vtable_hi);
- // Subtract the large number back to get final n := round(z / log(2), 5).
+ // Subtract the large number back to get the final n := round(z / log(2), 5) as a floating-point number.
vn = _mm512_sub_ps(vn, vmagic_bias);
// Compute reduced argument t := z - n * log(2).