Work around msan failures in AVX micro-kernels
Avoid using VMASKMOVPS to store values in loop epilog: MemorySanitizer doesn't
seem to recognize initialization via VMASKMOVPS.
PiperOrigin-RevId: 293517405
diff --git a/src/f32-vbinary/gen/vrsubc-avx-x8.c b/src/f32-vbinary/gen/vrsubc-avx-x8.c
index 0c866bf..ab49141 100644
--- a/src/f32-vbinary/gen/vrsubc-avx-x8.c
+++ b/src/f32-vbinary/gen/vrsubc-avx-x8.c
@@ -64,6 +64,21 @@
__m256 vy = _mm256_sub_ps(vb, va);
vy = _mm256_max_ps(vy, vy_min);
vy = _mm256_min_ps(vy, vy_max);
- _mm256_maskstore_ps(y, vmask, vy);
+
+ // _mm256_maskstore_ps(y, vmask, vy) could be used here, but triggers msan failures (probably an msan bug).
+ __m128 vy_lo = _mm256_castps256_ps128(vy);
+ if (n & (4 * sizeof(float))) {
+ _mm_storeu_ps(y, vy_lo);
+ vy_lo = _mm256_extractf128_ps(vy, 1);
+ y += 4;
+ }
+ if (n & (2 * sizeof(float))) {
+ _mm_storel_pi((__m64*) y, vy_lo);
+ vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
+ y += 2;
+ }
+ if (n & (1 * sizeof(float))) {
+ _mm_store_ss(y, vy_lo);
+ }
}
}