AVX and AVX512F implementations of F32 CLAMP micro-kernels
PiperOrigin-RevId: 282845725
diff --git a/src/init.c b/src/init.c
index 4b9e0ce..e7ce5b3 100644
--- a/src/init.c
+++ b/src/init.c
@@ -771,7 +771,13 @@
.pixel_tile = 1,
.channel_tile = 8,
};
- xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
+ if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx512f()) {
+ xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__avx512f;
+ } else if (!XNN_PLATFORM_MOBILE && cpuinfo_has_x86_avx()) {
+ xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__avx;
+ } else {
+ xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
+ }
xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
xnn_params.f32.sigmoid = (xnn_univector_ukernel_function) xnn_f32_sigmoid_ukernel__sse2_p5_div_x16;
xnn_params.f32.prelu = (struct prelu_parameters) {