FP16 hswish, clamp and prelu microkernels

PiperOrigin-RevId: 311194603
diff --git a/test/hswish-microkernel-tester.h b/test/hswish-microkernel-tester.h
index eef7698..d08228b 100644
--- a/test/hswish-microkernel-tester.h
+++ b/test/hswish-microkernel-tester.h
@@ -15,6 +15,8 @@
 #include <random>
 #include <vector>
 
+#include <fp16.h>
+
 #include <xnnpack.h>
 #include <xnnpack/params-init.h>
 #include <xnnpack/params.h>
@@ -55,6 +57,43 @@
     return this->iterations_;
   }
 
+  void Test(xnn_f16_hswish_ukernel_function hswish) const {
+    std::random_device random_device;
+    auto rng = std::mt19937(random_device());
+    auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng);
+    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
+
+    std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
+    std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
+    std::vector<float> y_ref(batch_size());
+    for (size_t iteration = 0; iteration < iterations(); iteration++) {
+      std::generate(x.begin(), x.end(), std::ref(f16rng));
+      if (inplace()) {
+        std::generate(y.begin(), y.end(), std::ref(f16rng));
+      } else {
+        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
+      }
+      const uint16_t* x_data = inplace() ? y.data() : x.data();
+
+      // Prepare micro-kernel parameters.
+      struct xnn_f16_hswish_params params = xnn_init_f16_hswish_params();
+
+      // Compute reference results.
+      for (size_t i = 0; i < batch_size(); i++) {
+        y_ref[i] = fp16_ieee_to_fp32_value(x_data[i]) * std::max(std::min(fp16_ieee_to_fp32_value(x_data[i]) + 3.0f, 6.0f), 0.0f) / 6.0f;
+      }
+
+      // Call optimized micro-kernel.
+      hswish(batch_size() * sizeof(uint16_t), x_data, y.data(), &params);
+
+      // Verify results.
+      for (size_t i = 0; i < batch_size(); i++) {
+        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::abs(y_ref[i]) * 1.0e-2f)
+          << "at position " << i << ", batch_size = " << batch_size();
+      }
+    }
+  }
+
   void Test(xnn_f32_hswish_ukernel_function hswish, Variant variant = Variant::Native) const {
     std::random_device random_device;
     auto rng = std::mt19937(random_device());