FMA3 implementation of F16 DWCONV/VCLAMP/VMULCADDC microkernels

PiperOrigin-RevId: 420676523
diff --git a/test/f16-vsub-minmax.cc b/test/f16-vsub-minmax.cc
index b31f7a9..ac403e9 100644
--- a/test/f16-vsub-minmax.cc
+++ b/test/f16-vsub-minmax.cc
@@ -23,7 +23,7 @@
     TEST_REQUIRES_ARM_NEON_FP16_ARITH;
     VBinaryMicrokernelTester()
       .batch_size(8)
-      .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+      .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
   }
 
   TEST(F16_VSUB_MINMAX__NEONFP16ARITH_X8, batch_div_8) {
@@ -31,7 +31,7 @@
     for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -40,7 +40,7 @@
     for (size_t batch_size = 1; batch_size < 8; batch_size++) {
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -49,7 +49,7 @@
     for (size_t batch_size = 9; batch_size < 16; batch_size++) {
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -59,7 +59,7 @@
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
         .inplace_a(true)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -69,7 +69,7 @@
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
         .inplace_b(true)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -80,7 +80,7 @@
         .batch_size(batch_size)
         .inplace_a(true)
         .inplace_b(true)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -90,7 +90,7 @@
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
         .qmin(128)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -100,7 +100,7 @@
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
         .qmax(128)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 #endif  // XNN_ARCH_ARM64
@@ -111,7 +111,7 @@
     TEST_REQUIRES_ARM_NEON_FP16_ARITH;
     VBinaryMicrokernelTester()
       .batch_size(16)
-      .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+      .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
   }
 
   TEST(F16_VSUB_MINMAX__NEONFP16ARITH_X16, batch_div_16) {
@@ -119,7 +119,7 @@
     for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -128,7 +128,7 @@
     for (size_t batch_size = 1; batch_size < 16; batch_size++) {
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -137,7 +137,7 @@
     for (size_t batch_size = 17; batch_size < 32; batch_size++) {
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -147,7 +147,7 @@
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
         .inplace_a(true)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -157,7 +157,7 @@
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
         .inplace_b(true)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -168,7 +168,7 @@
         .batch_size(batch_size)
         .inplace_a(true)
         .inplace_b(true)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -178,7 +178,7 @@
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
         .qmin(128)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 
@@ -188,7 +188,7 @@
       VBinaryMicrokernelTester()
         .batch_size(batch_size)
         .qmax(128)
-        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+        .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
     }
   }
 #endif  // XNN_ARCH_ARM64