FMA3 implementation of F16 DWCONV/VCLAMP/VMULCADDC microkernels
PiperOrigin-RevId: 420676523
diff --git a/test/f16-vsub-minmax.cc b/test/f16-vsub-minmax.cc
index b31f7a9..ac403e9 100644
--- a/test/f16-vsub-minmax.cc
+++ b/test/f16-vsub-minmax.cc
@@ -23,7 +23,7 @@
TEST_REQUIRES_ARM_NEON_FP16_ARITH;
VBinaryMicrokernelTester()
.batch_size(8)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
TEST(F16_VSUB_MINMAX__NEONFP16ARITH_X8, batch_div_8) {
@@ -31,7 +31,7 @@
for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
VBinaryMicrokernelTester()
.batch_size(batch_size)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -40,7 +40,7 @@
for (size_t batch_size = 1; batch_size < 8; batch_size++) {
VBinaryMicrokernelTester()
.batch_size(batch_size)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -49,7 +49,7 @@
for (size_t batch_size = 9; batch_size < 16; batch_size++) {
VBinaryMicrokernelTester()
.batch_size(batch_size)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -59,7 +59,7 @@
VBinaryMicrokernelTester()
.batch_size(batch_size)
.inplace_a(true)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -69,7 +69,7 @@
VBinaryMicrokernelTester()
.batch_size(batch_size)
.inplace_b(true)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -80,7 +80,7 @@
.batch_size(batch_size)
.inplace_a(true)
.inplace_b(true)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -90,7 +90,7 @@
VBinaryMicrokernelTester()
.batch_size(batch_size)
.qmin(128)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -100,7 +100,7 @@
VBinaryMicrokernelTester()
.batch_size(batch_size)
.qmax(128)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
#endif // XNN_ARCH_ARM64
@@ -111,7 +111,7 @@
TEST_REQUIRES_ARM_NEON_FP16_ARITH;
VBinaryMicrokernelTester()
.batch_size(16)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
TEST(F16_VSUB_MINMAX__NEONFP16ARITH_X16, batch_div_16) {
@@ -119,7 +119,7 @@
for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
VBinaryMicrokernelTester()
.batch_size(batch_size)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -128,7 +128,7 @@
for (size_t batch_size = 1; batch_size < 16; batch_size++) {
VBinaryMicrokernelTester()
.batch_size(batch_size)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -137,7 +137,7 @@
for (size_t batch_size = 17; batch_size < 32; batch_size++) {
VBinaryMicrokernelTester()
.batch_size(batch_size)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -147,7 +147,7 @@
VBinaryMicrokernelTester()
.batch_size(batch_size)
.inplace_a(true)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -157,7 +157,7 @@
VBinaryMicrokernelTester()
.batch_size(batch_size)
.inplace_b(true)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -168,7 +168,7 @@
.batch_size(batch_size)
.inplace_a(true)
.inplace_b(true)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -178,7 +178,7 @@
VBinaryMicrokernelTester()
.batch_size(batch_size)
.qmin(128)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
@@ -188,7 +188,7 @@
VBinaryMicrokernelTester()
.batch_size(batch_size)
.qmax(128)
- .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub);
+ .Test(xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16, VBinaryMicrokernelTester::OpType::Sub, xnn_init_f16_minmax_neon_params);
}
}
#endif // XNN_ARCH_ARM64