Split qs8-igemm-minmax-rndnu tests into one more file (4 total) to avoid the compile timeouts seen in coverage runs
PiperOrigin-RevId: 426186255
diff --git a/test/qs8-igemm-minmax-rndnu.cc b/test/qs8-igemm-minmax-rndnu.cc
index e7305dc..cb1e58e 100644
--- a/test/qs8-igemm-minmax-rndnu.cc
+++ b/test/qs8-igemm-minmax-rndnu.cc
@@ -491,8 +491,8 @@
#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
-#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
+#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -502,10 +502,10 @@
.m(4)
.n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -516,10 +516,10 @@
.n(8)
.k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 4; m++) {
@@ -532,12 +532,12 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
@@ -549,11 +549,11 @@
.n(8)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
@@ -565,11 +565,11 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
@@ -580,11 +580,11 @@
.m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -598,13 +598,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
@@ -615,11 +615,11 @@
.m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -633,13 +633,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
@@ -650,11 +650,11 @@
.m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -668,13 +668,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -686,12 +686,12 @@
.m(4)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -704,12 +704,12 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -723,13 +723,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -741,12 +741,12 @@
.m(4)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -759,12 +759,12 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -778,13 +778,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
@@ -796,11 +796,11 @@
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -815,13 +815,13 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -834,12 +834,12 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -852,12 +852,12 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -872,13 +872,13 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
@@ -891,11 +891,11 @@
.k(k)
.ks(3)
.a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, zero) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t mz = 0; mz < 4; mz++) {
@@ -910,12 +910,12 @@
.ks(3)
.a_offset(163)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -926,10 +926,10 @@
.n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -940,10 +940,10 @@
.n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -954,4693 +954,13 @@
.n(8)
.k(8)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
-#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
-
-
-#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 4; mz++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
+#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 1; mz++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 4; mz++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 1; mz++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 4; mz++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(331)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 1; mz++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 3; mz++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(127)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 2; mz++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 3; mz++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 1; mz++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
@@ -5650,10 +970,10 @@
.m(2)
.n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
@@ -5664,10 +984,10 @@
.n(8)
.k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 2; m++) {
@@ -5680,12 +1000,12 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
@@ -5697,11 +1017,11 @@
.n(8)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
@@ -5713,11 +1033,11 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
@@ -5728,11 +1048,11 @@
.m(2)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -5746,13 +1066,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
@@ -5763,11 +1083,11 @@
.m(2)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -5781,13 +1101,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
@@ -5798,11 +1118,11 @@
.m(2)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -5816,13 +1136,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -5834,12 +1154,12 @@
.m(2)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -5852,12 +1172,12 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -5871,13 +1191,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -5889,12 +1209,12 @@
.m(2)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -5907,12 +1227,12 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -5926,13 +1246,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
@@ -5944,11 +1264,11 @@
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -5963,13 +1283,13 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -5982,12 +1302,12 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -6000,12 +1320,12 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -6020,13 +1340,13 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
@@ -6039,11 +1359,11 @@
.k(k)
.ks(3)
.a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, zero) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t mz = 0; mz < 2; mz++) {
@@ -6058,12 +1378,12 @@
.ks(3)
.a_offset(83)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
@@ -6074,10 +1394,10 @@
.n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
@@ -6088,10 +1408,10 @@
.n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
@@ -6102,7 +1422,1411 @@
.n(8)
.k(8)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(8)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_lt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_gt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_div_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, a_offset) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t mz = 0; mz < 2; mz++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .cm_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {  // m swept over 1..1 with n fixed at 8 and k == 8.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(8)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {  // n swept over 1..8 with m fixed at 1 and k == 8.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_lt_8) {  // Full 1x8 tile for every k in 1..7.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_lt_8_subtile) {  // Every (m, n) sub-tile (m in 1..1, n in 1..8) for every k in 1..7.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_gt_8) {  // Full 1x8 tile for every k in 9..15.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_gt_8_subtile) {  // Every (m, n) sub-tile (m in 1..1, n in 1..8) for every k in 9..15.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_div_8) {  // Full 1x8 tile for k = 16, 24, ..., 80 (multiples of 8).
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_div_8_subtile) {  // Every (m, n) sub-tile (m in 1..1, n in 1..8) for k = 16, 24, ..., 80.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8) {  // n in 9..15 (more columns than nr == 8) crossed with k in {1, 10, 19, 28, 37}.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8_strided_cn) {  // n in 9..15 crossed with k in {1, 10, 19, 28, 37}, with cn_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .cn_stride(11)  // 11 exceeds nr (8); NOTE(review): presumably the stride between output column tiles - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8_subtile) {  // n in 9..15, k in {1, 10, 19, 28, 37}, m in 1..1.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8) {  // n in {16, 24} (multiples of nr == 8) crossed with k in {1, 10, 19, 28, 37}.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8_strided_cn) {  // n in {16, 24} crossed with k in {1, 10, 19, 28, 37}, with cn_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .cn_stride(11)  // 11 exceeds nr (8); NOTE(review): presumably the stride between output column tiles - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8_subtile) {  // n in {16, 24}, k in {1, 10, 19, 28, 37}, m in 1..1.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, small_kernel) {  // Full 1x8 tile with ks set to 3 for k in {1, 10, 19, 28, 37}.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the number of kernel (indirection) positions - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, small_kernel_subtile) {  // Every (m, n) sub-tile with ks set to 3 for k in {1, 10, 19, 28, 37}.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the number of kernel (indirection) positions - confirm in GemmMicrokernelTester
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8_small_kernel) {  // n in 9..15 crossed with k in {1, 10, 19, 28, 37}, with ks set to 3.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the number of kernel (indirection) positions - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8_small_kernel) {  // n in {16, 24} crossed with k in {1, 10, 19, 28, 37}, with ks set to 3.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the number of kernel (indirection) positions - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, strided_cm_subtile) {  // Every (m, n) sub-tile for k in {1, 10, 19, 28, 37}, with cm_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)  // 11 exceeds nr (8); NOTE(review): presumably the stride between output rows - confirm in GemmMicrokernelTester
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, a_offset) {  // Full 1x8 tile with ks == 3 and a_offset == 43 for k in {1, 10, 19, 28, 37}.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(43)  // 43 exceeds every k in this sweep; NOTE(review): presumably an offset applied to the input pointers - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, zero) {  // Like a_offset, additionally setting zero_index for each mz below mr.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t mz = 0; mz < 1; mz++) {  // single pass: mz == 0 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(43)
+ .zero_index(mz)  // NOTE(review): presumably selects which row reads from the zero buffer - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, qmin) {  // Full 1x8 tile at k == 8 with qmin overridden.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .qmin(128)  // NOTE(review): presumably the lower output clamp bound - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, qmax) {  // Full 1x8 tile at k == 8 with qmax overridden.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .qmax(128)  // NOTE(review): presumably the upper output clamp bound - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, strided_cm) {  // Full 1x8 tile at k == 8 with cm_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .cm_stride(11)  // 11 exceeds n (8); NOTE(review): presumably the stride between output rows - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16) {  // Full 1x8 tile (m == mr, n == nr) with k == 16.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cn) {  // Full 1x8 tile at k == 16 with cn_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(16)
+ .cn_stride(11)  // 11 exceeds nr (8); NOTE(review): presumably the stride between output column tiles - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {  // Every (m, n) with m in 1..1 and n in 1..8 at k == 16.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {  // m swept over 1..1 with n fixed at 8 and k == 16.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(8)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {  // n swept over 1..8 with m fixed at 1 and k == 16.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_lt_16) {  // Full 1x8 tile for every k in 1..15.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {  // Every (m, n) sub-tile (m in 1..1, n in 1..8) for every k in 1..15.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_gt_16) {  // Full 1x8 tile for every k in 17..31.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {  // Every (m, n) sub-tile (m in 1..1, n in 1..8) for every k in 17..31.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_div_16) {  // Full 1x8 tile for k = 32, 48, ..., 160 (multiples of 16).
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {  // Every (m, n) sub-tile (m in 1..1, n in 1..8) for k = 32, 48, ..., 160.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8) {  // n in 9..15 (more columns than nr == 8) crossed with k in {1, 18, 35, 52, 69}.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {  // n in 9..15 crossed with k in {1, 18, 35, 52, 69}, with cn_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .cn_stride(11)  // 11 exceeds nr (8); NOTE(review): presumably the stride between output column tiles - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {  // n in 9..15, k in {1, 18, 35, 52, 69}, m in 1..1.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8) {  // n in {16, 24} (multiples of nr == 8) crossed with k in {1, 18, 35, 52, 69}.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {  // n in {16, 24} crossed with k in {1, 18, 35, 52, 69}, with cn_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .cn_stride(11)  // 11 exceeds nr (8); NOTE(review): presumably the stride between output column tiles - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {  // n in {16, 24}, k in {1, 18, 35, 52, 69}, m in 1..1.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, small_kernel) {  // Full 1x8 tile with ks set to 3 for k in {1, 18, 35, 52, 69}.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the number of kernel (indirection) positions - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, small_kernel_subtile) {  // Every (m, n) sub-tile with ks set to 3 for k in {1, 18, 35, 52, 69}.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the number of kernel (indirection) positions - confirm in GemmMicrokernelTester
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_small_kernel) {  // n in 9..15 crossed with k in {1, 18, 35, 52, 69}, with ks set to 3.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the number of kernel (indirection) positions - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_small_kernel) {  // n in {16, 24} crossed with k in {1, 18, 35, 52, 69}, with ks set to 3.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the number of kernel (indirection) positions - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {  // Every (m, n) sub-tile for k in {1, 18, 35, 52, 69}, with cm_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)  // 11 exceeds nr (8); NOTE(review): presumably the stride between output rows - confirm in GemmMicrokernelTester
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, a_offset) {  // Full 1x8 tile with ks == 3 and a_offset == 83 for k in {1, 18, 35, 52, 69}.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(83)  // 83 exceeds every k in this sweep; NOTE(review): presumably an offset applied to the input pointers - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, zero) {  // Like a_offset, additionally setting zero_index for each mz below mr.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t mz = 0; mz < 1; mz++) {  // single pass: mz == 0 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(83)
+ .zero_index(mz)  // NOTE(review): presumably selects which row reads from the zero buffer - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, qmin) {  // Full 1x8 tile at k == 16 with qmin overridden.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(16)
+ .qmin(128)  // NOTE(review): presumably the lower output clamp bound - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, qmax) {  // Full 1x8 tile at k == 16 with qmax overridden.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(16)
+ .qmax(128)  // NOTE(review): presumably the upper output clamp bound - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cm) {  // Full 1x8 tile at k == 16 with cm_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(16)
+ .cm_stride(11)  // 11 exceeds n (8); NOTE(review): presumably the stride between output rows - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -7980,6 +4704,1410 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16) {  // Full 1x16 tile (m == mr, n == nr) with k == 16.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cn) {  // Full 1x16 tile at k == 16 with cn_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(16)
+ .cn_stride(19)  // 19 exceeds nr (16); NOTE(review): presumably the stride between output column tiles - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {  // Every (m, n) with m in 1..1 and n in 1..16 at k == 16.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {  // m swept over 1..1 with n fixed at 16 and k == 16.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(16)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {  // n swept over 1..16 with m fixed at 1 and k == 16.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16) {  // Full 1x16 tile for every k in 1..15.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {  // Every (m, n) sub-tile (m in 1..1, n in 1..16) for every k in 1..15.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16) {  // Full 1x16 tile for every k in 17..31.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {  // Every (m, n) sub-tile (m in 1..1, n in 1..16) for every k in 17..31.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16) {  // Full 1x16 tile for k = 32, 48, ..., 160 (multiples of 16).
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {  // Every (m, n) sub-tile (m in 1..1, n in 1..16) for k = 32, 48, ..., 160.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16) {  // n in 17..31 (more columns than nr == 16) crossed with k in {1, 18, 35, 52, 69}.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {  // n in 17..31 crossed with k in {1, 18, 35, 52, 69}, with cn_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .cn_stride(19)  // 19 exceeds nr (16); NOTE(review): presumably the stride between output column tiles - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {  // n in 17..31, k in {1, 18, 35, 52, 69}, m in 1..1.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16) {  // n in {32, 48} (multiples of nr == 16) crossed with k in {1, 18, 35, 52, 69}.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {  // n in {32, 48} crossed with k in {1, 18, 35, 52, 69}, with cn_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .cn_stride(19)  // 19 exceeds nr (16); NOTE(review): presumably the stride between output column tiles - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {  // n in {32, 48}, k in {1, 18, 35, 52, 69}, m in 1..1.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, small_kernel) {  // Full 1x16 tile with ks set to 3 for k in {1, 18, 35, 52, 69}.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the number of kernel (indirection) positions - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {  // Every (m, n) sub-tile with ks set to 3 for k in {1, 18, 35, 52, 69}.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the number of kernel (indirection) positions - confirm in GemmMicrokernelTester
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {  // n in 17..31 crossed with k in {1, 18, 35, 52, 69}, with ks set to 3.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the number of kernel (indirection) positions - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {  // n in {32, 48} crossed with k in {1, 18, 35, 52, 69}, with ks set to 3.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the number of kernel (indirection) positions - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {  // Every (m, n) sub-tile for k in {1, 18, 35, 52, 69}, with cm_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {  // single pass: m == 1 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(19)  // 19 exceeds nr (16); NOTE(review): presumably the stride between output rows - confirm in GemmMicrokernelTester
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, a_offset) {  // Full 1x16 tile with ks == 3 and a_offset == 83 for k in {1, 18, 35, 52, 69}.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(83)  // 83 exceeds every k in this sweep; NOTE(review): presumably an offset applied to the input pointers - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, zero) {  // Like a_offset, additionally setting zero_index for each mz below mr.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t mz = 0; mz < 1; mz++) {  // single pass: mz == 0 only, because mr is 1
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(83)
+ .zero_index(mz)  // NOTE(review): presumably selects which row reads from the zero buffer - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, qmin) {  // Full 1x16 tile at k == 16 with qmin overridden.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(16)
+ .qmin(128)  // NOTE(review): presumably the lower output clamp bound - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, qmax) {  // Full 1x16 tile at k == 16 with qmax overridden.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(16)
+ .qmax(128)  // NOTE(review): presumably the upper output clamp bound - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cm) {  // Full 1x16 tile at k == 16 with cm_stride overridden.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(16)
+ .cm_stride(19)  // 19 exceeds n (16); NOTE(review): presumably the stride between output rows - confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(16)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(16)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_lt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_gt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_div_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(19)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, a_offset) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t mz = 0; mz < 2; mz++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(16)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(16)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(16)
+ .cm_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(16)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(16)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_lt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_gt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_div_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(19)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, a_offset) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(251)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t mz = 0; mz < 3; mz++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(251)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(16)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(16)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(16)
+ .cm_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
@@ -8448,474 +6576,6 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 1; mz++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(1)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
@@ -9384,322 +7044,322 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
- .n(16)
+ .m(4)
+ .n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
- .n(16)
+ .m(4)
+ .n(8)
.k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
.m(m)
- .n(16)
+ .n(8)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
+ .m(4)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
- .n(16)
+ .m(4)
+ .n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
- .n(16)
+ .m(4)
+ .n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
- .n(16)
+ .m(4)
+ .n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
+ for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
+ .m(4)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
+ for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
+ .m(4)
.n(n)
.k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
+ for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
+ for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
+ .m(4)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
+ for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
+ .m(4)
.n(n)
.k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
+ for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
- .n(16)
+ .m(4)
+ .n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
.m(m)
@@ -9707,146 +7367,146 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
+ for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
+ .m(4)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
+ for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
+ .m(4)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(k)
- .cm_stride(19)
+ .cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
- .n(16)
+ .m(4)
+ .n(8)
.k(k)
.ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(163)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, zero) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 1; mz++) {
+ for (uint32_t mz = 0; mz < 4; mz++) {
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
- .n(16)
+ .m(4)
+ .n(8)
.k(k)
.ks(3)
- .a_offset(43)
+ .a_offset(163)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
- .n(16)
+ .m(4)
+ .n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
- .n(16)
+ .m(4)
+ .n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
+ .mr(4)
+ .nr(8)
.kr(4)
.sr(2)
- .m(1)
- .n(16)
+ .m(4)
+ .n(8)
.k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cm_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -10788,1879 +8448,7 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 4; mz++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(331)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(4)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 1; mz++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(1)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 4; mz++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(2)
- .m(4)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 1; mz++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(1)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(3)
@@ -12669,11 +8457,11 @@
.sr(4)
.m(3)
.n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(3)
@@ -12682,12 +8470,12 @@
.sr(4)
.m(3)
.n(8)
- .k(8)
+ .k(16)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 3; m++) {
@@ -12698,14 +8486,14 @@
.sr(4)
.m(m)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
@@ -12715,13 +8503,13 @@
.sr(4)
.m(m)
.n(8)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
@@ -12731,15 +8519,15 @@
.sr(4)
.m(3)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
.mr(3)
.nr(8)
@@ -12748,13 +8536,13 @@
.m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
@@ -12766,15 +8554,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
.mr(3)
.nr(8)
@@ -12783,13 +8571,13 @@
.m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
@@ -12801,15 +8589,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
.mr(3)
.nr(8)
@@ -12818,13 +8606,13 @@
.m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
@@ -12836,16 +8624,16 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
.nr(8)
@@ -12854,15 +8642,15 @@
.m(3)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
.nr(8)
@@ -12872,15 +8660,15 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
.mr(3)
@@ -12891,16 +8679,16 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
.nr(8)
@@ -12909,15 +8697,15 @@
.m(3)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
.nr(8)
@@ -12927,15 +8715,15 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
.mr(3)
@@ -12946,15 +8734,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
.nr(8)
@@ -12964,13 +8752,13 @@
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
@@ -12983,16 +8771,16 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
.nr(8)
@@ -13002,15 +8790,15 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
.nr(8)
@@ -13020,14 +8808,14 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
@@ -13040,15 +8828,15 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, a_offset) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
.nr(8)
@@ -13058,14 +8846,14 @@
.n(8)
.k(k)
.ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(251)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, zero) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t mz = 0; mz < 3; mz++) {
GemmMicrokernelTester()
.mr(3)
@@ -13076,14 +8864,14 @@
.n(8)
.k(k)
.ks(3)
- .a_offset(127)
+ .a_offset(251)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(3)
@@ -13092,12 +8880,12 @@
.sr(4)
.m(3)
.n(8)
- .k(8)
+ .k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(3)
@@ -13106,12 +8894,12 @@
.sr(4)
.m(3)
.n(8)
- .k(8)
+ .k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(3)
@@ -13120,15 +8908,15 @@
.sr(4)
.m(3)
.n(8)
- .k(8)
+ .k(16)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -13137,11 +8925,11 @@
.sr(4)
.m(4)
.n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -13150,12 +8938,12 @@
.sr(4)
.m(4)
.n(8)
- .k(8)
+ .k(16)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 4; m++) {
@@ -13166,14 +8954,14 @@
.sr(4)
.m(m)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
@@ -13183,13 +8971,13 @@
.sr(4)
.m(m)
.n(8)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
@@ -13199,15 +8987,15 @@
.sr(4)
.m(4)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
@@ -13216,13 +9004,13 @@
.m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
@@ -13234,15 +9022,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
@@ -13251,13 +9039,13 @@
.m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
@@ -13269,15 +9057,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
@@ -13286,13 +9074,13 @@
.m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
@@ -13304,16 +9092,16 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
@@ -13322,15 +9110,15 @@
.m(4)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
@@ -13340,15 +9128,15 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
.mr(4)
@@ -13359,16 +9147,16 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
@@ -13377,15 +9165,15 @@
.m(4)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
@@ -13395,15 +9183,15 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
.mr(4)
@@ -13414,15 +9202,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
@@ -13432,13 +9220,13 @@
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
@@ -13451,16 +9239,16 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
@@ -13470,15 +9258,15 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
@@ -13488,14 +9276,14 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
@@ -13508,15 +9296,15 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, a_offset) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
@@ -13526,14 +9314,14 @@
.n(8)
.k(k)
.ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(331)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, zero) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t mz = 0; mz < 4; mz++) {
GemmMicrokernelTester()
.mr(4)
@@ -13544,14 +9332,14 @@
.n(8)
.k(k)
.ks(3)
- .a_offset(163)
+ .a_offset(331)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -13560,12 +9348,12 @@
.sr(4)
.m(4)
.n(8)
- .k(8)
+ .k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -13574,12 +9362,12 @@
.sr(4)
.m(4)
.n(8)
- .k(8)
+ .k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -13588,15 +9376,15 @@
.sr(4)
.m(4)
.n(8)
- .k(8)
+ .k(16)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
@@ -13605,11 +9393,11 @@
.sr(4)
.m(2)
.n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
@@ -13618,12 +9406,12 @@
.sr(4)
.m(2)
.n(16)
- .k(8)
+ .k(16)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
for (uint32_t m = 1; m <= 2; m++) {
@@ -13634,14 +9422,14 @@
.sr(4)
.m(m)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
@@ -13651,13 +9439,13 @@
.sr(4)
.m(m)
.n(16)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
@@ -13667,15 +9455,15 @@
.sr(4)
.m(2)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
.mr(2)
.nr(16)
@@ -13684,13 +9472,13 @@
.m(2)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
for (uint32_t n = 1; n <= 16; n++) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
@@ -13702,15 +9490,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
.mr(2)
.nr(16)
@@ -13719,13 +9507,13 @@
.m(2)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
for (uint32_t n = 1; n <= 16; n++) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
@@ -13737,15 +9525,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
.mr(2)
.nr(16)
@@ -13754,13 +9542,13 @@
.m(2)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t n = 1; n <= 16; n++) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
@@ -13772,16 +9560,16 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(16)
@@ -13790,15 +9578,15 @@
.m(2)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(16)
@@ -13808,15 +9596,15 @@
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
.mr(2)
@@ -13827,16 +9615,16 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(16)
@@ -13845,15 +9633,15 @@
.m(2)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(16)
@@ -13863,15 +9651,15 @@
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
.mr(2)
@@ -13882,15 +9670,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(16)
@@ -13900,13 +9688,13 @@
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 16; n++) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
@@ -13919,16 +9707,16 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(16)
@@ -13938,15 +9726,15 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(16)
@@ -13956,14 +9744,14 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 16; n++) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
@@ -13976,15 +9764,15 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, a_offset) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(16)
@@ -13994,14 +9782,14 @@
.n(16)
.k(k)
.ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(163)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, zero) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t mz = 0; mz < 2; mz++) {
GemmMicrokernelTester()
.mr(2)
@@ -14012,14 +9800,14 @@
.n(16)
.k(k)
.ks(3)
- .a_offset(83)
+ .a_offset(163)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
@@ -14028,12 +9816,12 @@
.sr(4)
.m(2)
.n(16)
- .k(8)
+ .k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
@@ -14042,12 +9830,12 @@
.sr(4)
.m(2)
.n(16)
- .k(8)
+ .k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
@@ -14056,47 +9844,47 @@
.sr(4)
.m(2)
.n(16)
- .k(8)
+ .k(16)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(16)
.k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(16)
.k(16)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
@@ -14104,16 +9892,16 @@
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
@@ -14121,48 +9909,48 @@
.n(16)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_lt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_lt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
@@ -14170,34 +9958,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
@@ -14205,34 +9993,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_div_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
@@ -14240,54 +10028,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
@@ -14295,54 +10083,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
@@ -14350,35 +10138,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
@@ -14387,55 +10175,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
@@ -14444,127 +10232,127 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(16)
.k(k)
.ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(251)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, zero) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 1; mz++) {
+ for (uint32_t mz = 0; mz < 3; mz++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(16)
.k(k)
.ks(3)
- .a_offset(83)
+ .a_offset(251)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(16)
.k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(16)
.k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(2)
.sr(4)
- .m(1)
+ .m(3)
.n(16)
.k(16)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -14572,16 +10360,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -14589,48 +10377,48 @@
.n(8)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -14638,34 +10426,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -14673,34 +10461,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -14708,54 +10496,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -14763,54 +10551,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -14818,35 +10606,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -14855,55 +10643,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -14912,89 +10700,89 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(43)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, zero) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 2; mz++) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .a_offset(83)
+ .a_offset(43)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -15468,39 +11256,39 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(8)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
@@ -15508,16 +11296,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
@@ -15525,48 +11313,48 @@
.n(16)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
@@ -15574,34 +11362,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
@@ -15609,34 +11397,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
@@ -15644,54 +11432,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
@@ -15699,54 +11487,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
@@ -15754,35 +11542,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
@@ -15791,55 +11579,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
@@ -15848,557 +11636,89 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(43)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, zero) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 3; mz++) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .a_offset(127)
+ .a_offset(43)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(4)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(8)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 2; mz++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -16872,7 +12192,1411 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(16)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(16)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_lt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_lt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_gt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_gt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_div_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_div_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(19)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, a_offset) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t mz = 0; mz < 2; mz++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(16)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(16)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(16)
+ .cm_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(8)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(8)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(8)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_lt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_lt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) {  // n_div_8 sweep plus an explicit cn_stride
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {  // n = 16, 24 (2 * nr and 3 * nr)
+ for (size_t k = 1; k <= 40; k += 9) {  // k = 1, 10, 19, 28, 37
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .cn_stride(11)  // 11 > nr (8); presumably the stride between output column blocks -- confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_subtile) {  // n_div_8 sweep crossed with every m <= mr
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {  // n = 16, 24 (2 * nr and 3 * nr)
+ for (size_t k = 1; k <= 40; k += 9) {  // k = 1, 10, 19, 28, 37
+ for (uint32_t m = 1; m <= 2; m++) {  // m = 1..mr
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // one iteration per (n, k, m) combination
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, small_kernel) {  // full 2x8 tile (m == mr, n == nr) with ks = 3
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {  // k = 1, 10, 19, 28, 37
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)  // presumably the kernel size (number of indirection taps) -- TODO confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, small_kernel_subtile) {  // ks = 3 for every (m, n) up to the 2x8 tile
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {  // k = 1, 10, 19, 28, 37
+ for (uint32_t n = 1; n <= 8; n++) {  // n = 1..nr
+ for (uint32_t m = 1; m <= 2; m++) {  // m = 1..mr
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)  // presumably the kernel size (number of indirection taps) -- TODO confirm in GemmMicrokernelTester
+ .iterations(1)  // one iteration per (k, n, m) combination
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_small_kernel) {  // n_gt_8 sweep with ks = 3
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {  // n = 9..15, i.e. strictly between nr (8) and 2 * nr
+ for (size_t k = 1; k <= 40; k += 9) {  // k = 1, 10, 19, 28, 37
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .ks(3)  // presumably the kernel size (number of indirection taps) -- TODO confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_small_kernel) {  // n_div_8 sweep with ks = 3
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {  // n = 16, 24 (2 * nr and 3 * nr)
+ for (size_t k = 1; k <= 40; k += 9) {  // k = 1, 10, 19, 28, 37
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .ks(3)  // presumably the kernel size (number of indirection taps) -- TODO confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cm_subtile) {  // explicit cm_stride for every (m, n) up to the 2x8 tile
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {  // k = 1, 10, 19, 28, 37
+ for (uint32_t n = 1; n <= 8; n++) {  // n = 1..nr
+ for (uint32_t m = 1; m <= 2; m++) {  // m = 1..mr
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)  // 11 > nr (8); presumably the stride between output rows -- confirm in GemmMicrokernelTester
+ .iterations(1)  // one iteration per (k, n, m) combination
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, a_offset) {  // full 2x8 tile, ks = 3, with a non-zero a_offset
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {  // k = 1, 10, 19, 28, 37
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)  // presumably the kernel size (number of indirection taps) -- TODO confirm in GemmMicrokernelTester
+ .a_offset(83)  // NOTE(review): presumably an offset applied to the A (input) pointers; 83 exceeds mr * max k (2 * 37) -- confirm
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, zero) {  // a_offset configuration repeated for each zero_index below mr
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {  // k = 1, 10, 19, 28, 37
+ for (uint32_t mz = 0; mz < 2; mz++) {  // mz = 0, 1 (every index below mr)
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)  // presumably the kernel size (number of indirection taps) -- TODO confirm in GemmMicrokernelTester
+ .a_offset(83)  // same offset value as the a_offset test of this kernel
+ .zero_index(mz)  // NOTE(review): presumably marks which row uses the zero buffer instead of real input -- confirm
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, qmin) {  // single 2x8 tile at k = 8 with qmin overridden
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(8)
+ .qmin(128)  // only qmin is set here; qmax keeps whatever default the tester uses
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, qmax) {  // single 2x8 tile at k = 8 with qmax overridden
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(8)
+ .qmax(128)  // only qmax is set here; qmin keeps whatever default the tester uses
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cm) {  // single 2x8 tile at k = 8 with an explicit cm_stride
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(8)
+ .cm_stride(11)  // 11 > nr (8); presumably the stride between output rows -- confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16) {  // baseline: full 2x8 tile (m == mr, n == nr) at k = 16
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, strided_cn) {  // k_eq_16 configuration plus an explicit cn_stride
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .cn_stride(11)  // 11 > nr (8); presumably the stride between output column blocks -- confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {  // k = 16 for every (m, n) up to the 2x8 tile
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {  // n = 1..nr
+ for (uint32_t m = 1; m <= 2; m++) {  // m = 1..mr
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(16)
+ .iterations(1)  // one iteration per (n, m) combination
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {  // k = 16, n fixed at nr, only m varies
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 2; m++) {  // m = 1..mr
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(8)
+ .k(16)
+ .iterations(1)  // one iteration per m value
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {  // k = 16, m fixed at mr, only n varies
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {  // n = 1..nr
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(16)
+ .iterations(1)  // one iteration per n value
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_lt_16) {  // full 2x8 tile for every k below 16
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {  // k = 1..15
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {  // every k below 16 crossed with every (m, n) up to 2x8
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {  // k = 1..15
+ for (uint32_t n = 1; n <= 8; n++) {  // n = 1..nr
+ for (uint32_t m = 1; m <= 2; m++) {  // m = 1..mr
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // one iteration per (k, n, m) combination
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_gt_16) {  // full 2x8 tile for every k strictly between 16 and 32
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {  // k = 17..31
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {  // k = 17..31 crossed with every (m, n) up to 2x8
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {  // k = 17..31
+ for (uint32_t n = 1; n <= 8; n++) {  // n = 1..nr
+ for (uint32_t m = 1; m <= 2; m++) {  // m = 1..mr
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // one iteration per (k, n, m) combination
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_div_16) {  // full 2x8 tile for k at exact multiples of 16
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {  // k = 32, 48, ..., 160 (nine values)
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {  // multiples of 16 for k, crossed with every (m, n) up to 2x8
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {  // k = 32, 48, ..., 160 (nine values)
+ for (uint32_t n = 1; n <= 8; n++) {  // n = 1..nr
+ for (uint32_t m = 1; m <= 2; m++) {  // m = 1..mr
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // one iteration per (k, n, m) combination
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8) {  // full m == mr rows, n just above nr
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {  // n = 9..15, i.e. strictly between nr (8) and 2 * nr
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {  // n_gt_8 sweep plus an explicit cn_stride
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {  // n = 9..15, i.e. strictly between nr (8) and 2 * nr
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .cn_stride(11)  // 11 > nr (8); presumably the stride between output column blocks -- confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {  // n_gt_8 sweep crossed with every m <= mr
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {  // n = 9..15, i.e. strictly between nr (8) and 2 * nr
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ for (uint32_t m = 1; m <= 2; m++) {  // m = 1..mr
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // one iteration per (n, k, m) combination
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8) {  // full m == mr rows, n an exact multiple of nr
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {  // n = 16, 24 (2 * nr and 3 * nr)
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {  // n_div_8 sweep plus an explicit cn_stride
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {  // n = 16, 24 (2 * nr and 3 * nr)
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .cn_stride(11)  // 11 > nr (8); presumably the stride between output column blocks -- confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {  // n_div_8 sweep crossed with every m <= mr
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {  // n = 16, 24 (2 * nr and 3 * nr)
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ for (uint32_t m = 1; m <= 2; m++) {  // m = 1..mr
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // one iteration per (n, k, m) combination
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, small_kernel) {  // full 2x8 tile (m == mr, n == nr) with ks = 3
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)  // presumably the kernel size (number of indirection taps) -- TODO confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {  // ks = 3 for every (m, n) up to the 2x8 tile
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ for (uint32_t n = 1; n <= 8; n++) {  // n = 1..nr
+ for (uint32_t m = 1; m <= 2; m++) {  // m = 1..mr
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)  // presumably the kernel size (number of indirection taps) -- TODO confirm in GemmMicrokernelTester
+ .iterations(1)  // one iteration per (k, n, m) combination
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {  // n_gt_8 sweep with ks = 3
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {  // n = 9..15, i.e. strictly between nr (8) and 2 * nr
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .ks(3)  // presumably the kernel size (number of indirection taps) -- TODO confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {  // n_div_8 sweep with ks = 3
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {  // n = 16, 24 (2 * nr and 3 * nr)
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .ks(3)  // presumably the kernel size (number of indirection taps) -- TODO confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {  // explicit cm_stride for every (m, n) up to the 2x8 tile
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ for (uint32_t n = 1; n <= 8; n++) {  // n = 1..nr
+ for (uint32_t m = 1; m <= 2; m++) {  // m = 1..mr
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)  // 11 > nr (8); presumably the stride between output rows -- confirm in GemmMicrokernelTester
+ .iterations(1)  // one iteration per (k, n, m) combination
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, a_offset) {  // full 2x8 tile, ks = 3, with a non-zero a_offset
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)  // presumably the kernel size (number of indirection taps) -- TODO confirm in GemmMicrokernelTester
+ .a_offset(163)  // NOTE(review): presumably an offset applied to the A (input) pointers; 163 exceeds mr * max k (2 * 69) -- confirm
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, zero) {  // a_offset configuration repeated for each zero_index below mr
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ for (uint32_t mz = 0; mz < 2; mz++) {  // mz = 0, 1 (every index below mr)
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)  // presumably the kernel size (number of indirection taps) -- TODO confirm in GemmMicrokernelTester
+ .a_offset(163)  // same offset value as the a_offset test of this kernel
+ .zero_index(mz)  // NOTE(review): presumably marks which row uses the zero buffer instead of real input -- confirm
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, qmin) {  // single 2x8 tile at k = 16 with qmin overridden
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .qmin(128)  // only qmin is set here; qmax keeps whatever default the tester uses
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, qmax) {  // single 2x8 tile at k = 16 with qmax overridden
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .qmax(128)  // only qmax is set here; qmin keeps whatever default the tester uses
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, strided_cm) {  // single 2x8 tile at k = 16 with an explicit cm_stride
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .cm_stride(11)  // 11 > nr (8); presumably the stride between output rows -- confirm in GemmMicrokernelTester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
@@ -16882,10 +13606,10 @@
.m(1)
.n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
@@ -16896,10 +13620,10 @@
.n(8)
.k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 1; m++) {
@@ -16912,12 +13636,12 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
@@ -16929,11 +13653,11 @@
.n(8)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
@@ -16945,11 +13669,11 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
@@ -16960,11 +13684,11 @@
.m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -16978,13 +13702,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
@@ -16995,11 +13719,11 @@
.m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -17013,13 +13737,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
@@ -17030,11 +13754,11 @@
.m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -17048,13 +13772,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -17066,12 +13790,12 @@
.m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -17084,12 +13808,12 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -17103,13 +13827,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -17121,12 +13845,12 @@
.m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -17139,12 +13863,12 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -17158,13 +13882,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
@@ -17176,11 +13900,11 @@
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -17195,13 +13919,13 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -17214,12 +13938,12 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -17232,12 +13956,12 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -17252,13 +13976,13 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
@@ -17271,11 +13995,11 @@
.k(k)
.ks(3)
.a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, zero) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t mz = 0; mz < 1; mz++) {
@@ -17290,12 +14014,12 @@
.ks(3)
.a_offset(43)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
@@ -17306,10 +14030,10 @@
.n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
@@ -17320,10 +14044,10 @@
.n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
@@ -17334,949 +14058,13 @@
.n(8)
.k(8)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 3; mz++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(127)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 4; mz++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
@@ -18286,10 +14074,10 @@
.m(1)
.n(8)
.k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
@@ -18300,10 +14088,10 @@
.n(8)
.k(16)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 1; m++) {
@@ -18316,12 +14104,12 @@
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
@@ -18333,11 +14121,11 @@
.n(8)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
@@ -18349,11 +14137,11 @@
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_lt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
@@ -18364,11 +14152,11 @@
.m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -18382,13 +14170,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
@@ -18399,11 +14187,11 @@
.m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -18417,13 +14205,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
@@ -18434,11 +14222,11 @@
.m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -18452,13 +14240,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
@@ -18470,12 +14258,12 @@
.m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
@@ -18488,12 +14276,12 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
@@ -18507,13 +14295,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
@@ -18525,12 +14313,12 @@
.m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
@@ -18543,12 +14331,12 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
@@ -18562,13 +14350,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
@@ -18580,11 +14368,11 @@
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -18599,13 +14387,13 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
@@ -18618,12 +14406,12 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
@@ -18636,12 +14424,12 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -18656,13 +14444,13 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
@@ -18675,11 +14463,11 @@
.k(k)
.ks(3)
.a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, zero) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t mz = 0; mz < 1; mz++) {
@@ -18694,12 +14482,12 @@
.ks(3)
.a_offset(83)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
@@ -18710,10 +14498,10 @@
.n(8)
.k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
@@ -18724,10 +14512,10 @@
.n(8)
.k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
@@ -18738,3283 +14526,943 @@
.n(8)
.k(16)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
- .m(3)
+ .m(1)
.n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .k(8)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
- .m(3)
+ .m(1)
.n(8)
- .k(16)
+ .k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
.m(m)
.n(n)
- .k(16)
+ .k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
.m(m)
.n(8)
- .k(16)
+ .k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
- .k(16)
+ .k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_lt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
+ for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
- .m(3)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
+ for (size_t k = 1; k < 8; k++) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
+ for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
- .m(3)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
+ for (size_t k = 9; k < 16; k++) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_div_8) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
+ for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
- .m(3)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
+ for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
+ for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
+ for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
+ for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
+ for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
+ for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
- .m(3)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
+ for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
+ for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
+ for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
+ for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, a_offset) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(43)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(43)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .cm_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(8)
+ .k(8)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(8)
+ .k(8)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
.mr(3)
.nr(8)
- .kr(4)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(8)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_lt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
.sr(1)
.m(3)
.n(8)
.k(k)
.ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, a_offset) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(3)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(127)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t mz = 0; mz < 3; mz++) {
GemmMicrokernelTester()
.mr(3)
.nr(8)
- .kr(4)
+ .kr(2)
.sr(1)
.m(3)
.n(8)
.k(k)
.ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 4; mz++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 2; mz++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 3; mz++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
.a_offset(127)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 3; mz++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 4; mz++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 4; mz++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(8)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -22488,105 +15936,573 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(4)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .n(8)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(4)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .n(8)
+ .k(16)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
.mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(8)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_lt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_gt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_div_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, a_offset) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(331)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t mz = 0; mz < 4; mz++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(331)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(16)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(16)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(16)
+ .cm_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(16)
+ .k(16)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
.m(m)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
.m(m)
.n(16)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
@@ -22594,34 +16510,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
@@ -22629,34 +16545,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
@@ -22664,54 +16580,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
@@ -22719,54 +16635,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
@@ -22774,35 +16690,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
@@ -22811,55 +16727,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
@@ -22868,2433 +16784,93 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, a_offset) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(16)
.k(k)
.ks(3)
.a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, zero) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 4; mz++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t mz = 0; mz < 2; mz++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(16)
.k(k)
.ks(3)
.a_offset(163)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(16)
- .k(8)
+ .k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(16)
- .k(8)
+ .k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(2)
.nr(16)
.kr(2)
.sr(1)
- .m(4)
+ .m(2)
.n(16)
- .k(8)
+ .k(16)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 1; mz++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 2; mz++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 3; mz++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 2; mz++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
-
-
-#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 2; mz++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
-
-
#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
@@ -25764,7 +17340,7 @@
#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -25774,10 +17350,10 @@
.m(4)
.n(16)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -25788,10 +17364,10 @@
.n(16)
.k(8)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
for (uint32_t m = 1; m <= 4; m++) {
@@ -25804,12 +17380,12 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
@@ -25821,11 +17397,11 @@
.n(16)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
@@ -25837,11 +17413,11 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
@@ -25852,11 +17428,11 @@
.m(4)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
for (uint32_t n = 1; n <= 16; n++) {
@@ -25870,13 +17446,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
@@ -25887,11 +17463,11 @@
.m(4)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
for (uint32_t n = 1; n <= 16; n++) {
@@ -25905,13 +17481,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
@@ -25922,11 +17498,11 @@
.m(4)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t n = 1; n <= 16; n++) {
@@ -25940,13 +17516,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -25958,12 +17534,12 @@
.m(4)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -25976,12 +17552,12 @@
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -25995,13 +17571,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -26013,12 +17589,12 @@
.m(4)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -26031,12 +17607,12 @@
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -26050,13 +17626,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
@@ -26068,11 +17644,11 @@
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 16; n++) {
@@ -26087,13 +17663,13 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -26106,12 +17682,12 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -26124,12 +17700,12 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 16; n++) {
@@ -26144,13 +17720,13 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
@@ -26163,11 +17739,11 @@
.k(k)
.ks(3)
.a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t mz = 0; mz < 4; mz++) {
@@ -26182,12 +17758,12 @@
.ks(3)
.a_offset(163)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -26198,10 +17774,10 @@
.n(16)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -26212,10 +17788,10 @@
.n(16)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -26226,7 +17802,475 @@
.n(16)
.k(8)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
+
+
+#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(8)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(8)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(16)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(19)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, a_offset) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t mz = 0; mz < 4; mz++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(8)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(8)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(8)
+ .cm_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
@@ -26700,34 +18744,970 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(8)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(8)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(8)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_lt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_lt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, a_offset) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(43)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(43)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .cm_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(8)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(8)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(16)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_lt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_lt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t m = 1; m <= 3; m++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(19)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, a_offset) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(127)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t mz = 0; mz < 3; mz++) {
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(127)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(8)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(8)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(3)
+ .nr(16)
+ .kr(8)
+ .sr(1)
+ .m(3)
+ .n(16)
+ .k(8)
+ .cm_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 2; m++) {
@@ -26738,14 +19718,14 @@
.sr(1)
.m(m)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
@@ -26755,13 +19735,13 @@
.sr(1)
.m(m)
.n(8)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
@@ -26771,15 +19751,15 @@
.sr(1)
.m(2)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
@@ -26788,13 +19768,13 @@
.m(2)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
@@ -26806,15 +19786,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
@@ -26823,13 +19803,13 @@
.m(2)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
@@ -26841,15 +19821,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
@@ -26858,13 +19838,13 @@
.m(2)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
@@ -26876,16 +19856,16 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
@@ -26894,15 +19874,15 @@
.m(2)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
@@ -26912,15 +19892,15 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
.mr(2)
@@ -26931,16 +19911,16 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
@@ -26949,15 +19929,15 @@
.m(2)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
@@ -26967,15 +19947,15 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
.mr(2)
@@ -26986,15 +19966,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
@@ -27004,13 +19984,13 @@
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
@@ -27023,16 +20003,16 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
@@ -27042,15 +20022,15 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
@@ -27060,14 +20040,14 @@
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 8; n++) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
@@ -27080,15 +20060,15 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, a_offset) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
@@ -27098,14 +20078,14 @@
.n(8)
.k(k)
.ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(163)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, zero) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t mz = 0; mz < 2; mz++) {
GemmMicrokernelTester()
.mr(2)
@@ -27116,14 +20096,14 @@
.n(8)
.k(k)
.ks(3)
- .a_offset(83)
+ .a_offset(163)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
@@ -27132,12 +20112,12 @@
.sr(1)
.m(2)
.n(8)
- .k(8)
+ .k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
@@ -27146,12 +20126,12 @@
.sr(1)
.m(2)
.n(8)
- .k(8)
+ .k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
@@ -27160,113 +20140,113 @@
.sr(1)
.m(2)
.n(8)
- .k(8)
+ .k(16)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(8)
- .k(8)
+ .k(16)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
.m(m)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
.m(m)
.n(8)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
@@ -27274,34 +20254,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
@@ -27309,34 +20289,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
@@ -27344,54 +20324,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
@@ -27399,54 +20379,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
@@ -27454,35 +20434,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
@@ -27491,55 +20471,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
@@ -27548,2431 +20528,1027 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, a_offset) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(8)
.k(k)
.ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(331)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, zero) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 3; mz++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t mz = 0; mz < 4; mz++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(8)
.k(k)
.ks(3)
- .a_offset(127)
+ .a_offset(331)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(8)
- .k(8)
+ .k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(8)
- .k(8)
+ .k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(8)
.kr(8)
.sr(1)
- .m(3)
+ .m(4)
.n(8)
- .k(8)
+ .k(16)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(16)
- .k(8)
+ .k(16)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(m)
.n(16)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
for (uint32_t n = 1; n <= 16; n++) {
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
for (uint32_t n = 1; n <= 16; n++) {
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t n = 1; n <= 16; n++) {
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 16; n++) {
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t n = 1; n <= 16; n++) {
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, a_offset) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(16)
.k(k)
.ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(83)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, zero) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t mz = 0; mz < 1; mz++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
- .kr(8)
+ .kr(16)
.sr(1)
.m(1)
.n(16)
.k(k)
.ks(3)
+ .a_offset(83)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(16)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(16)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(16)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(16)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(16)
+ .kr(16)
+ .sr(1)
+ .m(1)
+ .n(16)
+ .k(16)
+ .cm_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cn) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_m) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(8)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_n) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (size_t k = 1; k < 8; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (size_t k = 1; k < 8; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (size_t k = 9; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (size_t k = 9; k < 16; k++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (size_t k = 16; k <= 80; k += 8) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (size_t k = 16; k <= 80; k += 8) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, a_offset) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(43)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, zero) {
+ TEST_REQUIRES_ARM_NEON_DOT;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
.a_offset(43)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, qmin) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmin) {
+ TEST_REQUIRES_ARM_NEON_DOT;
GemmMicrokernelTester()
.mr(1)
- .nr(16)
- .kr(8)
+ .nr(8)
+ .kr(4)
.sr(1)
.m(1)
- .n(16)
+ .n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, qmax) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmax) {
+ TEST_REQUIRES_ARM_NEON_DOT;
GemmMicrokernelTester()
.mr(1)
- .nr(16)
- .kr(8)
+ .nr(8)
+ .kr(4)
.sr(1)
.m(1)
- .n(16)
+ .n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm) {
+ TEST_REQUIRES_ARM_NEON_DOT;
GemmMicrokernelTester()
.mr(1)
- .nr(16)
- .kr(8)
+ .nr(8)
+ .kr(4)
.sr(1)
.m(1)
- .n(16)
+ .n(8)
.k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 3; mz++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 3; mz++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 4; mz++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(8)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t mz = 0; mz < 3; mz++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
@@ -30911,474 +22487,6 @@
#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cn) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, small_kernel) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, a_offset) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, zero) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 4; mz++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmin) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmax) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-
-
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
@@ -32316,39 +23424,39 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(16)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(16)
.k(8)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 6; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
@@ -32356,16 +23464,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 6; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
@@ -32373,48 +23481,48 @@
.n(16)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 6; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
@@ -32422,34 +23530,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 6; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
@@ -32457,34 +23565,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 6; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
@@ -32492,54 +23600,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 6; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
@@ -32547,54 +23655,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 6; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
@@ -32602,35 +23710,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 6; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
@@ -32639,55 +23747,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 6; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
@@ -32696,595 +23804,127 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(16)
.k(k)
.ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(127)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, zero) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 6; mz++) {
+ for (uint32_t mz = 0; mz < 3; mz++) {
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(16)
.k(k)
.ks(3)
- .a_offset(251)
+ .a_offset(127)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(16)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(16)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(6)
+ .mr(3)
.nr(16)
.kr(1)
.sr(1)
- .m(6)
+ .m(3)
.n(16)
.k(8)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 6; m++) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 6; m++) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 6; m++) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 6; m++) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 6; m++) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 6; m++) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(n)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 6; m++) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 6; m++) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 6; m++) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 6; mz++) {
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(6)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(6)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
.k(8)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
@@ -33292,16 +23932,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
@@ -33309,48 +23949,48 @@
.n(16)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
@@ -33358,34 +23998,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
@@ -33393,34 +24033,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
@@ -33428,54 +24068,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
@@ -33483,54 +24123,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
@@ -33538,35 +24178,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
@@ -33575,55 +24215,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 16; n++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
@@ -33632,89 +24272,89 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(43)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, zero) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 2; mz++) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .a_offset(83)
+ .a_offset(43)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
.k(8)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -34188,39 +24828,39 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(8)
.k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
@@ -34228,16 +24868,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
@@ -34245,48 +24885,48 @@
.n(8)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
@@ -34294,34 +24934,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
@@ -34329,34 +24969,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
@@ -34364,54 +25004,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
@@ -34419,54 +25059,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
@@ -34474,35 +25114,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
@@ -34511,55 +25151,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
@@ -34568,127 +25208,127 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(8)
.k(k)
.ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(83)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, zero) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 1; mz++) {
+ for (uint32_t mz = 0; mz < 2; mz++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(8)
.k(k)
.ks(3)
- .a_offset(43)
+ .a_offset(83)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(8)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(8)
.k(8)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
@@ -34696,16 +25336,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
@@ -34713,48 +25353,48 @@
.n(8)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
@@ -34762,34 +25402,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
@@ -34797,34 +25437,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
@@ -34832,54 +25472,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
@@ -34887,54 +25527,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
@@ -34942,35 +25582,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
@@ -34979,55 +25619,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
@@ -35036,556 +25676,88 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(127)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, zero) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 4; mz++) {
+ for (uint32_t mz = 0; mz < 3; mz++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .a_offset(163)
+ .a_offset(127)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(1)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(8)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cn) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_div_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, small_kernel) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .ks(3)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t n = 1; n <= 8; n++) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, a_offset) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, zero) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t mz = 0; mz < 4; mz++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, qmin) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, qmax) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cm) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT