Add missing generated unit tests to BUILD and CMakeLists.txt.
Also increase the qs8-igemm-minmax-rndnu test split to 3 files, as compile time was still too slow.
PiperOrigin-RevId: 421052556
diff --git a/test/qs8-igemm-minmax-rndnu.cc b/test/qs8-igemm-minmax-rndnu.cc
index e8a4c93..5a91090 100644
--- a/test/qs8-igemm-minmax-rndnu.cc
+++ b/test/qs8-igemm-minmax-rndnu.cc
@@ -23,8 +23,8 @@
#include "gemm-microkernel-tester.h"
-#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
+#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -34,10 +34,10 @@
.m(4)
.n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -48,10 +48,10 @@
.n(8)
.k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
@@ -64,12 +64,12 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
@@ -81,11 +81,11 @@
.n(8)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
@@ -97,11 +97,11 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
@@ -112,11 +112,11 @@
.m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
for (uint32_t m = 1; m <= 4; m++) {
@@ -130,13 +130,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
@@ -147,11 +147,11 @@
.m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
for (uint32_t m = 1; m <= 4; m++) {
@@ -165,13 +165,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
@@ -182,11 +182,11 @@
.m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t m = 1; m <= 4; m++) {
@@ -200,13 +200,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -218,12 +218,12 @@
.m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -236,12 +236,12 @@
.n(8)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -255,13 +255,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -273,12 +273,12 @@
.m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -291,12 +291,12 @@
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -310,13 +310,13 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
@@ -328,11 +328,11 @@
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t m = 1; m <= 4; m++) {
@@ -347,13 +347,13 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -366,12 +366,12 @@
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -384,12 +384,12 @@
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t m = 1; m <= 4; m++) {
@@ -404,13 +404,13 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
@@ -423,11 +423,11 @@
.k(k)
.ks(3)
.a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, zero) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t mz = 0; mz < 4; mz++) {
for (size_t k = 1; k <= 40; k += 9) {
@@ -442,12 +442,12 @@
.ks(3)
.a_offset(163)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -458,10 +458,10 @@
.n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -472,10 +472,10 @@
.n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
@@ -486,45 +486,513 @@
.n(8)
.k(8)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
-#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
+#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8) {
+#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(4)
.nr(8)
- .kr(2)
+ .kr(1)
.sr(1)
- .m(2)
+ .m(4)
.n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(4)
.nr(8)
- .kr(2)
+ .kr(1)
.sr(1)
- .m(2)
+ .m(4)
.n(8)
.k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(8)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+  TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8) {  // n = 9..15 (above nr = 8), k = 1, 10, ..., 37
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(8)
+          .kr(1)
+          .sr(1)
+          .m(4)
+          .n(n)  // pass the loop value; a fixed 8 would leave n unused and repeat one case
+          .k(k)
+          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) {  // n = 9..15 (above nr = 8) with cn_stride set to 11
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(8)
+          .kr(1)
+          .sr(1)
+          .m(4)
+          .n(n)  // pass the loop value; a fixed 8 would leave n unused and repeat one case
+          .k(k)
+          .cn_stride(11)
+          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+      }
+    }
+  }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+  TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8) {  // n = 16 and 24 (multiples of nr = 8), k = 1, 10, ..., 37
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(8)
+          .kr(1)
+          .sr(1)
+          .m(4)
+          .n(n)  // pass the loop value, matching n_div_8_strided_cn; a fixed 8 would leave n unused
+          .k(k)
+          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+      }
+    }
+  }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+  TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_small_kernel) {  // n = 9..15 (above nr = 8) with ks set to 3
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(8)
+          .kr(1)
+          .sr(1)
+          .m(4)
+          .n(n)  // pass the loop value; a fixed 8 would leave n unused and repeat one case
+          .k(k)
+          .ks(3)
+          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_small_kernel) {  // n = 16 and 24 (multiples of nr = 8) with ks set to 3
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(8)
+          .kr(1)
+          .sr(1)
+          .m(4)
+          .n(n)  // pass the loop value; a fixed 8 would leave n unused and repeat one case
+          .k(k)
+          .ks(3)
+          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+      }
+    }
+  }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, a_offset) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t mz = 0; mz < 4; mz++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(8)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(8)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(8)
+ .kr(1)
+ .sr(1)
+ .m(4)
+ .n(8)
+ .k(8)
+ .cm_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -532,16 +1000,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -549,48 +1017,48 @@
.n(8)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {  // Sweeps n over 1..8 with m=1, k=8, one iteration per n.
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_lt_8) {  // Sweeps k over 1..7 with m=1, n=8.
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -598,34 +1066,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_gt_8) {  // Sweeps k over 9..15 with m=1, n=8.
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -633,34 +1101,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_div_8) {  // Sweeps k over 16, 24, ..., 80 with m=1, n=8.
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -668,54 +1136,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8) {  // Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} with m=1.
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
- .n(8)
+ .n(n)  // Pass the loop value; a constant 8 left the loop variable unused and n > nr untested.
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) {  // Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} with m=1, cn_stride(11).
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
- .n(8)
+ .n(n)  // Pass the loop value; a constant 8 left the loop variable unused and n > nr untested.
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -723,54 +1191,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8) {  // Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} with m=1.
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
- .n(8)
+ .n(n)  // Pass the loop value; a constant 8 left the loop variable unused and multiples of nr untested.
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) {  // Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} with m=1, cn_stride(11).
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -778,35 +1246,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, small_kernel) {  // Sweeps k over {1, 10, 19, 28, 37} with m=1, n=8 and ks(3).
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -815,55 +1283,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8_small_kernel) {  // Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} with m=1, ks(3).
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
- .n(8)
+ .n(n)  // Pass the loop value; a constant 8 left the loop variable unused and n > nr untested.
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8_small_kernel) {  // Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} with m=1, ks(3).
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
- .n(8)
+ .n(n)  // Pass the loop value; a constant 8 left the loop variable unused and multiples of nr untested.
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -872,1961 +1340,1025 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, a_offset) {  // Sweeps k over {1, 10, 19, 28, 37} with m=1, n=8, ks(3) and a_offset(43).
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(43)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, zero) {  // For each mz below mr (only mz=0 here), sweeps k with ks(3), a_offset(43) and zero_index(mz).
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t mz = 0; mz < 1; mz++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(43)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, qmin) {  // Single run: m=1, n=8, k=8 with qmin(128).
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, qmax) {  // Single run: m=1, n=8, k=8 with qmax(128).
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, strided_cm) {  // Single run: m=1, n=8, k=8 with cm_stride(11).
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(8)
+ .cm_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_eq_8) {  // Single run: m=4, n=16, k=8 (mr=4, nr=16, kr=2, sr=1).
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(8)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, strided_cn) {  // Single run: m=4, n=16, k=8 with cn_stride(19).
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(8)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_eq_8_subtile) {  // Sweeps every m in 1..4 and n in 1..16 at k=8, one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {  // Sweeps m over 1..4 with n=16, k=8, one iteration per m.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(16)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {  // Sweeps n over 1..16 with m=4, k=8, one iteration per n.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_lt_8) {  // Sweeps k over 1..7 with m=4, n=16.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_lt_8_subtile) {  // Sweeps k over 1..7 and every m in 1..4, n in 1..16, one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_gt_8) {  // Sweeps k over 9..15 with m=4, n=16.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_gt_8_subtile) {  // Sweeps k over 9..15 and every m in 1..4, n in 1..16, one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_div_8) {  // Sweeps k over 16, 24, ..., 80 with m=4, n=16.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_div_8_subtile) {  // Sweeps k over 16, 24, ..., 80 and every m in 1..4, n in 1..16, one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_gt_16) {  // Sweeps n over 17..31 and k over {1, 10, 19, 28, 37} with m=4.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)  // Pass the loop value; a constant 16 left the loop variable unused and n > nr untested.
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_gt_16_strided_cn) {  // Sweeps n over 17..31 and k over {1, 10, 19, 28, 37} with m=4, cn_stride(19).
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)  // Pass the loop value; a constant 16 left the loop variable unused and n > nr untested.
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_gt_16_subtile) {  // Sweeps n over 17..31, k over {1, 10, 19, 28, 37} and m over 1..4, one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_div_16) {  // Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} with m=4.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)  // Pass the loop value; a constant 16 left the loop variable unused and multiples of nr untested.
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_div_16_strided_cn) {  // Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} with m=4, cn_stride(19).
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_div_16_subtile) {  // Sweeps n over {32, 48}, k over {1, 10, 19, 28, 37} and m over 1..4, one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, small_kernel) {  // Sweeps k over {1, 10, 19, 28, 37} with m=4, n=16 and ks(3).
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, small_kernel_subtile) {  // Sweeps k over {1, 10, 19, 28, 37} and every m in 1..4, n in 1..16 with ks(3), one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_gt_16_small_kernel) {  // Sweeps n over 17..31 and k over {1, 10, 19, 28, 37} with m=4, ks(3).
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)  // Pass the loop value; a constant 16 left the loop variable unused and n > nr untested.
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_div_16_small_kernel) {  // Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} with m=4, ks(3).
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)  // Pass the loop value; a constant 16 left the loop variable unused and multiples of nr untested.
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, strided_cm_subtile) {  // Sweeps k over {1, 10, 19, 28, 37} and every m in 1..4, n in 1..16 with cm_stride(19), one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(19)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, a_offset) {  // Sweeps k over {1, 10, 19, 28, 37} with m=4, n=16, ks(3) and a_offset(163).
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, zero) {  // For each mz in 0..3, sweeps k over {1, 10, 19, 28, 37} with ks(3), a_offset(163) and zero_index(mz).
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t mz = 0; mz < 4; mz++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, qmin) {  // Single run: m=4, n=16, k=8 with qmin(128).
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(8)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, qmax) {  // Single run: m=4, n=16, k=8 with qmax(128).
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(8)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, strided_cm) {  // Single run: m=4, n=16, k=8 with cm_stride(19).
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(8)
+ .cm_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_eq_16) {  // Single run: m=1, n=8, k=16 (mr=1, nr=8, kr=2, sr=1).
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, strided_cn) {  // Single run: m=1, n=8, k=16 with cn_stride(11).
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(16)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {  // Sweeps m over 1..1 and n over 1..8 at k=16, one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {  // Sweeps m over 1..1 with n=8, k=16, one iteration per m.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(8)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {  // Sweeps n over 1..8 with m=1, k=16, one iteration per n.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_lt_16) {  // Sweeps k over 1..15 with m=1, n=8.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {  // Sweeps k over 1..15, m over 1..1 and n over 1..8, one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_gt_16) {  // Sweeps k over 17..31 with m=1, n=8.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {  // Sweeps k over 17..31, m over 1..1 and n over 1..8, one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_div_16) {  // Sweeps k over 32, 48, ..., 160 with m=1, n=8.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {  // Sweeps k over 32, 48, ..., 160, m over 1..1 and n over 1..8, one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_gt_8) {  // Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m=1.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)  // Pass the loop value; a constant 8 left the loop variable unused and n > nr untested.
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {  // Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m=1, cn_stride(11).
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)  // Pass the loop value; a constant 8 left the loop variable unused and n > nr untested.
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {  // Sweeps n over 9..15, k over {1, 18, 35, 52, 69} and m over 1..1, one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_div_8) {  // Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m=1.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)  // Pass the loop value; a constant 8 left the loop variable unused and multiples of nr untested.
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {  // Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m=1, cn_stride(11).
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {  // Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over 1..1, one iteration each.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+  // small_kernel: fixed m = 1, n = 8 shape with ks(3); k takes the values
+  // 1, 18, 35, 52, 69.
+  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, small_kernel) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t depth = 1; depth < 81; depth += 17) {
+      // Fresh temporary tester per k value.
+      GemmMicrokernelTester()
+        .mr(1).nr(8)
+        .kr(2).sr(1)
+        .m(1).n(8)
+        .k(depth)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+    }
+  }
+
+  // small_kernel_subtile: ks(3) with k in {1, 18, 35, 52, 69}, m = 1 (single
+  // pass of the m loop) and n = 1..8; each shape runs with iterations(1).
+  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t depth = 1; depth < 81; depth += 17) {
+      for (uint32_t rows = 1; rows < 2; ++rows) {
+        for (uint32_t cols = 1; cols < 9; ++cols) {
+          // Fresh temporary tester per (k, m, n) combination.
+          GemmMicrokernelTester()
+            .mr(1).nr(8)
+            .kr(2).sr(1)
+            .m(rows)
+            .n(cols).k(depth)
+            .ks(3)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+        }
+      }
+    }
+  }
+
+  // n_gt_8_small_kernel: ks(3) with n = 9..15 and k in {1, 18, 35, 52, 69}.
+  // .n() receives the loop variable; a constant 8 here would leave the n loop
+  // unused and rerun the identical n = 8 shape on every pass.
+  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 80; k += 17) {
+        GemmMicrokernelTester()
+          .mr(1).nr(8).kr(2).sr(1)
+          .m(1)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+      }
+    }
+  }
+
+  // n_div_8_small_kernel: ks(3) with n in {16, 24} and k in {1, 18, 35, 52, 69}.
+  // .n() receives the loop variable; a constant 8 here would leave the n loop
+  // unused and rerun the identical n = 8 shape on every pass.
+  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 80; k += 17) {
+        GemmMicrokernelTester()
+          .mr(1).nr(8).kr(2).sr(1)
+          .m(1)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+      }
+    }
+  }
+
+  // strided_cm_subtile: cm_stride(11) with k in {1, 18, 35, 52, 69}, m = 1
+  // (single pass of the m loop) and n = 1..8; each shape runs with iterations(1).
+  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t depth = 1; depth < 81; depth += 17) {
+      for (uint32_t rows = 1; rows < 2; ++rows) {
+        for (uint32_t cols = 1; cols < 9; ++cols) {
+          // Fresh temporary tester per (k, m, n) combination.
+          GemmMicrokernelTester()
+            .mr(1).nr(8)
+            .kr(2).sr(1)
+            .m(rows)
+            .n(cols).k(depth)
+            .cm_stride(11)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+        }
+      }
+    }
+  }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, a_offset) {  // m = 1, n = 8, ks(3), a_offset(83), swept over k
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
.n(8)
.k(k)
.ks(3)
.a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, zero) {  // same shape as a_offset, plus zero_index(mz)
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t mz = 0; mz < 1; mz++) {  // single pass: mz = 0
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
.a_offset(83)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(127)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(16)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -3300,475 +2832,7 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
@@ -3777,11 +2841,11 @@
.sr(1)
.m(1)
.n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
@@ -3790,12 +2854,12 @@
.sr(1)
.m(1)
.n(16)
- .k(8)
+ .k(16)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
@@ -3806,14 +2870,14 @@
.sr(1)
.m(m)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
@@ -3823,13 +2887,13 @@
.sr(1)
.m(m)
.n(16)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
@@ -3839,15 +2903,15 @@
.sr(1)
.m(1)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
@@ -3856,13 +2920,13 @@
.m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
@@ -3874,15 +2938,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
@@ -3891,13 +2955,13 @@
.m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
@@ -3909,15 +2973,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
@@ -3926,13 +2990,13 @@
.m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
@@ -3944,16 +3008,16 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
@@ -3962,15 +3026,15 @@
.m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
@@ -3980,15 +3044,15 @@
.n(16)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
@@ -3999,16 +3063,16 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
@@ -4017,15 +3081,15 @@
.m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
@@ -4035,15 +3099,15 @@
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
@@ -4054,15 +3118,15 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
@@ -4072,13 +3136,13 @@
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
@@ -4091,16 +3155,16 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
@@ -4110,15 +3174,15 @@
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
@@ -4128,14 +3192,14 @@
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
@@ -4148,15 +3212,15 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, a_offset) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
.nr(16)
@@ -4166,539 +3230,71 @@
.n(16)
.k(k)
.ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(43)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
.a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
.k(k)
.ks(3)
.a_offset(83)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
- .k(8)
+ .k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
- .k(8)
+ .k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD1R, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(2)
.sr(1)
- .m(2)
+ .m(1)
.n(16)
- .k(8)
+ .k(16)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -5172,1410 +3768,6 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
@@ -7512,474 +4704,6 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
@@ -8448,6 +5172,474 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_eq_8) {  // Single case: m == mr (2), n == nr (8), k = 8.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(8)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, strided_cn) {  // m = 2, n = 8, k = 8 with an explicit cn_stride.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(8)
+ .cn_stride(11)  // Larger than nr (8).
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_eq_8_subtile) {  // Every (m, n) with m in 1..2, n in 1..8 at k = 8.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(8)
+ .iterations(1)  // One iteration per (m, n) combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {  // Sweeps m over 1..2 with n = 8, k = 8.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(8)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {  // Sweeps n over 1..8 with m = 2, k = 8.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_lt_8) {  // Sweeps k over 1..7 with m = 2, n = 8.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_lt_8_subtile) {  // k in 1..7 crossed with m in 1..2 and n in 1..8.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // One iteration per (k, m, n) combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_gt_8) {  // Sweeps k over 9..15 with m = 2, n = 8.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_gt_8_subtile) {  // k in 9..15 crossed with m in 1..2 and n in 1..8.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // One iteration per (k, m, n) combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_div_8) {  // Multiples of 8 for k: 16, 24, ..., 80, with m = 2, n = 8.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, k_div_8_subtile) {  // k = 16, 24, ..., 80 crossed with m in 1..2 and n in 1..8.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // One iteration per (k, m, n) combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_gt_8) {  // Sweeps n over 9..15 (above nr = 8) and k over 1, 10, ..., 37 with m = 2.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)  // Pass the loop variable so each n above nr is actually exercised (a fixed 8 only re-runs n == nr).
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }  // NOTE(review): the commit describes these tests as generated; mirror this change in the generator template.
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_gt_8_strided_cn) {  // n over 9..15, k over 1, 10, ..., 37, m = 2, explicit cn_stride.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)  // Pass the loop variable so each n above nr is actually exercised (a fixed 8 only re-runs n == nr).
+ .k(k)
+ .cn_stride(11)  // Larger than nr (8).
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }  // NOTE(review): the commit describes these tests as generated; mirror this change in the generator template.
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_gt_8_subtile) {  // n in 9..15, k in 1, 10, ..., 37, m in 1..2.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // One iteration per (n, k, m) combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_div_8) {  // n = 16 and 24 (multiples of nr = 8), k over 1, 10, ..., 37, m = 2.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)  // Pass the loop variable, as n_div_8_strided_cn does; a fixed 8 only re-runs n == nr.
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }  // NOTE(review): the commit describes these tests as generated; mirror this change in the generator template.
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_div_8_strided_cn) {  // n = 16 and 24, k over 1, 10, ..., 37, m = 2, explicit cn_stride.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .cn_stride(11)  // Larger than nr (8).
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_div_8_subtile) {  // n = 16 and 24, k in 1, 10, ..., 37, m in 1..2.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // One iteration per (n, k, m) combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, small_kernel) {  // ks = 3 with k over 1, 10, ..., 37 at m = 2, n = 8.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, small_kernel_subtile) {  // ks = 3; k in 1, 10, ..., 37 crossed with m in 1..2, n in 1..8.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)  // One iteration per (k, m, n) combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_gt_8_small_kernel) {  // ks = 3; n over 9..15, k over 1, 10, ..., 37, m = 2.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)  // Pass the loop variable so each n above nr is actually exercised (a fixed 8 only re-runs n == nr).
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }  // NOTE(review): the commit describes these tests as generated; mirror this change in the generator template.
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, n_div_8_small_kernel) {  // ks = 3; n = 16 and 24, k over 1, 10, ..., 37, m = 2.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(n)  // Pass the loop variable so each multiple of nr is actually exercised (a fixed 8 only re-runs n == nr).
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }  // NOTE(review): the commit describes these tests as generated; mirror this change in the generator template.
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, strided_cm_subtile) {  // Explicit cm_stride; k in 1, 10, ..., 37 crossed with m in 1..2, n in 1..8.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)  // Larger than nr (8).
+ .iterations(1)  // One iteration per (k, m, n) combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, a_offset) {  // ks = 3 with a_offset = 83; k over 1, 10, ..., 37 at m = 2, n = 8.
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(83)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, zero) {  // Same setup as a_offset, repeated for each zero_index mz in 0..1.
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t mz = 0; mz < 2; mz++) {  // mz stays below mr (2).
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(83)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, qmin) {  // m = 2, n = 8, k = 8 with qmin set to 128.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(8)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, qmax) {  // m = 2, n = 8, k = 8 with qmax set to 128.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(8)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD4R, strided_cm) {  // m = 2, n = 8, k = 8 with an explicit cm_stride.
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(8)
+ .cm_stride(11)  // Larger than nr (8).
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
@@ -10320,1878 +7512,6 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
@@ -12660,6 +7980,474 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(8)
+ .k(8)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(8)
+ .k(8)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(m)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(m)
+ .n(8)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_lt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_lt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(n)  // pass the swept n so every width in 9..15 is actually run
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(n)  // pass the swept n so every width in 9..15 is actually run
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(n)  // pass the swept n so both multiples of nr (16, 24) are actually run
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(n)  // pass the swept n so every width in 9..15 is actually run
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(n)  // pass the swept n so both multiples of nr (16, 24) are actually run
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, a_offset) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(43)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t mz = 0; mz < 1; mz++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(43)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(8)
+ .k(8)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(8)
+ .k(8)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(4)
+ .sr(2)
+ .m(1)
+ .n(8)
+ .k(8)
+ .cm_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
@@ -13128,322 +8916,322 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(16)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(16)
.k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
.m(m)
- .n(8)
+ .n(16)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
+ .m(1)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
+ for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(n)  // pass the swept n so every width in 17..31 is actually run
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
+ for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(n)  // pass the swept n so every width in 17..31 is actually run
.k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
+ for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(n)  // pass the swept n so both multiples of nr (32, 48) are actually run
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
+ .m(1)
.n(n)
.k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
.m(m)
@@ -13451,146 +9239,146 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
+ for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(n)  // pass the swept n so every width in 17..31 is actually run
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(n)  // pass the swept n so both multiples of nr (32, 48) are actually run
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
.m(m)
.n(n)
.k(k)
- .cm_stride(11)
+ .cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(16)
.k(k)
.ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(43)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(16)
.k(k)
.ks(3)
- .a_offset(163)
+ .a_offset(43)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(16)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(16)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
- .nr(8)
+ .mr(1)
+ .nr(16)
.kr(4)
.sr(2)
- .m(4)
- .n(8)
+ .m(1)
+ .n(16)
.k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cm_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -14532,474 +10320,6 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(4)
- .sr(2)
- .m(3)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
@@ -15468,39 +10788,39 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(16)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_eq_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
@@ -15508,16 +10828,16 @@
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_eq_16_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
@@ -15525,48 +10845,48 @@
.n(16)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_eq_16_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_lt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_lt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
@@ -15574,34 +10894,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
@@ -15609,34 +10929,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_div_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
@@ -15644,54 +10964,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
@@ -15699,54 +11019,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
@@ -15754,35 +11074,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
@@ -15791,55 +11111,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
@@ -15848,127 +11168,127 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(83)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .a_offset(163)
+ .a_offset(83)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(16)
.kr(4)
.sr(2)
- .m(2)
+ .m(1)
.n(16)
.k(16)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(16)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_eq_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
@@ -15976,16 +11296,16 @@
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_eq_16_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
@@ -15993,48 +11313,48 @@
.n(16)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_eq_16_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_lt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_lt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
@@ -16042,34 +11362,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
@@ -16077,34 +11397,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_div_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
@@ -16112,54 +11432,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
@@ -16167,54 +11487,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
@@ -16222,35 +11542,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
@@ -16259,55 +11579,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
@@ -16316,89 +11636,557 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(k)
.ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(331)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
+ for (uint32_t mz = 0; mz < 4; mz++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(k)
.ks(3)
- .a_offset(251)
+ .a_offset(331)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(4)
.nr(16)
.kr(4)
.sr(2)
- .m(3)
+ .m(4)
.n(16)
.k(16)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_eq_8) { // single run with m=1, n=8, k=8 (m == mr, n == nr)
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(8)
+ .k(8)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, strided_cn) { // m=1, n=8, k=8 with cn_stride set to 11 (larger than nr=8)
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(8)
+ .k(8)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_eq_8_subtile) { // k fixed at 8; sweeps m over [1,1] and n over [1,8], one iteration per shape
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(m)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_eq_8_subtile_m) { // n=8, k=8; sweeps m over [1,1] (a single value because mr is 1)
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(m)
+ .n(8)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_eq_8_subtile_n) { // m=1, k=8; sweeps n over [1,8]
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_lt_8) { // m=1, n=8; sweeps k over [1,7]
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_lt_8_subtile) { // sweeps k over [1,7], m over [1,1], n over [1,8]; one iteration per shape
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_gt_8) { // m=1, n=8; sweeps k over [9,15]
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_gt_8_subtile) { // sweeps k over [9,15], m over [1,1], n over [1,8]; one iteration per shape
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_div_8) { // m=1, n=8; k takes the multiples of 8 from 16 through 80
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_div_8_subtile) { // k = 16, 24, ..., 80; sweeps m over [1,1] and n over [1,8]; one iteration per shape
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_gt_8) { // m=1; sweeps n over [9,15] (wider than nr=8) and k over {1,10,19,28,37}
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(n) // pass the loop value: a hard-coded .n(8) left n unused, so n > 8 was never exercised
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_gt_8_strided_cn) { // m=1, cn_stride=11; sweeps n over [9,15] and k over {1,10,19,28,37}
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(n) // pass the loop value: a hard-coded .n(8) left n unused, so the strided n > 8 case was never exercised
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_gt_8_subtile) { // sweeps n over [9,15], k over {1,10,19,28,37}, m over [1,1]; one iteration per shape
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_div_8) { // m=1; n in {16,24} (multiples of nr=8), k over {1,10,19,28,37}
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(n) // pass the loop value, as n_div_8_strided_cn does; a hard-coded .n(8) left n unused
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_div_8_strided_cn) { // m=1, cn_stride=11; n in {16,24}, k over {1,10,19,28,37}
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(n)
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_div_8_subtile) { // n in {16,24}, k over {1,10,19,28,37}, m over [1,1]; one iteration per shape
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, small_kernel) { // m=1, n=8 with ks set to 3; k over {1,10,19,28,37}
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, small_kernel_subtile) { // ks=3; sweeps k over {1,10,19,28,37}, m over [1,1], n over [1,8]; one iteration per shape
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_gt_8_small_kernel) { // m=1, ks=3; sweeps n over [9,15] and k over {1,10,19,28,37}
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(n) // pass the loop value: a hard-coded .n(8) left n unused, so n > 8 with ks=3 was never exercised
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_div_8_small_kernel) { // m=1, ks=3; n in {16,24}, k over {1,10,19,28,37}
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(n) // pass the loop value: a hard-coded .n(8) left n unused, so multiples of nr with ks=3 were never exercised
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, strided_cm_subtile) { // cm_stride=11; sweeps k over {1,10,19,28,37}, m over [1,1], n over [1,8]; one iteration per shape
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, a_offset) { // m=1, n=8, ks=3 with a_offset set to 43; k over {1,10,19,28,37}
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(43)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, zero) { // ks=3, a_offset=43; sets zero_index to each mz in [0,0] for k over {1,10,19,28,37}
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t mz = 0; mz < 1; mz++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(43)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, qmin) { // m=1, n=8, k=8 with qmin set to 128
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(8)
+ .k(8)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, qmax) { // m=1, n=8, k=8 with qmax set to 128
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(8)
+ .k(8)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, strided_cm) { // m=1, n=8, k=8 with cm_stride set to 11
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(4)
+ .m(1)
+ .n(8)
+ .k(8)
+ .cm_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -17808,2809 +13596,469 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(127)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(3)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(331)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(2)
- .sr(4)
- .m(4)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(2)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(2)
- .sr(4)
- .m(3)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .n(16)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .n(16)
+ .k(16)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(m)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(m)
- .n(8)
- .k(8)
+ .n(16)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
+ .n(n) // pass the loop's n (17..31); a fixed 16 never exercises n > nr
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
+ .n(n) // pass the loop's n (17..31) so the strided-cn case covers n > nr
.k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
+ .n(n) // pass the loop's n (32, 48); a fixed 16 only re-runs the single-tile case
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
.n(n)
.k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
+ .n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(m)
.n(n)
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
+ .n(n) // pass the loop's n (17..31) so the ks(3) case covers n > nr
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
+ .n(n) // pass the loop's n (32, 48) so the ks(3) case covers multi-tile n
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(m)
.n(n)
.k(k)
- .cm_stride(11)
+ .cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, a_offset) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
+ .n(16)
.k(k)
.ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(83)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, zero) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
+ .n(16)
.k(k)
.ks(3)
- .a_offset(43)
+ .a_offset(83)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
- .k(8)
+ .n(16)
+ .k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
- .k(8)
+ .n(16)
+ .k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
+ .nr(16)
+ .kr(2)
+ .sr(4)
.m(1)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .n(16)
+ .k(16)
+ .cm_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -21552,39 +15000,39 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(8)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
@@ -21592,16 +15040,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
@@ -21609,48 +15057,48 @@
.n(16)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
@@ -21658,34 +15106,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
@@ -21693,34 +15141,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
@@ -21728,54 +15176,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
@@ -21783,54 +15231,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
@@ -21838,35 +15286,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
@@ -21875,55 +15323,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
@@ -21932,89 +15380,557 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(k)
.ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(127)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
+ for (uint32_t mz = 0; mz < 3; mz++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(k)
.ks(3)
- .a_offset(43)
+ .a_offset(127)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(3)
.n(16)
.k(8)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_eq_16) {  // single case: m = mr = 2, n = nr = 8, k = 16
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when NEON is unavailable - confirm in the macro definition
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, strided_cn) {  // same shape as k_eq_16, plus cn_stride = 11
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .cn_stride(11)  // larger than nr = 8; NOTE(review): presumably the output stride between column tiles - confirm in the tester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {  // every m in 1..2 and n in 1..8 at k = 16
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(16)
+ .iterations(1)  // NOTE(review): presumably one iteration per case to bound the sweep's runtime - confirm
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {  // m in 1..2 with n fixed at 8 and k = 16
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)  // only the row count varies in this test
+ .n(8)
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {  // n in 1..8 with m fixed at 2 and k = 16
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)  // only the column count varies in this test
+ .k(16)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_lt_16) {  // k in 1..15 at m = 2, n = 8
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {  // strict bound: k = 16 itself is covered by k_eq_16
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {  // k in 1..15 crossed with m in 1..2 and n in 1..8
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // NOTE(review): presumably one iteration per case to bound the sweep's runtime - confirm
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_gt_16) {  // k in 17..31 at m = 2, n = 8
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {  // stops before 32, which k_div_16 starts from
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {  // k in 17..31 crossed with m in 1..2 and n in 1..8
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // NOTE(review): presumably one iteration per case to bound the sweep's runtime - confirm
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_div_16) {  // k = 32, 48, ..., 160 at m = 2, n = 8
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {  // multiples of 16 only
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_div_16_subtile) {  // k = 32, 48, ..., 160 crossed with m in 1..2 and n in 1..8
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // NOTE(review): presumably one iteration per case to bound the sweep's runtime - confirm
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_gt_8) {  // n in 9..15 (wider than nr = 8) at m = 2, k = 1, 18, ..., 69
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)  // take n from the loop; a fixed n(8) leaves the loop variable unused and never tests n > nr
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {  // n in 9..15 with cn_stride = 11, m = 2, k = 1, 18, ..., 69
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)  // take n from the loop, matching n_div_8_strided_cn; a fixed n(8) never tests n > nr
+ .k(k)
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {  // n in 9..15 crossed with k = 1, 18, ..., 69 and m in 1..2
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // NOTE(review): presumably one iteration per case to bound the sweep's runtime - confirm
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_div_8) {  // n = 16 and 24 (multiples of nr = 8) at m = 2, k = 1, 18, ..., 69
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)  // take n from the loop; a fixed n(8) leaves the loop variable unused and never tests multiple column tiles
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {  // n = 16 and 24 with cn_stride = 11, m = 2, k = 1, 18, ..., 69
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .cn_stride(11)  // larger than nr = 8; NOTE(review): presumably the output stride between column tiles - confirm in the tester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_div_8_subtile) {  // n = 16 and 24 crossed with k = 1, 18, ..., 69 and m in 1..2
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // NOTE(review): presumably one iteration per case to bound the sweep's runtime - confirm
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, small_kernel) {  // ks = 3 at m = 2, n = 8, k = 1, 18, ..., 69
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the indirect-GEMM kernel size (number of taps) - confirm in the tester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, small_kernel_subtile) {  // ks = 3 crossed with k = 1, 18, ..., 69, m in 1..2 and n in 1..8
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the indirect-GEMM kernel size (number of taps) - confirm in the tester
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {  // ks = 3 with n in 9..15, m = 2, k = 1, 18, ..., 69
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 9; n < 16; n++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)  // take n from the loop; a fixed n(8) leaves the loop variable unused and never tests n > nr
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) {  // ks = 3 with n = 16 and 24, m = 2, k = 1, 18, ..., 69
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 16; n <= 24; n += 8) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(n)  // take n from the loop; a fixed n(8) leaves the loop variable unused and never tests multiple column tiles
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, strided_cm_subtile) {  // cm_stride = 11 crossed with k = 1, 18, ..., 69, m in 1..2 and n in 1..8
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t n = 1; n <= 8; n++) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)  // larger than nr = 8; NOTE(review): presumably the output row stride - confirm in the tester
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, a_offset) {  // a_offset = 163 with ks = 3 at m = 2, n = 8, k = 1, 18, ..., 69
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(163)  // NOTE(review): presumably an offset applied to the input (A) pointers - confirm in the tester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, zero) {  // zero_index = 0 and 1 with a_offset = 163, ks = 3, m = 2, n = 8
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t mz = 0; mz < 2; mz++) {  // one pass per row index below mr = 2
+ for (size_t k = 1; k <= 80; k += 17) {
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .zero_index(mz)  // NOTE(review): presumably selects the row whose input points at the zero buffer - confirm in the tester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, qmin) {  // same shape as k_eq_16, plus qmin = 128
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .qmin(128)  // NOTE(review): presumably the lower output clamp, in the tester's own encoding - confirm
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, qmax) {  // same shape as k_eq_16, plus qmax = 128
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .qmax(128)  // NOTE(review): presumably the upper output clamp, in the tester's own encoding - confirm
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, strided_cm) {  // same shape as k_eq_16, plus cm_stride = 11
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(4)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .cm_stride(11)  // larger than nr = 8; NOTE(review): presumably the output row stride - confirm in the tester
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -22488,1443 +16404,39 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(251)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(3)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -23932,16 +16444,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -23949,48 +16461,48 @@
.n(8)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -23998,34 +16510,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -24033,34 +16545,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -24068,54 +16580,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -24123,54 +16635,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -24178,35 +16690,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -24215,55 +16727,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -24272,89 +16784,89 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(43)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .a_offset(83)
+ .a_offset(43)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -24828,322 +17340,322 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(m)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(m)
- .n(8)
+ .n(16)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
+ for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
+ for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
+ for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
.n(n)
.k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(m)
@@ -25151,184 +17663,184 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
+ for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(m)
.n(n)
.k(k)
- .cm_stride(11)
+ .cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
.ks(3)
.a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, zero) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t mz = 0; mz < 4; mz++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
.ks(3)
.a_offset(163)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cm_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(16)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -25336,16 +17848,16 @@
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -25353,48 +17865,48 @@
.n(8)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_lt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -25402,34 +17914,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -25437,34 +17949,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_div_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -25472,54 +17984,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -25527,54 +18039,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -25582,35 +18094,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -25619,55 +18131,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
@@ -25676,89 +18188,89 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(83)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .a_offset(163)
+ .a_offset(83)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(4)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(16)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -26232,1041 +18744,105 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(4)
.sr(1)
.m(m)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(331)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(4)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
.nr(16)
.kr(4)
.sr(1)
.m(m)
.n(16)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
+ for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (size_t k = 1; k < 16; k++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
@@ -27274,34 +18850,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
+ for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (size_t k = 17; k < 32; k++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
@@ -27309,34 +18885,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
+ for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (size_t k = 32; k <= 160; k += 16) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
@@ -27344,54 +18920,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
@@ -27399,54 +18975,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
@@ -27454,35 +19030,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
@@ -27491,55 +19067,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
@@ -27548,89 +19124,89 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, a_offset) {
TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
.k(k)
.ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(331)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t mz = 0; mz < 4; mz++) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
.k(k)
.ks(3)
- .a_offset(43)
+ .a_offset(331)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
- .k(8)
+ .k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
- .k(8)
+ .k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(16)
.kr(4)
.sr(1)
- .m(1)
+ .m(4)
.n(16)
- .k(8)
+ .k(16)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -28572,1878 +20148,6 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(4)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
@@ -31380,39 +21084,39 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
@@ -31420,16 +21124,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
@@ -31437,48 +21141,48 @@
.n(8)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
@@ -31486,34 +21190,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
@@ -31521,34 +21225,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
@@ -31556,54 +21260,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
@@ -31611,54 +21315,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
@@ -31666,35 +21370,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
@@ -31703,55 +21407,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
@@ -31760,1025 +21464,89 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(k)
.ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(163)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
+ for (uint32_t mz = 0; mz < 4; mz++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(k)
.ks(3)
- .a_offset(43)
+ .a_offset(163)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(4)
.nr(8)
.kr(2)
.sr(1)
- .m(1)
+ .m(4)
.n(8)
.k(8)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(2)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(127)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(2)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -33252,39 +22020,507 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(2)
.sr(1)
.m(4)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .n(16)
+ .k(8)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
+ .nr(16)
.kr(2)
.sr(1)
.m(4)
+ .n(16)
+ .k(8)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_eq_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(16)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(8)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_lt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_lt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 8; k++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 9; k < 16; k++) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_div_8) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 16; k <= 80; k += 8) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_gt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_gt_16_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_gt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_div_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_div_16_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(n)
+ .k(k)
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_div_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_gt_16_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 17; n < 32; n++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_div_16_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t n = 32; n <= 48; n += 16) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t n = 1; n <= 16; n++) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(19)
+ .iterations(1)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, a_offset) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t mz = 0; mz < 4; mz++) {
+ for (size_t k = 1; k <= 40; k += 9) {
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(k)
+ .ks(3)
+ .a_offset(163)
+ .zero_index(mz)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(8)
+ .qmin(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(8)
+ .qmax(128)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(4)
+ .nr(16)
+ .kr(2)
+ .sr(1)
+ .m(4)
+ .n(16)
+ .k(8)
+ .cm_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_eq_16) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
+ .n(8)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
+ GemmMicrokernelTester()
+ .mr(1)
+ .nr(8)
+ .kr(2)
+ .sr(1)
+ .m(1)
.n(8)
.k(16)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -33292,16 +22528,16 @@
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -33309,48 +22545,48 @@
.n(8)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_lt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -33358,34 +22594,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -33393,34 +22629,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_div_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -33428,54 +22664,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -33483,54 +22719,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -33538,35 +22774,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -33575,55 +22811,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
@@ -33632,410 +22868,410 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(83)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .a_offset(331)
+ .a_offset(83)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(1)
.nr(8)
.kr(2)
.sr(1)
- .m(4)
+ .m(1)
.n(8)
.k(16)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
- .n(16)
+ .n(8)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_lt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_lt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_div_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
+ for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
+ for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
+ for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_div_8) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
+ for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
+ for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
.n(n)
.k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
+ for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
@@ -34043,467 +23279,467 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
+ for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
+ for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
- .cm_stride(19)
+ .cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(k)
.ks(3)
.a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, zero) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t mz = 0; mz < 2; mz++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(k)
.ks(3)
.a_offset(163)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(2)
- .n(16)
+ .n(8)
.k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cm_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(8)
.k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(8)
.k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_eq_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
- .n(16)
+ .n(8)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_lt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_lt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_div_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
+ for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
+ for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(n)
.k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
+ for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
+ for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
+ for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
.n(n)
.k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
+ for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
@@ -34511,712 +23747,244 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
+ for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
+ for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(n)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
+ for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(m)
.n(n)
.k(k)
- .cm_stride(19)
+ .cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(8)
.k(k)
.ks(3)
.a_offset(251)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, zero) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t mz = 0; mz < 3; mz++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(8)
.k(k)
.ks(3)
.a_offset(251)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(8)
.k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(8)
.k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(3)
- .nr(16)
+ .nr(8)
.kr(2)
.sr(1)
.m(3)
- .n(16)
+ .n(8)
.k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(2)
- .sr(1)
- .m(4)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cm_stride(11)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(8)
.k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(8)
.k(16)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 2; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(8)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_lt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_lt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
for (uint32_t m = 1; m <= 2; m++) {
@@ -35224,34 +23992,34 @@
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
for (uint32_t m = 1; m <= 2; m++) {
@@ -35259,34 +24027,34 @@
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t m = 1; m <= 2; m++) {
@@ -35294,54 +24062,54 @@
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
- .n(8)
+ .n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
- .n(8)
+ .n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
@@ -35349,54 +24117,54 @@
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
- .n(8)
+ .n(n)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
@@ -35404,35 +24172,35 @@
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 2; m++) {
@@ -35440,56 +24208,56 @@
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 2; m++) {
@@ -35497,44 +24265,44 @@
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(8)
.k(k)
.ks(3)
.a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, zero) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t mz = 0; mz < 2; mz++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(8)
@@ -35542,51 +24310,519 @@
.ks(3)
.a_offset(163)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(8)
.k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(8)
.k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(2)
.nr(8)
- .kr(16)
+ .kr(8)
.sr(1)
.m(2)
.n(8)
.k(16)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
+
+
+#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16) {  // Single run with m == mr (2), n == nr (8) and k fixed at 16.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);  // Kernel under test, its params initializer, and the requantization routine handed to the tester.
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cn) {  // Same m = 2, n = 8, k = 16 case as k_eq_16, but with cn_stride set to 11.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .cn_stride(11)  // 11 is larger than nr = 8; NOTE(review): presumably the output stride between column tiles - confirm in GemmMicrokernelTester.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile) {  // Runs every (m, n) with m in 1..2 and n in 1..8 at k = 16.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (uint32_t m = 1; m <= 2; m++) {  // m from 1 up to mr.
+ for (uint32_t n = 1; n <= 8; n++) {  // n from 1 up to nr.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(16)
+ .iterations(1)  // One iteration per (m, n) combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {  // Varies only m (1..2) with n = 8 and k = 16 held fixed.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (uint32_t m = 1; m <= 2; m++) {  // m from 1 up to mr.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(8)
+ .k(16)
+ .iterations(1)  // One iteration per m value.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {  // Varies only n (1..8) with m = 2 and k = 16 held fixed.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (uint32_t n = 1; n <= 8; n++) {  // n from 1 up to nr.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(16)
+ .iterations(1)  // One iteration per n value.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16) {  // Runs m = 2, n = 8 for every k below 16.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (size_t k = 1; k < 16; k++) {  // k = 1..15.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_subtile) {  // Runs every (k, m, n) with k in 1..15, m in 1..2, n in 1..8.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (size_t k = 1; k < 16; k++) {  // k = 1..15.
+ for (uint32_t m = 1; m <= 2; m++) {  // m from 1 up to mr.
+ for (uint32_t n = 1; n <= 8; n++) {  // n from 1 up to nr.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // One iteration per combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16) {  // Runs m = 2, n = 8 for every k strictly between 16 and 32.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (size_t k = 17; k < 32; k++) {  // k = 17..31.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_subtile) {  // Runs every (k, m, n) with k in 17..31, m in 1..2, n in 1..8.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (size_t k = 17; k < 32; k++) {  // k = 17..31.
+ for (uint32_t m = 1; m <= 2; m++) {  // m from 1 up to mr.
+ for (uint32_t n = 1; n <= 8; n++) {  // n from 1 up to nr.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // One iteration per combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16) {  // Runs m = 2, n = 8 for k values that are multiples of 16.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (size_t k = 32; k <= 160; k += 16) {  // k = 32, 48, ..., 160.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_subtile) {  // Runs every (k, m, n) with k a multiple of 16 in 32..160, m in 1..2, n in 1..8.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (size_t k = 32; k <= 160; k += 16) {  // k = 32, 48, ..., 160.
+ for (uint32_t m = 1; m <= 2; m++) {  // m from 1 up to mr.
+ for (uint32_t n = 1; n <= 8; n++) {  // n from 1 up to nr.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // One iteration per combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8) {  // Sweeps n over 9..15 (just above nr = 8) and k over 1, 18, 35, 52, 69 with m = 2.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (uint32_t n = 9; n < 16; n++) {  // n = 9..15.
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(n)  // Use the loop variable: the previous .n(8) left n unused, so every pass repeated the n == 8 case.
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {  // Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 with m = 2 and cn_stride = 11.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (uint32_t n = 9; n < 16; n++) {  // n = 9..15.
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(n)  // Use the loop variable: the previous .n(8) left n unused, so every pass repeated the n == 8 case.
+ .k(k)
+ .cn_stride(11)  // 11 is larger than nr = 8; NOTE(review): presumably the output stride between column tiles - confirm in GemmMicrokernelTester.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_subtile) {  // Runs every (n, k, m) with n in 9..15, k in {1, 18, 35, 52, 69}, m in 1..2.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (uint32_t n = 9; n < 16; n++) {  // n = 9..15, i.e. above nr = 8.
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69.
+ for (uint32_t m = 1; m <= 2; m++) {  // m from 1 up to mr.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // One iteration per combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8) {  // Sweeps n over 16 and 24 (multiples of nr = 8) and k over 1, 18, 35, 52, 69 with m = 2.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (uint32_t n = 16; n <= 24; n += 8) {  // n = 16, 24.
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(n)  // Use the loop variable, as n_div_8_strided_cn does; the previous .n(8) left n unused and only re-ran n == 8.
+ .k(k)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {  // Runs n = 16 and 24 against k in {1, 18, 35, 52, 69} with m = 2 and cn_stride = 11.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (uint32_t n = 16; n <= 24; n += 8) {  // n = 16, 24 (multiples of nr = 8).
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(n)
+ .k(k)
+ .cn_stride(11)  // 11 is larger than nr = 8; NOTE(review): presumably the output stride between column tiles - confirm in GemmMicrokernelTester.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_subtile) {  // Runs every (n, k, m) with n in {16, 24}, k in {1, 18, 35, 52, 69}, m in 1..2.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (uint32_t n = 16; n <= 24; n += 8) {  // n = 16, 24 (multiples of nr = 8).
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69.
+ for (uint32_t m = 1; m <= 2; m++) {  // m from 1 up to mr.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .iterations(1)  // One iteration per combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, small_kernel) {  // Runs m = 2, n = 8 with ks = 3 for k in {1, 18, 35, 52, 69}.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the kernel size (number of indirection entries per output) - confirm in GemmMicrokernelTester.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, small_kernel_subtile) {  // Runs every (k, m, n) with k in {1, 18, 35, 52, 69}, m in 1..2, n in 1..8, all with ks = 3.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69.
+ for (uint32_t m = 1; m <= 2; m++) {  // m from 1 up to mr.
+ for (uint32_t n = 1; n <= 8; n++) {  // n from 1 up to nr.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the kernel size - confirm in GemmMicrokernelTester.
+ .iterations(1)  // One iteration per combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_small_kernel) {  // Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 with m = 2 and ks = 3.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (uint32_t n = 9; n < 16; n++) {  // n = 9..15.
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(n)  // Use the loop variable: the previous .n(8) left n unused, so every pass repeated the n == 8 case.
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the kernel size - confirm in GemmMicrokernelTester.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_small_kernel) {  // Sweeps n over 16 and 24 and k over 1, 18, 35, 52, 69 with m = 2 and ks = 3.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (uint32_t n = 16; n <= 24; n += 8) {  // n = 16, 24 (multiples of nr = 8).
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(n)  // Use the loop variable: the previous .n(8) left n unused, so every pass repeated the n == 8 case.
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the kernel size - confirm in GemmMicrokernelTester.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm_subtile) {  // Runs every (k, m, n) with k in {1, 18, 35, 52, 69}, m in 1..2, n in 1..8, all with cm_stride = 11.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69.
+ for (uint32_t m = 1; m <= 2; m++) {  // m from 1 up to mr.
+ for (uint32_t n = 1; n <= 8; n++) {  // n from 1 up to nr.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(m)
+ .n(n)
+ .k(k)
+ .cm_stride(11)  // 11 is larger than nr = 8; NOTE(review): presumably the output row stride - confirm in GemmMicrokernelTester.
+ .iterations(1)  // One iteration per combination.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, a_offset) {  // Runs m = 2, n = 8, ks = 3 with a_offset = 163 for k in {1, 18, 35, 52, 69}.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the kernel size - confirm in GemmMicrokernelTester.
+ .a_offset(163)  // NOTE(review): presumably an offset applied to the input (A) pointers - confirm in GemmMicrokernelTester.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, zero) {  // Repeats the a_offset configuration for each zero_index in 0..1 and k in {1, 18, 35, 52, 69}.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ for (uint32_t mz = 0; mz < 2; mz++) {  // mz = 0, 1, i.e. one value per row of the mr = 2 tile.
+ for (size_t k = 1; k <= 80; k += 17) {  // k = 1, 18, 35, 52, 69.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(k)
+ .ks(3)  // NOTE(review): presumably the kernel size - confirm in GemmMicrokernelTester.
+ .a_offset(163)  // NOTE(review): presumably an offset applied to the input (A) pointers - confirm in GemmMicrokernelTester.
+ .zero_index(mz)  // NOTE(review): presumably selects which row uses the zero buffer - confirm in GemmMicrokernelTester.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+ }
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmin) {  // Single m = 2, n = 8, k = 16 run with qmin set to 128.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .qmin(128)  // NOTE(review): presumably the lower output clamp bound - confirm how the tester interprets 128 for signed 8-bit output.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmax) {  // Single m = 2, n = 8, k = 16 run with qmax set to 128.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .qmax(128)  // NOTE(review): presumably the upper output clamp bound - confirm how the tester interprets 128 for signed 8-bit output.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ }
+
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm) {  // Single m = 2, n = 8, k = 16 run with cm_stride set to 11.
+ TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably guards on NEON availability - confirm against the macro.
+ GemmMicrokernelTester()
+ .mr(2)
+ .nr(8)
+ .kr(8)
+ .sr(1)
+ .m(2)
+ .n(8)
+ .k(16)
+ .cm_stride(11)  // 11 is larger than nr = 8; NOTE(review): presumably the output row stride - confirm in GemmMicrokernelTester.
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
@@ -36060,474 +25296,6 @@
#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
-
-
-#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
@@ -36996,474 +25764,6 @@
#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(4)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
-
-
-#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
TEST_REQUIRES_ARM_NEON_DOT;
GemmMicrokernelTester()
@@ -37932,474 +26232,6 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(43)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(1)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
@@ -38868,39 +26700,39 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -38908,16 +26740,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -38925,48 +26757,48 @@
.n(8)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -38974,34 +26806,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -39009,34 +26841,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -39044,54 +26876,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -39099,54 +26931,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -39154,35 +26986,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -39191,55 +27023,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -39248,127 +27080,127 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(127)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
+ for (uint32_t mz = 0; mz < 3; mz++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .a_offset(163)
+ .a_offset(127)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(4)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(4)
+ .m(3)
.n(8)
.k(8)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(8)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
@@ -39376,16 +27208,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
@@ -39393,48 +27225,48 @@
.n(16)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
@@ -39442,34 +27274,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
@@ -39477,34 +27309,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
@@ -39512,54 +27344,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
@@ -39567,54 +27399,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
@@ -39622,35 +27454,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
@@ -39659,55 +27491,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
@@ -39716,127 +27548,127 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(43)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(k)
.ks(3)
- .a_offset(127)
+ .a_offset(43)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(3)
+ .mr(1)
.nr(16)
.kr(8)
.sr(1)
- .m(3)
+ .m(1)
.n(16)
.k(8)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(16)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_eq_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -39844,16 +27676,16 @@
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_eq_16_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -39861,48 +27693,48 @@
.n(8)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_eq_16_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_lt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_lt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -39910,34 +27742,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -39945,34 +27777,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_div_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -39980,54 +27812,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -40035,54 +27867,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -40090,35 +27922,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -40127,55 +27959,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
@@ -40184,1025 +28016,89 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(251)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
+ for (uint32_t mz = 0; mz < 3; mz++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .a_offset(83)
+ .a_offset(251)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
.kr(8)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(16)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(331)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(8)
- .sr(1)
- .m(4)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -41676,2811 +28572,939 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(1)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(8)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(8)
- .kr(16)
- .sr(1)
- .m(2)
- .n(8)
- .k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(16)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_eq_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_eq_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_eq_16_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_eq_16_subtile_m) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(m)
- .n(8)
+ .n(16)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_eq_16_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_eq_16_subtile_n) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
.n(n)
.k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_lt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_lt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_lt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_lt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 16; k++) {
for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 17; k < 32; k++) {
for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_div_16) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 32; k <= 160; k += 16) {
for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
+ for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
+ for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
+ for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_div_16) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
.n(n)
.k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cn_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
+ for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(n)  // use the loop's n (nr < n < 2*nr); a fixed n(16) left the loop variable unused
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
+ for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(n)  // use the loop's n (multiples of nr above nr); a fixed n(16) left the loop variable unused
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
+ for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(m)
.n(n)
.k(k)
- .cm_stride(11)
+ .cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
.ks(3)
.a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, zero) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t mz = 0; mz < 4; mz++) {
for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(k)
.ks(3)
.a_offset(331)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
.mr(4)
- .nr(8)
- .kr(16)
+ .nr(16)
+ .kr(8)
.sr(1)
.m(4)
- .n(8)
+ .n(16)
.k(16)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .cm_stride(19)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_eq_16) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 1; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(83)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(1)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(1)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(16)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(n)
- .k(16)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_lt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_lt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 17; k < 32; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 32; k <= 160; k += 16) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- for (uint32_t m = 1; m <= 2; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
- for (size_t k = 1; k <= 80; k += 17) {
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(2)
- .nr(16)
- .kr(16)
- .sr(1)
- .m(2)
- .n(16)
- .k(16)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .k(16)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cn) {
- TEST_REQUIRES_ARM_NEON_DOT;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
- .k(8)
+ .k(16)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t m = 1; m <= 1; m++) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_eq_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t m = 1; m <= 1; m++) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_eq_16_subtile_m) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
.m(m)
.n(8)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON_DOT;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_eq_16_subtile_n) {
+ TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
.n(n)
- .k(8)
+ .k(16)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k < 8; k++) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_lt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_lt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k < 16; k++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 9; k < 16; k++) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_gt_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_gt_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 17; k < 32; k++) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 16; k <= 80; k += 8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_div_16) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 1; m++) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_div_16_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 32; k <= 160; k += 16) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_gt_8) {
+ TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
- .n(8)
+ .n(n)  // use the loop's n (9..15) so widths beyond one nr tile are actually exercised
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON_DOT;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_gt_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
- .n(8)
+ .n(n)  // use the loop's n (9..15) so cn_stride is exercised across more than one nr tile
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_div_8) {
+ TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
- .n(8)
+ .n(n)  // use the loop's n (16, 24), matching n_div_8_strided_cn, so multi-tile widths are tested
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON_DOT;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_div_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 3; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON_DOT;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_gt_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
- .n(8)
+ .n(n)  // use the loop's n (9..15) so the ks(3) path is exercised beyond one nr tile
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON_DOT;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_div_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
- .n(8)
+ .n(n)  // use the loop's n (16, 24) so the ks(3) path is exercised on multi-tile widths
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
+ for (uint32_t m = 1; m <= 3; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
.m(m)
.n(n)
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, a_offset) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, a_offset) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(251)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, zero) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t mz = 0; mz < 1; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, zero) {
+ TEST_REQUIRES_ARM_NEON;
+ for (uint32_t mz = 0; mz < 3; mz++) {
+ for (size_t k = 1; k <= 80; k += 17) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
.k(k)
.ks(3)
- .a_offset(43)
+ .a_offset(251)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmin) {
- TEST_REQUIRES_ARM_NEON_DOT;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, qmin) {
+ TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
- .k(8)
+ .k(16)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmax) {
- TEST_REQUIRES_ARM_NEON_DOT;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, qmax) {
+ TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
- .k(8)
+ .k(16)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm) {
- TEST_REQUIRES_ARM_NEON_DOT;
+ TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, strided_cm) {
+ TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(3)
.nr(8)
- .kr(4)
+ .kr(16)
.sr(1)
- .m(1)
+ .m(3)
.n(8)
- .k(8)
+ .k(16)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
-#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
@@ -45887,474 +30911,6 @@
#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cn) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t m = 1; m <= 8; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t m = 1; m <= 8; m++) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 8; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 8; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 8; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 8; m++) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 8; m++) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, small_kernel) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 8; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 8; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, a_offset) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, zero) {
- TEST_REQUIRES_ARM_NEON_DOT;
- for (uint32_t mz = 0; mz < 8; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(331)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmin) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmax) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm) {
- TEST_REQUIRES_ARM_NEON_DOT;
- GemmMicrokernelTester()
- .mr(8)
- .nr(16)
- .kr(4)
- .sr(1)
- .m(8)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-
-
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
@@ -46824,474 +31380,6 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 4; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
@@ -47760,39 +31848,39 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(8)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 6; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
@@ -47800,16 +31888,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 6; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
@@ -47817,48 +31905,48 @@
.n(16)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 6; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
@@ -47866,34 +31954,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 6; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
@@ -47901,34 +31989,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 6; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
@@ -47936,54 +32024,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 6; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
@@ -47991,54 +32079,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 6; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
@@ -48046,35 +32134,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 6; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
@@ -48083,55 +32171,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 6; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
@@ -48140,1025 +32228,89 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(k)
.ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(251)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
+ for (uint32_t mz = 0; mz < 6; mz++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(k)
.ks(3)
- .a_offset(83)
+ .a_offset(251)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(6)
.nr(16)
.kr(1)
.sr(1)
- .m(2)
+ .m(6)
.n(16)
.k(8)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(127)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(127)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -49632,39 +32784,39 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(8)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
@@ -49672,16 +32824,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
@@ -49689,48 +32841,48 @@
.n(16)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
@@ -49738,34 +32890,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
@@ -49773,34 +32925,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
@@ -49808,54 +32960,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
@@ -49863,54 +33015,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(n)
.k(k)
.cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
@@ -49918,35 +33070,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
@@ -49955,55 +33107,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 17; n < 32; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 32; n <= 48; n += 16) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 1; m++) {
+ for (uint32_t m = 1; m <= 2; m++) {
for (uint32_t n = 1; n <= 16; n++) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
@@ -50012,89 +33164,89 @@
.k(k)
.cm_stride(19)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(k)
.ks(3)
- .a_offset(43)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(83)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 1; mz++) {
+ for (uint32_t mz = 0; mz < 2; mz++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(k)
.ks(3)
- .a_offset(43)
+ .a_offset(83)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(1)
+ .mr(2)
.nr(16)
.kr(1)
.sr(1)
- .m(1)
+ .m(2)
.n(16)
.k(8)
.cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -50568,39 +33720,39 @@
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_eq_8) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, strided_cn) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
@@ -50608,16 +33760,16 @@
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
@@ -50625,48 +33777,48 @@
.n(8)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(8)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_lt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_lt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
@@ -50674,34 +33826,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
@@ -50709,34 +33861,34 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_div_8) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
@@ -50744,54 +33896,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_gt_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
@@ -50799,54 +33951,54 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_div_8) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(n)
.k(k)
.cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
@@ -50854,35 +34006,35 @@
.n(n)
.k(k)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
@@ -50891,55 +34043,55 @@
.k(k)
.ks(3)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
TEST_REQUIRES_ARM_NEON;
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 2; m++) {
+ for (uint32_t m = 1; m <= 1; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
@@ -50948,557 +34100,89 @@
.k(k)
.cm_stride(11)
.iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, a_offset) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, a_offset) {
TEST_REQUIRES_ARM_NEON;
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .a_offset(83)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .a_offset(43)
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, zero) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, zero) {
TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 2; mz++) {
+ for (uint32_t mz = 0; mz < 1; mz++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(k)
.ks(3)
- .a_offset(83)
+ .a_offset(43)
.zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
}
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, qmin) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, qmin) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
.qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, qmax) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, qmax) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
.qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
- TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cm) {
+ TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, strided_cm) {
TEST_REQUIRES_ARM_NEON;
GemmMicrokernelTester()
- .mr(2)
+ .mr(1)
.nr(8)
.kr(1)
.sr(1)
- .m(2)
+ .m(1)
.n(8)
.k(8)
.cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(127)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(3)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
+ .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
@@ -51971,484 +34655,16 @@
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(16)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_gt_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_div_16) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(n)
- .k(k)
- .cn_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_gt_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 17; n < 32; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_div_16_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t n = 32; n <= 48; n += 16) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 3; m++) {
- for (uint32_t n = 1; n <= 16; n++) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(19)
- .iterations(1)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(127)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, zero) {
- TEST_REQUIRES_ARM_NEON;
- for (uint32_t mz = 0; mz < 3; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(k)
- .ks(3)
- .a_offset(127)
- .zero_index(mz)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, qmin) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .qmin(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, qmax) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .qmax(128)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-
- TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- GemmMicrokernelTester()
- .mr(3)
- .nr(16)
- .kr(1)
- .sr(1)
- .m(3)
- .n(16)
- .k(8)
- .cm_stride(19)
- .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
+#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52457,15 +34673,15 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cn) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52475,17 +34691,17 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(m)
.n(n)
@@ -52497,16 +34713,16 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_m) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(m)
.n(8)
@@ -52517,16 +34733,16 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_n) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(n)
@@ -52537,16 +34753,16 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (size_t k = 1; k < 8; k++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52556,18 +34772,18 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (size_t k = 1; k < 8; k++) {
for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(m)
.n(n)
@@ -52580,16 +34796,16 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (size_t k = 9; k < 16; k++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52599,18 +34815,18 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (size_t k = 9; k < 16; k++) {
for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(m)
.n(n)
@@ -52623,16 +34839,16 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_div_8) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (size_t k = 16; k <= 80; k += 8) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52642,18 +34858,18 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (size_t k = 16; k <= 80; k += 8) {
for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(m)
.n(n)
@@ -52666,17 +34882,17 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52687,17 +34903,17 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52709,18 +34925,18 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(m)
.n(n)
@@ -52733,17 +34949,17 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52754,17 +34970,17 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_cn) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(n)
@@ -52776,18 +34992,18 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(m)
.n(n)
@@ -52800,16 +35016,16 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, small_kernel) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52820,18 +35036,18 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, small_kernel_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(m)
.n(n)
@@ -52845,17 +35061,17 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (uint32_t n = 9; n < 16; n++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52867,17 +35083,17 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_small_kernel) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (uint32_t n = 16; n <= 24; n += 8) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52889,18 +35105,18 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cm_subtile) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (size_t k = 1; k <= 40; k += 9) {
for (uint32_t m = 1; m <= 4; m++) {
for (uint32_t n = 1; n <= 8; n++) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(m)
.n(n)
@@ -52914,16 +35130,16 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, a_offset) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52935,17 +35151,17 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, zero) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, zero) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
for (uint32_t mz = 0; mz < 4; mz++) {
for (size_t k = 1; k <= 40; k += 9) {
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52959,15 +35175,15 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmin) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, qmin) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52977,15 +35193,15 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmax) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, qmax) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -52995,15 +35211,15 @@
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
+ TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cm) {
+ TEST_REQUIRES_ARM_NEON_DOT;
struct xnn_code_buffer code_buffer;
ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64(&code_buffer));
+ ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64(&code_buffer));
GemmMicrokernelTester()
.mr(4)
.nr(8)
- .kr(1)
+ .kr(4)
.sr(1)
.m(4)
.n(8)
@@ -53012,580 +35228,4 @@
.Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
}
-#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
-
-
-#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .cn_stride(11)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(8)
- .k(8)
- .iterations(1)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(8)
- .iterations(1)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (size_t k = 1; k < 8; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (size_t k = 1; k < 8; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (size_t k = 9; k < 16; k++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (size_t k = 9; k < 16; k++) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (size_t k = 16; k <= 80; k += 8) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (size_t k = 16; k <= 80; k += 8) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .cn_stride(11)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(n)
- .k(k)
- .cn_stride(11)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_subtile) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .iterations(1)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .ks(3)
- .iterations(1)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (uint32_t n = 9; n < 16; n++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_small_kernel) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (uint32_t n = 16; n <= 24; n += 8) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (size_t k = 1; k <= 40; k += 9) {
- for (uint32_t m = 1; m <= 4; m++) {
- for (uint32_t n = 1; n <= 8; n++) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(m)
- .n(n)
- .k(k)
- .cm_stride(11)
- .iterations(1)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, a_offset) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, zero) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- for (uint32_t mz = 0; mz < 4; mz++) {
- for (size_t k = 1; k <= 40; k += 9) {
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(k)
- .ks(3)
- .a_offset(163)
- .zero_index(mz)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- }
- }
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmin) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .qmin(128)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmax) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .qmax(128)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-
- TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm) {
- TEST_REQUIRES_ARM_NEON;
- struct xnn_code_buffer code_buffer;
- ASSERT_EQ(xnn_status_success, xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE));
- ASSERT_EQ(xnn_status_success, xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64(&code_buffer));
- GemmMicrokernelTester()
- .mr(4)
- .nr(8)
- .kr(1)
- .sr(1)
- .m(4)
- .n(8)
- .k(8)
- .cm_stride(11)
- .Test(reinterpret_cast<xnn_qs8_igemm_minmax_ukernel_function>(code_buffer.code), xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
- ASSERT_EQ(xnn_status_success, xnn_release_code_memory(&code_buffer));
- }
-#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
+#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT