blob: 0814d757062e152c5b02f37977dd3b55e71f1a29 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/qs8-gemm-minmax-rndnu.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
Frank Barchard447aa7b2021-12-28 14:11:40 -080016#include <xnnpack/allocator.h>
Marat Dukhane903dff2021-07-16 19:43:41 -070017#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
27 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8) {
28 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -080029 GemmMicrokernelTester()
30 .mr(4)
31 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -080033 .sr(1)
34 .m(4)
35 .n(8)
36 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080037 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -080038 }
39
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080040 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cn) {
41 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -080042 GemmMicrokernelTester()
43 .mr(4)
44 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080045 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -080046 .sr(1)
47 .m(4)
48 .n(8)
49 .k(8)
50 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080051 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -080052 }
53
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080054 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_strided_a) {
55 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -080056 GemmMicrokernelTester()
57 .mr(4)
58 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080059 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -080060 .sr(1)
61 .m(4)
62 .n(8)
63 .k(8)
64 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080065 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -080066 }
67
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080068 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile) {
69 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080070 for (uint32_t n = 1; n <= 8; n++) {
71 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardda7b2e22021-12-13 23:50:53 -080072 GemmMicrokernelTester()
73 .mr(4)
74 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080075 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -080076 .sr(1)
77 .m(m)
78 .n(n)
79 .k(8)
80 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080081 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -080082 }
83 }
84 }
85
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080086 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_m) {
87 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -080088 for (uint32_t m = 1; m <= 4; m++) {
89 GemmMicrokernelTester()
90 .mr(4)
91 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080092 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -080093 .sr(1)
94 .m(m)
95 .n(8)
96 .k(8)
97 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080098 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -080099 }
100 }
101
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800102 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_n) {
103 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800104 for (uint32_t n = 1; n <= 8; n++) {
105 GemmMicrokernelTester()
106 .mr(4)
107 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800108 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800109 .sr(1)
110 .m(4)
111 .n(n)
112 .k(8)
113 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800114 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800115 }
116 }
117
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800118 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8) {
119 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800120 for (size_t k = 1; k < 8; k++) {
121 GemmMicrokernelTester()
122 .mr(4)
123 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800124 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800125 .sr(1)
126 .m(4)
127 .n(8)
128 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800129 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800130 }
131 }
132
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800133 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_strided_a) {
134 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800135 for (size_t k = 1; k < 8; k++) {
136 GemmMicrokernelTester()
137 .mr(4)
138 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800139 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800140 .sr(1)
141 .m(4)
142 .n(8)
143 .k(k)
144 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800145 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800146 }
147 }
148
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800149 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_subtile) {
150 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800151 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800152 for (uint32_t n = 1; n <= 8; n++) {
153 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardda7b2e22021-12-13 23:50:53 -0800154 GemmMicrokernelTester()
155 .mr(4)
156 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800157 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800158 .sr(1)
159 .m(m)
160 .n(n)
161 .k(k)
162 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800163 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800164 }
165 }
166 }
167 }
168
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800169 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8) {
170 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800171 for (size_t k = 9; k < 16; k++) {
172 GemmMicrokernelTester()
173 .mr(4)
174 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800175 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800176 .sr(1)
177 .m(4)
178 .n(8)
179 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800180 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800181 }
182 }
183
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800184 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_strided_a) {
185 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800186 for (size_t k = 9; k < 16; k++) {
187 GemmMicrokernelTester()
188 .mr(4)
189 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800190 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800191 .sr(1)
192 .m(4)
193 .n(8)
194 .k(k)
195 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800196 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800197 }
198 }
199
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800200 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_subtile) {
201 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800202 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800203 for (uint32_t n = 1; n <= 8; n++) {
204 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardda7b2e22021-12-13 23:50:53 -0800205 GemmMicrokernelTester()
206 .mr(4)
207 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800208 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800209 .sr(1)
210 .m(m)
211 .n(n)
212 .k(k)
213 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800214 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800215 }
216 }
217 }
218 }
219
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800220 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_div_8) {
221 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800222 for (size_t k = 16; k <= 80; k += 8) {
223 GemmMicrokernelTester()
224 .mr(4)
225 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800226 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800227 .sr(1)
228 .m(4)
229 .n(8)
230 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800231 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800232 }
233 }
234
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800235 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_strided_a) {
236 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800237 for (size_t k = 16; k <= 80; k += 8) {
238 GemmMicrokernelTester()
239 .mr(4)
240 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800241 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800242 .sr(1)
243 .m(4)
244 .n(8)
245 .k(k)
246 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800248 }
249 }
250
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800251 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_subtile) {
252 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800253 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800254 for (uint32_t n = 1; n <= 8; n++) {
255 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardda7b2e22021-12-13 23:50:53 -0800256 GemmMicrokernelTester()
257 .mr(4)
258 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800259 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800260 .sr(1)
261 .m(m)
262 .n(n)
263 .k(k)
264 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800265 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800266 }
267 }
268 }
269 }
270
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800271 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8) {
272 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800273 for (uint32_t n = 9; n < 16; n++) {
274 for (size_t k = 1; k <= 40; k += 9) {
275 GemmMicrokernelTester()
276 .mr(4)
277 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800278 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800279 .sr(1)
280 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800281 .n(n)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800282 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800283 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800284 }
285 }
286 }
287
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800288 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_cn) {
289 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800290 for (uint32_t n = 9; n < 16; n++) {
291 for (size_t k = 1; k <= 40; k += 9) {
292 GemmMicrokernelTester()
293 .mr(4)
294 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800295 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800296 .sr(1)
297 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800298 .n(n)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800299 .k(k)
300 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800301 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800302 }
303 }
304 }
305
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800306 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_a) {
307 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800308 for (uint32_t n = 9; n < 16; n++) {
309 for (size_t k = 1; k <= 40; k += 9) {
310 GemmMicrokernelTester()
311 .mr(4)
312 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800313 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800314 .sr(1)
315 .m(4)
316 .n(n)
317 .k(k)
318 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -0800319 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800320 }
321 }
322 }
323
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800324 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_subtile) {
325 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800326 for (uint32_t n = 9; n < 16; n++) {
327 for (size_t k = 1; k <= 40; k += 9) {
328 for (uint32_t m = 1; m <= 4; m++) {
329 GemmMicrokernelTester()
330 .mr(4)
331 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800332 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800333 .sr(1)
334 .m(m)
335 .n(n)
336 .k(k)
337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800338 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800339 }
340 }
341 }
342 }
343
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800344 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8) {
345 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800346 for (uint32_t n = 16; n <= 24; n += 8) {
347 for (size_t k = 1; k <= 40; k += 9) {
348 GemmMicrokernelTester()
349 .mr(4)
350 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800351 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800352 .sr(1)
353 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800354 .n(n)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800355 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800356 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800357 }
358 }
359 }
360
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800361 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_cn) {
362 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800363 for (uint32_t n = 16; n <= 24; n += 8) {
364 for (size_t k = 1; k <= 40; k += 9) {
365 GemmMicrokernelTester()
366 .mr(4)
367 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800368 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800369 .sr(1)
370 .m(4)
371 .n(n)
372 .k(k)
373 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800374 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800375 }
376 }
377 }
378
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800379 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_a) {
380 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800381 for (uint32_t n = 16; n <= 24; n += 8) {
382 for (size_t k = 1; k <= 40; k += 9) {
383 GemmMicrokernelTester()
384 .mr(4)
385 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800386 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800387 .sr(1)
388 .m(4)
389 .n(n)
390 .k(k)
391 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -0800392 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800393 }
394 }
395 }
396
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800397 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_subtile) {
398 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800399 for (uint32_t n = 16; n <= 24; n += 8) {
400 for (size_t k = 1; k <= 40; k += 9) {
401 for (uint32_t m = 1; m <= 4; m++) {
402 GemmMicrokernelTester()
403 .mr(4)
404 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800405 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800406 .sr(1)
407 .m(m)
408 .n(n)
409 .k(k)
410 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800411 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800412 }
413 }
414 }
415 }
416
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800417 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cm_subtile) {
418 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800419 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800420 for (uint32_t n = 1; n <= 8; n++) {
421 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardda7b2e22021-12-13 23:50:53 -0800422 GemmMicrokernelTester()
423 .mr(4)
424 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800425 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800426 .sr(1)
427 .m(m)
428 .n(n)
429 .k(k)
430 .cm_stride(11)
431 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800432 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800433 }
434 }
435 }
436 }
437
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800438 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, qmin) {
439 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800440 GemmMicrokernelTester()
441 .mr(4)
442 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800443 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800444 .sr(1)
445 .m(4)
446 .n(8)
447 .k(8)
448 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800449 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800450 }
451
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800452 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, qmax) {
453 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800454 GemmMicrokernelTester()
455 .mr(4)
456 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800457 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800458 .sr(1)
459 .m(4)
460 .n(8)
461 .k(8)
462 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800463 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800464 }
465
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800466 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cm) {
467 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchardda7b2e22021-12-13 23:50:53 -0800468 GemmMicrokernelTester()
469 .mr(4)
470 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800471 .kr(4)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800472 .sr(1)
473 .m(4)
474 .n(8)
475 .k(8)
476 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800477 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800478 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800479#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
Frank Barchardda7b2e22021-12-13 23:50:53 -0800480
481
Frank Barcharde31f29e2021-12-21 15:57:10 -0800482#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Frank Barchard9e4d2aa2022-02-02 00:31:21 -0800483 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
484 TEST_REQUIRES_ARM_NEON;
485 GemmMicrokernelTester()
486 .mr(4)
487 .nr(8)
488 .kr(1)
489 .sr(1)
490 .m(4)
491 .n(8)
492 .k(8)
493 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
494 }
495
496 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
497 TEST_REQUIRES_ARM_NEON;
498 GemmMicrokernelTester()
499 .mr(4)
500 .nr(8)
501 .kr(1)
502 .sr(1)
503 .m(4)
504 .n(8)
505 .k(8)
506 .cn_stride(11)
507 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
508 }
509
510 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_strided_a) {
511 TEST_REQUIRES_ARM_NEON;
512 GemmMicrokernelTester()
513 .mr(4)
514 .nr(8)
515 .kr(1)
516 .sr(1)
517 .m(4)
518 .n(8)
519 .k(8)
520 .a_stride(11)
521 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
522 }
523
524 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
525 TEST_REQUIRES_ARM_NEON;
526 for (uint32_t n = 1; n <= 8; n++) {
527 for (uint32_t m = 1; m <= 4; m++) {
528 GemmMicrokernelTester()
529 .mr(4)
530 .nr(8)
531 .kr(1)
532 .sr(1)
533 .m(m)
534 .n(n)
535 .k(8)
536 .iterations(1)
537 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
538 }
539 }
540 }
541
542 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
543 TEST_REQUIRES_ARM_NEON;
544 for (uint32_t m = 1; m <= 4; m++) {
545 GemmMicrokernelTester()
546 .mr(4)
547 .nr(8)
548 .kr(1)
549 .sr(1)
550 .m(m)
551 .n(8)
552 .k(8)
553 .iterations(1)
554 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
555 }
556 }
557
558 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
559 TEST_REQUIRES_ARM_NEON;
560 for (uint32_t n = 1; n <= 8; n++) {
561 GemmMicrokernelTester()
562 .mr(4)
563 .nr(8)
564 .kr(1)
565 .sr(1)
566 .m(4)
567 .n(n)
568 .k(8)
569 .iterations(1)
570 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
571 }
572 }
573
574 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
575 TEST_REQUIRES_ARM_NEON;
576 for (size_t k = 1; k < 8; k++) {
577 GemmMicrokernelTester()
578 .mr(4)
579 .nr(8)
580 .kr(1)
581 .sr(1)
582 .m(4)
583 .n(8)
584 .k(k)
585 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
586 }
587 }
588
589 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_strided_a) {
590 TEST_REQUIRES_ARM_NEON;
591 for (size_t k = 1; k < 8; k++) {
592 GemmMicrokernelTester()
593 .mr(4)
594 .nr(8)
595 .kr(1)
596 .sr(1)
597 .m(4)
598 .n(8)
599 .k(k)
600 .a_stride(11)
601 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
602 }
603 }
604
605 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
606 TEST_REQUIRES_ARM_NEON;
607 for (size_t k = 1; k < 8; k++) {
608 for (uint32_t n = 1; n <= 8; n++) {
609 for (uint32_t m = 1; m <= 4; m++) {
610 GemmMicrokernelTester()
611 .mr(4)
612 .nr(8)
613 .kr(1)
614 .sr(1)
615 .m(m)
616 .n(n)
617 .k(k)
618 .iterations(1)
619 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
620 }
621 }
622 }
623 }
624
625 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
626 TEST_REQUIRES_ARM_NEON;
627 for (size_t k = 9; k < 16; k++) {
628 GemmMicrokernelTester()
629 .mr(4)
630 .nr(8)
631 .kr(1)
632 .sr(1)
633 .m(4)
634 .n(8)
635 .k(k)
636 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
637 }
638 }
639
640 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_strided_a) {
641 TEST_REQUIRES_ARM_NEON;
642 for (size_t k = 9; k < 16; k++) {
643 GemmMicrokernelTester()
644 .mr(4)
645 .nr(8)
646 .kr(1)
647 .sr(1)
648 .m(4)
649 .n(8)
650 .k(k)
651 .a_stride(19)
652 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
653 }
654 }
655
656 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
657 TEST_REQUIRES_ARM_NEON;
658 for (size_t k = 9; k < 16; k++) {
659 for (uint32_t n = 1; n <= 8; n++) {
660 for (uint32_t m = 1; m <= 4; m++) {
661 GemmMicrokernelTester()
662 .mr(4)
663 .nr(8)
664 .kr(1)
665 .sr(1)
666 .m(m)
667 .n(n)
668 .k(k)
669 .iterations(1)
670 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
671 }
672 }
673 }
674 }
675
676 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
677 TEST_REQUIRES_ARM_NEON;
678 for (size_t k = 16; k <= 80; k += 8) {
679 GemmMicrokernelTester()
680 .mr(4)
681 .nr(8)
682 .kr(1)
683 .sr(1)
684 .m(4)
685 .n(8)
686 .k(k)
687 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
688 }
689 }
690
691 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_div_8_strided_a) {
692 TEST_REQUIRES_ARM_NEON;
693 for (size_t k = 16; k <= 80; k += 8) {
694 GemmMicrokernelTester()
695 .mr(4)
696 .nr(8)
697 .kr(1)
698 .sr(1)
699 .m(4)
700 .n(8)
701 .k(k)
702 .a_stride(83)
703 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
704 }
705 }
706
707 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
708 TEST_REQUIRES_ARM_NEON;
709 for (size_t k = 16; k <= 80; k += 8) {
710 for (uint32_t n = 1; n <= 8; n++) {
711 for (uint32_t m = 1; m <= 4; m++) {
712 GemmMicrokernelTester()
713 .mr(4)
714 .nr(8)
715 .kr(1)
716 .sr(1)
717 .m(m)
718 .n(n)
719 .k(k)
720 .iterations(1)
721 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
722 }
723 }
724 }
725 }
726
727 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8) {
728 TEST_REQUIRES_ARM_NEON;
729 for (uint32_t n = 9; n < 16; n++) {
730 for (size_t k = 1; k <= 40; k += 9) {
731 GemmMicrokernelTester()
732 .mr(4)
733 .nr(8)
734 .kr(1)
735 .sr(1)
736 .m(4)
737 .n(n)
738 .k(k)
739 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
740 }
741 }
742 }
743
744 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_strided_cn) {
745 TEST_REQUIRES_ARM_NEON;
746 for (uint32_t n = 9; n < 16; n++) {
747 for (size_t k = 1; k <= 40; k += 9) {
748 GemmMicrokernelTester()
749 .mr(4)
750 .nr(8)
751 .kr(1)
752 .sr(1)
753 .m(4)
754 .n(n)
755 .k(k)
756 .cn_stride(11)
757 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
758 }
759 }
760 }
761
762 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_strided_a) {
763 TEST_REQUIRES_ARM_NEON;
764 for (uint32_t n = 9; n < 16; n++) {
765 for (size_t k = 1; k <= 40; k += 9) {
766 GemmMicrokernelTester()
767 .mr(4)
768 .nr(8)
769 .kr(1)
770 .sr(1)
771 .m(4)
772 .n(n)
773 .k(k)
774 .a_stride(43)
775 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
776 }
777 }
778 }
779
780 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_subtile) {
781 TEST_REQUIRES_ARM_NEON;
782 for (uint32_t n = 9; n < 16; n++) {
783 for (size_t k = 1; k <= 40; k += 9) {
784 for (uint32_t m = 1; m <= 4; m++) {
785 GemmMicrokernelTester()
786 .mr(4)
787 .nr(8)
788 .kr(1)
789 .sr(1)
790 .m(m)
791 .n(n)
792 .k(k)
793 .iterations(1)
794 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
795 }
796 }
797 }
798 }
799
800 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8) {
801 TEST_REQUIRES_ARM_NEON;
802 for (uint32_t n = 16; n <= 24; n += 8) {
803 for (size_t k = 1; k <= 40; k += 9) {
804 GemmMicrokernelTester()
805 .mr(4)
806 .nr(8)
807 .kr(1)
808 .sr(1)
809 .m(4)
810 .n(n)
811 .k(k)
812 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
813 }
814 }
815 }
816
817 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_strided_cn) {
818 TEST_REQUIRES_ARM_NEON;
819 for (uint32_t n = 16; n <= 24; n += 8) {
820 for (size_t k = 1; k <= 40; k += 9) {
821 GemmMicrokernelTester()
822 .mr(4)
823 .nr(8)
824 .kr(1)
825 .sr(1)
826 .m(4)
827 .n(n)
828 .k(k)
829 .cn_stride(11)
830 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
831 }
832 }
833 }
834
835 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_strided_a) {
836 TEST_REQUIRES_ARM_NEON;
837 for (uint32_t n = 16; n <= 24; n += 8) {
838 for (size_t k = 1; k <= 40; k += 9) {
839 GemmMicrokernelTester()
840 .mr(4)
841 .nr(8)
842 .kr(1)
843 .sr(1)
844 .m(4)
845 .n(n)
846 .k(k)
847 .a_stride(43)
848 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
849 }
850 }
851 }
852
853 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_subtile) {
854 TEST_REQUIRES_ARM_NEON;
855 for (uint32_t n = 16; n <= 24; n += 8) {
856 for (size_t k = 1; k <= 40; k += 9) {
857 for (uint32_t m = 1; m <= 4; m++) {
858 GemmMicrokernelTester()
859 .mr(4)
860 .nr(8)
861 .kr(1)
862 .sr(1)
863 .m(m)
864 .n(n)
865 .k(k)
866 .iterations(1)
867 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
868 }
869 }
870 }
871 }
872
873 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
874 TEST_REQUIRES_ARM_NEON;
875 for (size_t k = 1; k <= 40; k += 9) {
876 for (uint32_t n = 1; n <= 8; n++) {
877 for (uint32_t m = 1; m <= 4; m++) {
878 GemmMicrokernelTester()
879 .mr(4)
880 .nr(8)
881 .kr(1)
882 .sr(1)
883 .m(m)
884 .n(n)
885 .k(k)
886 .cm_stride(11)
887 .iterations(1)
888 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
889 }
890 }
891 }
892 }
893
894 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, qmin) {
895 TEST_REQUIRES_ARM_NEON;
896 GemmMicrokernelTester()
897 .mr(4)
898 .nr(8)
899 .kr(1)
900 .sr(1)
901 .m(4)
902 .n(8)
903 .k(8)
904 .qmin(128)
905 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
906 }
907
908 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, qmax) {
909 TEST_REQUIRES_ARM_NEON;
910 GemmMicrokernelTester()
911 .mr(4)
912 .nr(8)
913 .kr(1)
914 .sr(1)
915 .m(4)
916 .n(8)
917 .k(8)
918 .qmax(128)
919 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
920 }
921
922 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
923 TEST_REQUIRES_ARM_NEON;
924 GemmMicrokernelTester()
925 .mr(4)
926 .nr(8)
927 .kr(1)
928 .sr(1)
929 .m(4)
930 .n(8)
931 .k(8)
932 .cm_stride(11)
933 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
934 }
935#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
936
937
938#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Frank Barchardda7b2e22021-12-13 23:50:53 -0800939 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8) {
940 TEST_REQUIRES_ARM_NEON;
941 GemmMicrokernelTester()
942 .mr(4)
943 .nr(8)
944 .kr(1)
945 .sr(1)
946 .m(4)
947 .n(8)
948 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -0800949 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800950 }
951
952 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cn) {
953 TEST_REQUIRES_ARM_NEON;
954 GemmMicrokernelTester()
955 .mr(4)
956 .nr(8)
957 .kr(1)
958 .sr(1)
959 .m(4)
960 .n(8)
961 .k(8)
962 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800963 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800964 }
965
966 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_strided_a) {
967 TEST_REQUIRES_ARM_NEON;
968 GemmMicrokernelTester()
969 .mr(4)
970 .nr(8)
971 .kr(1)
972 .sr(1)
973 .m(4)
974 .n(8)
975 .k(8)
976 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800977 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800978 }
979
980 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
981 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800982 for (uint32_t n = 1; n <= 8; n++) {
983 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardda7b2e22021-12-13 23:50:53 -0800984 GemmMicrokernelTester()
985 .mr(4)
986 .nr(8)
987 .kr(1)
988 .sr(1)
989 .m(m)
990 .n(n)
991 .k(8)
992 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800993 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -0800994 }
995 }
996 }
997
998 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
999 TEST_REQUIRES_ARM_NEON;
1000 for (uint32_t m = 1; m <= 4; m++) {
1001 GemmMicrokernelTester()
1002 .mr(4)
1003 .nr(8)
1004 .kr(1)
1005 .sr(1)
1006 .m(m)
1007 .n(8)
1008 .k(8)
1009 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001010 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001011 }
1012 }
1013
1014 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
1015 TEST_REQUIRES_ARM_NEON;
1016 for (uint32_t n = 1; n <= 8; n++) {
1017 GemmMicrokernelTester()
1018 .mr(4)
1019 .nr(8)
1020 .kr(1)
1021 .sr(1)
1022 .m(4)
1023 .n(n)
1024 .k(8)
1025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001026 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001027 }
1028 }
1029
1030 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8) {
1031 TEST_REQUIRES_ARM_NEON;
1032 for (size_t k = 1; k < 8; k++) {
1033 GemmMicrokernelTester()
1034 .mr(4)
1035 .nr(8)
1036 .kr(1)
1037 .sr(1)
1038 .m(4)
1039 .n(8)
1040 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001041 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001042 }
1043 }
1044
1045 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_strided_a) {
1046 TEST_REQUIRES_ARM_NEON;
1047 for (size_t k = 1; k < 8; k++) {
1048 GemmMicrokernelTester()
1049 .mr(4)
1050 .nr(8)
1051 .kr(1)
1052 .sr(1)
1053 .m(4)
1054 .n(8)
1055 .k(k)
1056 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001057 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001058 }
1059 }
1060
1061 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
1062 TEST_REQUIRES_ARM_NEON;
1063 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001064 for (uint32_t n = 1; n <= 8; n++) {
1065 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardda7b2e22021-12-13 23:50:53 -08001066 GemmMicrokernelTester()
1067 .mr(4)
1068 .nr(8)
1069 .kr(1)
1070 .sr(1)
1071 .m(m)
1072 .n(n)
1073 .k(k)
1074 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001075 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001076 }
1077 }
1078 }
1079 }
1080
1081 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8) {
1082 TEST_REQUIRES_ARM_NEON;
1083 for (size_t k = 9; k < 16; k++) {
1084 GemmMicrokernelTester()
1085 .mr(4)
1086 .nr(8)
1087 .kr(1)
1088 .sr(1)
1089 .m(4)
1090 .n(8)
1091 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001092 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001093 }
1094 }
1095
1096 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_strided_a) {
1097 TEST_REQUIRES_ARM_NEON;
1098 for (size_t k = 9; k < 16; k++) {
1099 GemmMicrokernelTester()
1100 .mr(4)
1101 .nr(8)
1102 .kr(1)
1103 .sr(1)
1104 .m(4)
1105 .n(8)
1106 .k(k)
1107 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001108 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001109 }
1110 }
1111
1112 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
1113 TEST_REQUIRES_ARM_NEON;
1114 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001115 for (uint32_t n = 1; n <= 8; n++) {
1116 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardda7b2e22021-12-13 23:50:53 -08001117 GemmMicrokernelTester()
1118 .mr(4)
1119 .nr(8)
1120 .kr(1)
1121 .sr(1)
1122 .m(m)
1123 .n(n)
1124 .k(k)
1125 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001126 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001127 }
1128 }
1129 }
1130 }
1131
1132 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8) {
1133 TEST_REQUIRES_ARM_NEON;
1134 for (size_t k = 16; k <= 80; k += 8) {
1135 GemmMicrokernelTester()
1136 .mr(4)
1137 .nr(8)
1138 .kr(1)
1139 .sr(1)
1140 .m(4)
1141 .n(8)
1142 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001143 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001144 }
1145 }
1146
1147 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_strided_a) {
1148 TEST_REQUIRES_ARM_NEON;
1149 for (size_t k = 16; k <= 80; k += 8) {
1150 GemmMicrokernelTester()
1151 .mr(4)
1152 .nr(8)
1153 .kr(1)
1154 .sr(1)
1155 .m(4)
1156 .n(8)
1157 .k(k)
1158 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001159 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001160 }
1161 }
1162
1163 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
1164 TEST_REQUIRES_ARM_NEON;
1165 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001166 for (uint32_t n = 1; n <= 8; n++) {
1167 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardda7b2e22021-12-13 23:50:53 -08001168 GemmMicrokernelTester()
1169 .mr(4)
1170 .nr(8)
1171 .kr(1)
1172 .sr(1)
1173 .m(m)
1174 .n(n)
1175 .k(k)
1176 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001177 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001178 }
1179 }
1180 }
1181 }
1182
1183 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8) {
1184 TEST_REQUIRES_ARM_NEON;
1185 for (uint32_t n = 9; n < 16; n++) {
1186 for (size_t k = 1; k <= 40; k += 9) {
1187 GemmMicrokernelTester()
1188 .mr(4)
1189 .nr(8)
1190 .kr(1)
1191 .sr(1)
1192 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001193 .n(n)
Frank Barchardda7b2e22021-12-13 23:50:53 -08001194 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001195 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001196 }
1197 }
1198 }
1199
1200 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) {
1201 TEST_REQUIRES_ARM_NEON;
1202 for (uint32_t n = 9; n < 16; n++) {
1203 for (size_t k = 1; k <= 40; k += 9) {
1204 GemmMicrokernelTester()
1205 .mr(4)
1206 .nr(8)
1207 .kr(1)
1208 .sr(1)
1209 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001210 .n(n)
Frank Barchardda7b2e22021-12-13 23:50:53 -08001211 .k(k)
1212 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001213 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001214 }
1215 }
1216 }
1217
1218 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_a) {
1219 TEST_REQUIRES_ARM_NEON;
1220 for (uint32_t n = 9; n < 16; n++) {
1221 for (size_t k = 1; k <= 40; k += 9) {
1222 GemmMicrokernelTester()
1223 .mr(4)
1224 .nr(8)
1225 .kr(1)
1226 .sr(1)
1227 .m(4)
1228 .n(n)
1229 .k(k)
1230 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001231 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001232 }
1233 }
1234 }
1235
1236 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_subtile) {
1237 TEST_REQUIRES_ARM_NEON;
1238 for (uint32_t n = 9; n < 16; n++) {
1239 for (size_t k = 1; k <= 40; k += 9) {
1240 for (uint32_t m = 1; m <= 4; m++) {
1241 GemmMicrokernelTester()
1242 .mr(4)
1243 .nr(8)
1244 .kr(1)
1245 .sr(1)
1246 .m(m)
1247 .n(n)
1248 .k(k)
1249 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001250 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001251 }
1252 }
1253 }
1254 }
1255
1256 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8) {
1257 TEST_REQUIRES_ARM_NEON;
1258 for (uint32_t n = 16; n <= 24; n += 8) {
1259 for (size_t k = 1; k <= 40; k += 9) {
1260 GemmMicrokernelTester()
1261 .mr(4)
1262 .nr(8)
1263 .kr(1)
1264 .sr(1)
1265 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001266 .n(n)
Frank Barchardda7b2e22021-12-13 23:50:53 -08001267 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001268 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001269 }
1270 }
1271 }
1272
1273 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) {
1274 TEST_REQUIRES_ARM_NEON;
1275 for (uint32_t n = 16; n <= 24; n += 8) {
1276 for (size_t k = 1; k <= 40; k += 9) {
1277 GemmMicrokernelTester()
1278 .mr(4)
1279 .nr(8)
1280 .kr(1)
1281 .sr(1)
1282 .m(4)
1283 .n(n)
1284 .k(k)
1285 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001286 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001287 }
1288 }
1289 }
1290
1291 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_a) {
1292 TEST_REQUIRES_ARM_NEON;
1293 for (uint32_t n = 16; n <= 24; n += 8) {
1294 for (size_t k = 1; k <= 40; k += 9) {
1295 GemmMicrokernelTester()
1296 .mr(4)
1297 .nr(8)
1298 .kr(1)
1299 .sr(1)
1300 .m(4)
1301 .n(n)
1302 .k(k)
1303 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001304 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001305 }
1306 }
1307 }
1308
1309 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_subtile) {
1310 TEST_REQUIRES_ARM_NEON;
1311 for (uint32_t n = 16; n <= 24; n += 8) {
1312 for (size_t k = 1; k <= 40; k += 9) {
1313 for (uint32_t m = 1; m <= 4; m++) {
1314 GemmMicrokernelTester()
1315 .mr(4)
1316 .nr(8)
1317 .kr(1)
1318 .sr(1)
1319 .m(m)
1320 .n(n)
1321 .k(k)
1322 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001323 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001324 }
1325 }
1326 }
1327 }
1328
1329 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
1330 TEST_REQUIRES_ARM_NEON;
1331 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001332 for (uint32_t n = 1; n <= 8; n++) {
1333 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardda7b2e22021-12-13 23:50:53 -08001334 GemmMicrokernelTester()
1335 .mr(4)
1336 .nr(8)
1337 .kr(1)
1338 .sr(1)
1339 .m(m)
1340 .n(n)
1341 .k(k)
1342 .cm_stride(11)
1343 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001344 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001345 }
1346 }
1347 }
1348 }
1349
1350 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmin) {
1351 TEST_REQUIRES_ARM_NEON;
1352 GemmMicrokernelTester()
1353 .mr(4)
1354 .nr(8)
1355 .kr(1)
1356 .sr(1)
1357 .m(4)
1358 .n(8)
1359 .k(8)
1360 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001361 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001362 }
1363
1364 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmax) {
1365 TEST_REQUIRES_ARM_NEON;
1366 GemmMicrokernelTester()
1367 .mr(4)
1368 .nr(8)
1369 .kr(1)
1370 .sr(1)
1371 .m(4)
1372 .n(8)
1373 .k(8)
1374 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001375 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001376 }
1377
1378 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm) {
1379 TEST_REQUIRES_ARM_NEON;
1380 GemmMicrokernelTester()
1381 .mr(4)
1382 .nr(8)
1383 .kr(1)
1384 .sr(1)
1385 .m(4)
1386 .n(8)
1387 .k(8)
1388 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001389 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardda7b2e22021-12-13 23:50:53 -08001390 }
Frank Barcharde31f29e2021-12-21 15:57:10 -08001391#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Frank Barchardda7b2e22021-12-13 23:50:53 -08001392
1393
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001394#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1395 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_eq_8) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08001396 TEST_REQUIRES_ARM_NEON;
1397 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001398 .mr(3)
Frank Barcharde4d3f762021-12-23 15:31:43 -08001399 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001400 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08001401 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001402 .m(3)
Frank Barcharde4d3f762021-12-23 15:31:43 -08001403 .n(8)
1404 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001405 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08001406 }
1407
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001408 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, strided_cn) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08001409 TEST_REQUIRES_ARM_NEON;
1410 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001411 .mr(3)
Frank Barcharde4d3f762021-12-23 15:31:43 -08001412 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001413 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08001414 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001415 .m(3)
Frank Barcharde4d3f762021-12-23 15:31:43 -08001416 .n(8)
1417 .k(8)
1418 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001419 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08001420 }
1421
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001422 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_eq_8_strided_a) {
1423 TEST_REQUIRES_ARM_NEON;
1424 GemmMicrokernelTester()
1425 .mr(3)
1426 .nr(8)
1427 .kr(2)
1428 .sr(1)
1429 .m(3)
1430 .n(8)
1431 .k(8)
1432 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001433 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001434 }
1435
1436 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_eq_8_subtile) {
1437 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001438 for (uint32_t n = 1; n <= 8; n++) {
1439 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001440 GemmMicrokernelTester()
1441 .mr(3)
1442 .nr(8)
1443 .kr(2)
1444 .sr(1)
1445 .m(m)
1446 .n(n)
1447 .k(8)
1448 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001449 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001450 }
1451 }
1452 }
1453
1454 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
1455 TEST_REQUIRES_ARM_NEON;
1456 for (uint32_t m = 1; m <= 3; m++) {
1457 GemmMicrokernelTester()
1458 .mr(3)
1459 .nr(8)
1460 .kr(2)
1461 .sr(1)
1462 .m(m)
1463 .n(8)
1464 .k(8)
1465 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001466 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001467 }
1468 }
1469
1470 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
1471 TEST_REQUIRES_ARM_NEON;
1472 for (uint32_t n = 1; n <= 8; n++) {
1473 GemmMicrokernelTester()
1474 .mr(3)
1475 .nr(8)
1476 .kr(2)
1477 .sr(1)
1478 .m(3)
1479 .n(n)
1480 .k(8)
1481 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001482 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001483 }
1484 }
1485
1486 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_lt_8) {
1487 TEST_REQUIRES_ARM_NEON;
1488 for (size_t k = 1; k < 8; k++) {
1489 GemmMicrokernelTester()
1490 .mr(3)
1491 .nr(8)
1492 .kr(2)
1493 .sr(1)
1494 .m(3)
1495 .n(8)
1496 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001497 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001498 }
1499 }
1500
1501 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_lt_8_strided_a) {
1502 TEST_REQUIRES_ARM_NEON;
1503 for (size_t k = 1; k < 8; k++) {
1504 GemmMicrokernelTester()
1505 .mr(3)
1506 .nr(8)
1507 .kr(2)
1508 .sr(1)
1509 .m(3)
1510 .n(8)
1511 .k(k)
1512 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001513 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001514 }
1515 }
1516
1517 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_lt_8_subtile) {
1518 TEST_REQUIRES_ARM_NEON;
1519 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001520 for (uint32_t n = 1; n <= 8; n++) {
1521 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001522 GemmMicrokernelTester()
1523 .mr(3)
1524 .nr(8)
1525 .kr(2)
1526 .sr(1)
1527 .m(m)
1528 .n(n)
1529 .k(k)
1530 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001531 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001532 }
1533 }
1534 }
1535 }
1536
1537 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_gt_8) {
1538 TEST_REQUIRES_ARM_NEON;
1539 for (size_t k = 9; k < 16; k++) {
1540 GemmMicrokernelTester()
1541 .mr(3)
1542 .nr(8)
1543 .kr(2)
1544 .sr(1)
1545 .m(3)
1546 .n(8)
1547 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001548 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001549 }
1550 }
1551
1552 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_gt_8_strided_a) {
1553 TEST_REQUIRES_ARM_NEON;
1554 for (size_t k = 9; k < 16; k++) {
1555 GemmMicrokernelTester()
1556 .mr(3)
1557 .nr(8)
1558 .kr(2)
1559 .sr(1)
1560 .m(3)
1561 .n(8)
1562 .k(k)
1563 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001564 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001565 }
1566 }
1567
1568 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_gt_8_subtile) {
1569 TEST_REQUIRES_ARM_NEON;
1570 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001571 for (uint32_t n = 1; n <= 8; n++) {
1572 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001573 GemmMicrokernelTester()
1574 .mr(3)
1575 .nr(8)
1576 .kr(2)
1577 .sr(1)
1578 .m(m)
1579 .n(n)
1580 .k(k)
1581 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001582 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001583 }
1584 }
1585 }
1586 }
1587
1588 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_div_8) {
1589 TEST_REQUIRES_ARM_NEON;
1590 for (size_t k = 16; k <= 80; k += 8) {
1591 GemmMicrokernelTester()
1592 .mr(3)
1593 .nr(8)
1594 .kr(2)
1595 .sr(1)
1596 .m(3)
1597 .n(8)
1598 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001599 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001600 }
1601 }
1602
1603 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_div_8_strided_a) {
1604 TEST_REQUIRES_ARM_NEON;
1605 for (size_t k = 16; k <= 80; k += 8) {
1606 GemmMicrokernelTester()
1607 .mr(3)
1608 .nr(8)
1609 .kr(2)
1610 .sr(1)
1611 .m(3)
1612 .n(8)
1613 .k(k)
1614 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001615 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001616 }
1617 }
1618
1619 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_div_8_subtile) {
1620 TEST_REQUIRES_ARM_NEON;
1621 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001622 for (uint32_t n = 1; n <= 8; n++) {
1623 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001624 GemmMicrokernelTester()
1625 .mr(3)
1626 .nr(8)
1627 .kr(2)
1628 .sr(1)
1629 .m(m)
1630 .n(n)
1631 .k(k)
1632 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001633 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001634 }
1635 }
1636 }
1637 }
1638
1639 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_gt_8) {
1640 TEST_REQUIRES_ARM_NEON;
1641 for (uint32_t n = 9; n < 16; n++) {
1642 for (size_t k = 1; k <= 40; k += 9) {
1643 GemmMicrokernelTester()
1644 .mr(3)
1645 .nr(8)
1646 .kr(2)
1647 .sr(1)
1648 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001649 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001650 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001651 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001652 }
1653 }
1654 }
1655
1656 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) {
1657 TEST_REQUIRES_ARM_NEON;
1658 for (uint32_t n = 9; n < 16; n++) {
1659 for (size_t k = 1; k <= 40; k += 9) {
1660 GemmMicrokernelTester()
1661 .mr(3)
1662 .nr(8)
1663 .kr(2)
1664 .sr(1)
1665 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001666 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001667 .k(k)
1668 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001669 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001670 }
1671 }
1672 }
1673
1674 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_gt_8_strided_a) {
1675 TEST_REQUIRES_ARM_NEON;
1676 for (uint32_t n = 9; n < 16; n++) {
1677 for (size_t k = 1; k <= 40; k += 9) {
1678 GemmMicrokernelTester()
1679 .mr(3)
1680 .nr(8)
1681 .kr(2)
1682 .sr(1)
1683 .m(3)
1684 .n(n)
1685 .k(k)
1686 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001687 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001688 }
1689 }
1690 }
1691
1692 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_gt_8_subtile) {
1693 TEST_REQUIRES_ARM_NEON;
1694 for (uint32_t n = 9; n < 16; n++) {
1695 for (size_t k = 1; k <= 40; k += 9) {
1696 for (uint32_t m = 1; m <= 3; m++) {
1697 GemmMicrokernelTester()
1698 .mr(3)
1699 .nr(8)
1700 .kr(2)
1701 .sr(1)
1702 .m(m)
1703 .n(n)
1704 .k(k)
1705 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001706 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001707 }
1708 }
1709 }
1710 }
1711
1712 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_div_8) {
1713 TEST_REQUIRES_ARM_NEON;
1714 for (uint32_t n = 16; n <= 24; n += 8) {
1715 for (size_t k = 1; k <= 40; k += 9) {
1716 GemmMicrokernelTester()
1717 .mr(3)
1718 .nr(8)
1719 .kr(2)
1720 .sr(1)
1721 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001722 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001723 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001724 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001725 }
1726 }
1727 }
1728
1729 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) {
1730 TEST_REQUIRES_ARM_NEON;
1731 for (uint32_t n = 16; n <= 24; n += 8) {
1732 for (size_t k = 1; k <= 40; k += 9) {
1733 GemmMicrokernelTester()
1734 .mr(3)
1735 .nr(8)
1736 .kr(2)
1737 .sr(1)
1738 .m(3)
1739 .n(n)
1740 .k(k)
1741 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001742 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001743 }
1744 }
1745 }
1746
1747 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_div_8_strided_a) {
1748 TEST_REQUIRES_ARM_NEON;
1749 for (uint32_t n = 16; n <= 24; n += 8) {
1750 for (size_t k = 1; k <= 40; k += 9) {
1751 GemmMicrokernelTester()
1752 .mr(3)
1753 .nr(8)
1754 .kr(2)
1755 .sr(1)
1756 .m(3)
1757 .n(n)
1758 .k(k)
1759 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001760 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001761 }
1762 }
1763 }
1764
1765 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_div_8_subtile) {
1766 TEST_REQUIRES_ARM_NEON;
1767 for (uint32_t n = 16; n <= 24; n += 8) {
1768 for (size_t k = 1; k <= 40; k += 9) {
1769 for (uint32_t m = 1; m <= 3; m++) {
1770 GemmMicrokernelTester()
1771 .mr(3)
1772 .nr(8)
1773 .kr(2)
1774 .sr(1)
1775 .m(m)
1776 .n(n)
1777 .k(k)
1778 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001779 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001780 }
1781 }
1782 }
1783 }
1784
1785 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, strided_cm_subtile) {
1786 TEST_REQUIRES_ARM_NEON;
1787 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001788 for (uint32_t n = 1; n <= 8; n++) {
1789 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001790 GemmMicrokernelTester()
1791 .mr(3)
1792 .nr(8)
1793 .kr(2)
1794 .sr(1)
1795 .m(m)
1796 .n(n)
1797 .k(k)
1798 .cm_stride(11)
1799 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001800 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001801 }
1802 }
1803 }
1804 }
1805
1806 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, qmin) {
1807 TEST_REQUIRES_ARM_NEON;
1808 GemmMicrokernelTester()
1809 .mr(3)
1810 .nr(8)
1811 .kr(2)
1812 .sr(1)
1813 .m(3)
1814 .n(8)
1815 .k(8)
1816 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001817 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001818 }
1819
1820 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, qmax) {
1821 TEST_REQUIRES_ARM_NEON;
1822 GemmMicrokernelTester()
1823 .mr(3)
1824 .nr(8)
1825 .kr(2)
1826 .sr(1)
1827 .m(3)
1828 .n(8)
1829 .k(8)
1830 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001831 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001832 }
1833
1834 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, strided_cm) {
1835 TEST_REQUIRES_ARM_NEON;
1836 GemmMicrokernelTester()
1837 .mr(3)
1838 .nr(8)
1839 .kr(2)
1840 .sr(1)
1841 .m(3)
1842 .n(8)
1843 .k(8)
1844 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001845 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001846 }
1847#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1848
1849
1850#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1851 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_eq_8) {
1852 TEST_REQUIRES_ARM_NEON;
1853 GemmMicrokernelTester()
1854 .mr(4)
1855 .nr(16)
1856 .kr(2)
1857 .sr(1)
1858 .m(4)
1859 .n(16)
1860 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001861 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001862 }
1863
1864 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, strided_cn) {
1865 TEST_REQUIRES_ARM_NEON;
1866 GemmMicrokernelTester()
1867 .mr(4)
1868 .nr(16)
1869 .kr(2)
1870 .sr(1)
1871 .m(4)
1872 .n(16)
1873 .k(8)
1874 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001875 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001876 }
1877
1878 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_eq_8_strided_a) {
1879 TEST_REQUIRES_ARM_NEON;
1880 GemmMicrokernelTester()
1881 .mr(4)
1882 .nr(16)
1883 .kr(2)
1884 .sr(1)
1885 .m(4)
1886 .n(16)
1887 .k(8)
1888 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001889 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001890 }
1891
1892 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_eq_8_subtile) {
1893 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001894 for (uint32_t n = 1; n <= 16; n++) {
1895 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001896 GemmMicrokernelTester()
1897 .mr(4)
1898 .nr(16)
1899 .kr(2)
1900 .sr(1)
1901 .m(m)
1902 .n(n)
1903 .k(8)
1904 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001905 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001906 }
1907 }
1908 }
1909
1910 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
1911 TEST_REQUIRES_ARM_NEON;
1912 for (uint32_t m = 1; m <= 4; m++) {
1913 GemmMicrokernelTester()
1914 .mr(4)
1915 .nr(16)
1916 .kr(2)
1917 .sr(1)
1918 .m(m)
1919 .n(16)
1920 .k(8)
1921 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001922 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001923 }
1924 }
1925
1926 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
1927 TEST_REQUIRES_ARM_NEON;
1928 for (uint32_t n = 1; n <= 16; n++) {
1929 GemmMicrokernelTester()
1930 .mr(4)
1931 .nr(16)
1932 .kr(2)
1933 .sr(1)
1934 .m(4)
1935 .n(n)
1936 .k(8)
1937 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001938 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001939 }
1940 }
1941
1942 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_lt_8) {
1943 TEST_REQUIRES_ARM_NEON;
1944 for (size_t k = 1; k < 8; k++) {
1945 GemmMicrokernelTester()
1946 .mr(4)
1947 .nr(16)
1948 .kr(2)
1949 .sr(1)
1950 .m(4)
1951 .n(16)
1952 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001953 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001954 }
1955 }
1956
1957 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_lt_8_strided_a) {
1958 TEST_REQUIRES_ARM_NEON;
1959 for (size_t k = 1; k < 8; k++) {
1960 GemmMicrokernelTester()
1961 .mr(4)
1962 .nr(16)
1963 .kr(2)
1964 .sr(1)
1965 .m(4)
1966 .n(16)
1967 .k(k)
1968 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001969 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001970 }
1971 }
1972
1973 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_lt_8_subtile) {
1974 TEST_REQUIRES_ARM_NEON;
1975 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001976 for (uint32_t n = 1; n <= 16; n++) {
1977 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001978 GemmMicrokernelTester()
1979 .mr(4)
1980 .nr(16)
1981 .kr(2)
1982 .sr(1)
1983 .m(m)
1984 .n(n)
1985 .k(k)
1986 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001987 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001988 }
1989 }
1990 }
1991 }
1992
1993 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_gt_8) {
1994 TEST_REQUIRES_ARM_NEON;
1995 for (size_t k = 9; k < 16; k++) {
1996 GemmMicrokernelTester()
1997 .mr(4)
1998 .nr(16)
1999 .kr(2)
2000 .sr(1)
2001 .m(4)
2002 .n(16)
2003 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002004 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002005 }
2006 }
2007
2008 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_gt_8_strided_a) {
2009 TEST_REQUIRES_ARM_NEON;
2010 for (size_t k = 9; k < 16; k++) {
2011 GemmMicrokernelTester()
2012 .mr(4)
2013 .nr(16)
2014 .kr(2)
2015 .sr(1)
2016 .m(4)
2017 .n(16)
2018 .k(k)
2019 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002020 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002021 }
2022 }
2023
2024 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_gt_8_subtile) {
2025 TEST_REQUIRES_ARM_NEON;
2026 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002027 for (uint32_t n = 1; n <= 16; n++) {
2028 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002029 GemmMicrokernelTester()
2030 .mr(4)
2031 .nr(16)
2032 .kr(2)
2033 .sr(1)
2034 .m(m)
2035 .n(n)
2036 .k(k)
2037 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002038 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002039 }
2040 }
2041 }
2042 }
2043
2044 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_div_8) {
2045 TEST_REQUIRES_ARM_NEON;
2046 for (size_t k = 16; k <= 80; k += 8) {
2047 GemmMicrokernelTester()
2048 .mr(4)
2049 .nr(16)
2050 .kr(2)
2051 .sr(1)
2052 .m(4)
2053 .n(16)
2054 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002055 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002056 }
2057 }
2058
2059 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_div_8_strided_a) {
2060 TEST_REQUIRES_ARM_NEON;
2061 for (size_t k = 16; k <= 80; k += 8) {
2062 GemmMicrokernelTester()
2063 .mr(4)
2064 .nr(16)
2065 .kr(2)
2066 .sr(1)
2067 .m(4)
2068 .n(16)
2069 .k(k)
2070 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002071 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002072 }
2073 }
2074
2075 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, k_div_8_subtile) {
2076 TEST_REQUIRES_ARM_NEON;
2077 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002078 for (uint32_t n = 1; n <= 16; n++) {
2079 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002080 GemmMicrokernelTester()
2081 .mr(4)
2082 .nr(16)
2083 .kr(2)
2084 .sr(1)
2085 .m(m)
2086 .n(n)
2087 .k(k)
2088 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002089 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002090 }
2091 }
2092 }
2093 }
2094
2095 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_gt_16) {
2096 TEST_REQUIRES_ARM_NEON;
2097 for (uint32_t n = 17; n < 32; n++) {
2098 for (size_t k = 1; k <= 40; k += 9) {
2099 GemmMicrokernelTester()
2100 .mr(4)
2101 .nr(16)
2102 .kr(2)
2103 .sr(1)
2104 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002105 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002106 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002107 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002108 }
2109 }
2110 }
2111
2112 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_gt_16_strided_cn) {
2113 TEST_REQUIRES_ARM_NEON;
2114 for (uint32_t n = 17; n < 32; n++) {
2115 for (size_t k = 1; k <= 40; k += 9) {
2116 GemmMicrokernelTester()
2117 .mr(4)
2118 .nr(16)
2119 .kr(2)
2120 .sr(1)
2121 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002122 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002123 .k(k)
2124 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002125 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002126 }
2127 }
2128 }
2129
2130 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_gt_16_strided_a) {
2131 TEST_REQUIRES_ARM_NEON;
2132 for (uint32_t n = 17; n < 32; n++) {
2133 for (size_t k = 1; k <= 40; k += 9) {
2134 GemmMicrokernelTester()
2135 .mr(4)
2136 .nr(16)
2137 .kr(2)
2138 .sr(1)
2139 .m(4)
2140 .n(n)
2141 .k(k)
2142 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002143 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002144 }
2145 }
2146 }
2147
2148 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_gt_16_subtile) {
2149 TEST_REQUIRES_ARM_NEON;
2150 for (uint32_t n = 17; n < 32; n++) {
2151 for (size_t k = 1; k <= 40; k += 9) {
2152 for (uint32_t m = 1; m <= 4; m++) {
2153 GemmMicrokernelTester()
2154 .mr(4)
2155 .nr(16)
2156 .kr(2)
2157 .sr(1)
2158 .m(m)
2159 .n(n)
2160 .k(k)
2161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002162 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002163 }
2164 }
2165 }
2166 }
2167
2168 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_div_16) {
2169 TEST_REQUIRES_ARM_NEON;
2170 for (uint32_t n = 32; n <= 48; n += 16) {
2171 for (size_t k = 1; k <= 40; k += 9) {
2172 GemmMicrokernelTester()
2173 .mr(4)
2174 .nr(16)
2175 .kr(2)
2176 .sr(1)
2177 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002178 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002179 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002180 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002181 }
2182 }
2183 }
2184
2185 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_div_16_strided_cn) {
2186 TEST_REQUIRES_ARM_NEON;
2187 for (uint32_t n = 32; n <= 48; n += 16) {
2188 for (size_t k = 1; k <= 40; k += 9) {
2189 GemmMicrokernelTester()
2190 .mr(4)
2191 .nr(16)
2192 .kr(2)
2193 .sr(1)
2194 .m(4)
2195 .n(n)
2196 .k(k)
2197 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002198 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002199 }
2200 }
2201 }
2202
2203 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_div_16_strided_a) {
2204 TEST_REQUIRES_ARM_NEON;
2205 for (uint32_t n = 32; n <= 48; n += 16) {
2206 for (size_t k = 1; k <= 40; k += 9) {
2207 GemmMicrokernelTester()
2208 .mr(4)
2209 .nr(16)
2210 .kr(2)
2211 .sr(1)
2212 .m(4)
2213 .n(n)
2214 .k(k)
2215 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002216 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002217 }
2218 }
2219 }
2220
2221 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, n_div_16_subtile) {
2222 TEST_REQUIRES_ARM_NEON;
2223 for (uint32_t n = 32; n <= 48; n += 16) {
2224 for (size_t k = 1; k <= 40; k += 9) {
2225 for (uint32_t m = 1; m <= 4; m++) {
2226 GemmMicrokernelTester()
2227 .mr(4)
2228 .nr(16)
2229 .kr(2)
2230 .sr(1)
2231 .m(m)
2232 .n(n)
2233 .k(k)
2234 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002235 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002236 }
2237 }
2238 }
2239 }
2240
2241 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, strided_cm_subtile) {
2242 TEST_REQUIRES_ARM_NEON;
2243 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002244 for (uint32_t n = 1; n <= 16; n++) {
2245 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002246 GemmMicrokernelTester()
2247 .mr(4)
2248 .nr(16)
2249 .kr(2)
2250 .sr(1)
2251 .m(m)
2252 .n(n)
2253 .k(k)
2254 .cm_stride(19)
2255 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002256 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002257 }
2258 }
2259 }
2260 }
2261
2262 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, qmin) {
2263 TEST_REQUIRES_ARM_NEON;
2264 GemmMicrokernelTester()
2265 .mr(4)
2266 .nr(16)
2267 .kr(2)
2268 .sr(1)
2269 .m(4)
2270 .n(16)
2271 .k(8)
2272 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002273 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002274 }
2275
2276 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, qmax) {
2277 TEST_REQUIRES_ARM_NEON;
2278 GemmMicrokernelTester()
2279 .mr(4)
2280 .nr(16)
2281 .kr(2)
2282 .sr(1)
2283 .m(4)
2284 .n(16)
2285 .k(8)
2286 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002287 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002288 }
2289
2290 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD1R, strided_cm) {
2291 TEST_REQUIRES_ARM_NEON;
2292 GemmMicrokernelTester()
2293 .mr(4)
2294 .nr(16)
2295 .kr(2)
2296 .sr(1)
2297 .m(4)
2298 .n(16)
2299 .k(8)
2300 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002301 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002302 }
2303#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2304
2305
2306#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2307 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16) {
2308 TEST_REQUIRES_ARM_NEON;
2309 GemmMicrokernelTester()
2310 .mr(2)
2311 .nr(8)
2312 .kr(2)
2313 .sr(1)
2314 .m(2)
2315 .n(8)
2316 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002317 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002318 }
2319
2320 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cn) {
2321 TEST_REQUIRES_ARM_NEON;
2322 GemmMicrokernelTester()
2323 .mr(2)
2324 .nr(8)
2325 .kr(2)
2326 .sr(1)
2327 .m(2)
2328 .n(8)
2329 .k(16)
2330 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002331 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002332 }
2333
2334 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_strided_a) {
2335 TEST_REQUIRES_ARM_NEON;
2336 GemmMicrokernelTester()
2337 .mr(2)
2338 .nr(8)
2339 .kr(2)
2340 .sr(1)
2341 .m(2)
2342 .n(8)
2343 .k(16)
2344 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002345 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002346 }
2347
2348 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
2349 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002350 for (uint32_t n = 1; n <= 8; n++) {
2351 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002352 GemmMicrokernelTester()
2353 .mr(2)
2354 .nr(8)
2355 .kr(2)
2356 .sr(1)
2357 .m(m)
2358 .n(n)
2359 .k(16)
2360 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002361 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002362 }
2363 }
2364 }
2365
2366 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
2367 TEST_REQUIRES_ARM_NEON;
2368 for (uint32_t m = 1; m <= 2; m++) {
2369 GemmMicrokernelTester()
2370 .mr(2)
2371 .nr(8)
2372 .kr(2)
2373 .sr(1)
2374 .m(m)
2375 .n(8)
2376 .k(16)
2377 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002378 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002379 }
2380 }
2381
2382 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
2383 TEST_REQUIRES_ARM_NEON;
2384 for (uint32_t n = 1; n <= 8; n++) {
2385 GemmMicrokernelTester()
2386 .mr(2)
2387 .nr(8)
2388 .kr(2)
2389 .sr(1)
2390 .m(2)
2391 .n(n)
2392 .k(16)
2393 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002394 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002395 }
2396 }
2397
2398 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_lt_16) {
2399 TEST_REQUIRES_ARM_NEON;
2400 for (size_t k = 1; k < 16; k++) {
2401 GemmMicrokernelTester()
2402 .mr(2)
2403 .nr(8)
2404 .kr(2)
2405 .sr(1)
2406 .m(2)
2407 .n(8)
2408 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002409 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002410 }
2411 }
2412
2413 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_lt_16_strided_a) {
2414 TEST_REQUIRES_ARM_NEON;
2415 for (size_t k = 1; k < 16; k++) {
2416 GemmMicrokernelTester()
2417 .mr(2)
2418 .nr(8)
2419 .kr(2)
2420 .sr(1)
2421 .m(2)
2422 .n(8)
2423 .k(k)
2424 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002425 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002426 }
2427 }
2428
2429 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
2430 TEST_REQUIRES_ARM_NEON;
2431 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002432 for (uint32_t n = 1; n <= 8; n++) {
2433 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002434 GemmMicrokernelTester()
2435 .mr(2)
2436 .nr(8)
2437 .kr(2)
2438 .sr(1)
2439 .m(m)
2440 .n(n)
2441 .k(k)
2442 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002443 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002444 }
2445 }
2446 }
2447 }
2448
2449 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_gt_16) {
2450 TEST_REQUIRES_ARM_NEON;
2451 for (size_t k = 17; k < 32; k++) {
2452 GemmMicrokernelTester()
2453 .mr(2)
2454 .nr(8)
2455 .kr(2)
2456 .sr(1)
2457 .m(2)
2458 .n(8)
2459 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002460 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002461 }
2462 }
2463
2464 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_gt_16_strided_a) {
2465 TEST_REQUIRES_ARM_NEON;
2466 for (size_t k = 17; k < 32; k++) {
2467 GemmMicrokernelTester()
2468 .mr(2)
2469 .nr(8)
2470 .kr(2)
2471 .sr(1)
2472 .m(2)
2473 .n(8)
2474 .k(k)
2475 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08002476 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002477 }
2478 }
2479
2480 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
2481 TEST_REQUIRES_ARM_NEON;
2482 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002483 for (uint32_t n = 1; n <= 8; n++) {
2484 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002485 GemmMicrokernelTester()
2486 .mr(2)
2487 .nr(8)
2488 .kr(2)
2489 .sr(1)
2490 .m(m)
2491 .n(n)
2492 .k(k)
2493 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002494 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002495 }
2496 }
2497 }
2498 }
2499
2500 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_div_16) {
2501 TEST_REQUIRES_ARM_NEON;
2502 for (size_t k = 32; k <= 160; k += 16) {
2503 GemmMicrokernelTester()
2504 .mr(2)
2505 .nr(8)
2506 .kr(2)
2507 .sr(1)
2508 .m(2)
2509 .n(8)
2510 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002511 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002512 }
2513 }
2514
2515 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_div_16_strided_a) {
2516 TEST_REQUIRES_ARM_NEON;
2517 for (size_t k = 32; k <= 160; k += 16) {
2518 GemmMicrokernelTester()
2519 .mr(2)
2520 .nr(8)
2521 .kr(2)
2522 .sr(1)
2523 .m(2)
2524 .n(8)
2525 .k(k)
2526 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08002527 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002528 }
2529 }
2530
2531 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
2532 TEST_REQUIRES_ARM_NEON;
2533 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002534 for (uint32_t n = 1; n <= 8; n++) {
2535 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002536 GemmMicrokernelTester()
2537 .mr(2)
2538 .nr(8)
2539 .kr(2)
2540 .sr(1)
2541 .m(m)
2542 .n(n)
2543 .k(k)
2544 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002545 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002546 }
2547 }
2548 }
2549 }
2550
2551 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8) {
2552 TEST_REQUIRES_ARM_NEON;
2553 for (uint32_t n = 9; n < 16; n++) {
2554 for (size_t k = 1; k <= 80; k += 17) {
2555 GemmMicrokernelTester()
2556 .mr(2)
2557 .nr(8)
2558 .kr(2)
2559 .sr(1)
2560 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002561 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002562 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002563 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002564 }
2565 }
2566 }
2567
2568 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
2569 TEST_REQUIRES_ARM_NEON;
2570 for (uint32_t n = 9; n < 16; n++) {
2571 for (size_t k = 1; k <= 80; k += 17) {
2572 GemmMicrokernelTester()
2573 .mr(2)
2574 .nr(8)
2575 .kr(2)
2576 .sr(1)
2577 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002578 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002579 .k(k)
2580 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002581 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002582 }
2583 }
2584 }
2585
2586 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_a) {
2587 TEST_REQUIRES_ARM_NEON;
2588 for (uint32_t n = 9; n < 16; n++) {
2589 for (size_t k = 1; k <= 80; k += 17) {
2590 GemmMicrokernelTester()
2591 .mr(2)
2592 .nr(8)
2593 .kr(2)
2594 .sr(1)
2595 .m(2)
2596 .n(n)
2597 .k(k)
2598 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002599 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002600 }
2601 }
2602 }
2603
2604 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
2605 TEST_REQUIRES_ARM_NEON;
2606 for (uint32_t n = 9; n < 16; n++) {
2607 for (size_t k = 1; k <= 80; k += 17) {
2608 for (uint32_t m = 1; m <= 2; m++) {
2609 GemmMicrokernelTester()
2610 .mr(2)
2611 .nr(8)
2612 .kr(2)
2613 .sr(1)
2614 .m(m)
2615 .n(n)
2616 .k(k)
2617 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002618 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002619 }
2620 }
2621 }
2622 }
2623
2624 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8) {
2625 TEST_REQUIRES_ARM_NEON;
2626 for (uint32_t n = 16; n <= 24; n += 8) {
2627 for (size_t k = 1; k <= 80; k += 17) {
2628 GemmMicrokernelTester()
2629 .mr(2)
2630 .nr(8)
2631 .kr(2)
2632 .sr(1)
2633 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002634 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002635 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002636 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002637 }
2638 }
2639 }
2640
2641 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
2642 TEST_REQUIRES_ARM_NEON;
2643 for (uint32_t n = 16; n <= 24; n += 8) {
2644 for (size_t k = 1; k <= 80; k += 17) {
2645 GemmMicrokernelTester()
2646 .mr(2)
2647 .nr(8)
2648 .kr(2)
2649 .sr(1)
2650 .m(2)
2651 .n(n)
2652 .k(k)
2653 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002654 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002655 }
2656 }
2657 }
2658
2659 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_a) {
2660 TEST_REQUIRES_ARM_NEON;
2661 for (uint32_t n = 16; n <= 24; n += 8) {
2662 for (size_t k = 1; k <= 80; k += 17) {
2663 GemmMicrokernelTester()
2664 .mr(2)
2665 .nr(8)
2666 .kr(2)
2667 .sr(1)
2668 .m(2)
2669 .n(n)
2670 .k(k)
2671 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002672 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002673 }
2674 }
2675 }
2676
2677 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
2678 TEST_REQUIRES_ARM_NEON;
2679 for (uint32_t n = 16; n <= 24; n += 8) {
2680 for (size_t k = 1; k <= 80; k += 17) {
2681 for (uint32_t m = 1; m <= 2; m++) {
2682 GemmMicrokernelTester()
2683 .mr(2)
2684 .nr(8)
2685 .kr(2)
2686 .sr(1)
2687 .m(m)
2688 .n(n)
2689 .k(k)
2690 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002691 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002692 }
2693 }
2694 }
2695 }
2696
2697 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
2698 TEST_REQUIRES_ARM_NEON;
2699 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002700 for (uint32_t n = 1; n <= 8; n++) {
2701 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002702 GemmMicrokernelTester()
2703 .mr(2)
2704 .nr(8)
2705 .kr(2)
2706 .sr(1)
2707 .m(m)
2708 .n(n)
2709 .k(k)
2710 .cm_stride(11)
2711 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002712 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002713 }
2714 }
2715 }
2716 }
2717
2718 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, qmin) {
2719 TEST_REQUIRES_ARM_NEON;
2720 GemmMicrokernelTester()
2721 .mr(2)
2722 .nr(8)
2723 .kr(2)
2724 .sr(1)
2725 .m(2)
2726 .n(8)
2727 .k(16)
2728 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002729 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002730 }
2731
2732 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, qmax) {
2733 TEST_REQUIRES_ARM_NEON;
2734 GemmMicrokernelTester()
2735 .mr(2)
2736 .nr(8)
2737 .kr(2)
2738 .sr(1)
2739 .m(2)
2740 .n(8)
2741 .k(16)
2742 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002743 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002744 }
2745
2746 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cm) {
2747 TEST_REQUIRES_ARM_NEON;
2748 GemmMicrokernelTester()
2749 .mr(2)
2750 .nr(8)
2751 .kr(2)
2752 .sr(1)
2753 .m(2)
2754 .n(8)
2755 .k(16)
2756 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002757 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002758 }
2759#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2760
2761
2762#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2763 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_eq_16) {
2764 TEST_REQUIRES_ARM_NEON;
2765 GemmMicrokernelTester()
2766 .mr(3)
2767 .nr(16)
2768 .kr(2)
2769 .sr(1)
2770 .m(3)
2771 .n(16)
2772 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002773 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002774 }
2775
2776 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, strided_cn) {
2777 TEST_REQUIRES_ARM_NEON;
2778 GemmMicrokernelTester()
2779 .mr(3)
2780 .nr(16)
2781 .kr(2)
2782 .sr(1)
2783 .m(3)
2784 .n(16)
2785 .k(16)
2786 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002787 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002788 }
2789
2790 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_eq_16_strided_a) {
2791 TEST_REQUIRES_ARM_NEON;
2792 GemmMicrokernelTester()
2793 .mr(3)
2794 .nr(16)
2795 .kr(2)
2796 .sr(1)
2797 .m(3)
2798 .n(16)
2799 .k(16)
2800 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002801 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002802 }
2803
2804 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
2805 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002806 for (uint32_t n = 1; n <= 16; n++) {
2807 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002808 GemmMicrokernelTester()
2809 .mr(3)
2810 .nr(16)
2811 .kr(2)
2812 .sr(1)
2813 .m(m)
2814 .n(n)
2815 .k(16)
2816 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002817 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002818 }
2819 }
2820 }
2821
2822 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
2823 TEST_REQUIRES_ARM_NEON;
2824 for (uint32_t m = 1; m <= 3; m++) {
2825 GemmMicrokernelTester()
2826 .mr(3)
2827 .nr(16)
2828 .kr(2)
2829 .sr(1)
2830 .m(m)
2831 .n(16)
2832 .k(16)
2833 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002834 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002835 }
2836 }
2837
2838 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
2839 TEST_REQUIRES_ARM_NEON;
2840 for (uint32_t n = 1; n <= 16; n++) {
2841 GemmMicrokernelTester()
2842 .mr(3)
2843 .nr(16)
2844 .kr(2)
2845 .sr(1)
2846 .m(3)
2847 .n(n)
2848 .k(16)
2849 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002850 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002851 }
2852 }
2853
2854 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_lt_16) {
2855 TEST_REQUIRES_ARM_NEON;
2856 for (size_t k = 1; k < 16; k++) {
2857 GemmMicrokernelTester()
2858 .mr(3)
2859 .nr(16)
2860 .kr(2)
2861 .sr(1)
2862 .m(3)
2863 .n(16)
2864 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002865 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002866 }
2867 }
2868
2869 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_lt_16_strided_a) {
2870 TEST_REQUIRES_ARM_NEON;
2871 for (size_t k = 1; k < 16; k++) {
2872 GemmMicrokernelTester()
2873 .mr(3)
2874 .nr(16)
2875 .kr(2)
2876 .sr(1)
2877 .m(3)
2878 .n(16)
2879 .k(k)
2880 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002881 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002882 }
2883 }
2884
2885 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
2886 TEST_REQUIRES_ARM_NEON;
2887 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002888 for (uint32_t n = 1; n <= 16; n++) {
2889 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002890 GemmMicrokernelTester()
2891 .mr(3)
2892 .nr(16)
2893 .kr(2)
2894 .sr(1)
2895 .m(m)
2896 .n(n)
2897 .k(k)
2898 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002899 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002900 }
2901 }
2902 }
2903 }
2904
2905 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_gt_16) {
2906 TEST_REQUIRES_ARM_NEON;
2907 for (size_t k = 17; k < 32; k++) {
2908 GemmMicrokernelTester()
2909 .mr(3)
2910 .nr(16)
2911 .kr(2)
2912 .sr(1)
2913 .m(3)
2914 .n(16)
2915 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002916 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002917 }
2918 }
2919
2920 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_gt_16_strided_a) {
2921 TEST_REQUIRES_ARM_NEON;
2922 for (size_t k = 17; k < 32; k++) {
2923 GemmMicrokernelTester()
2924 .mr(3)
2925 .nr(16)
2926 .kr(2)
2927 .sr(1)
2928 .m(3)
2929 .n(16)
2930 .k(k)
2931 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08002932 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002933 }
2934 }
2935
2936 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
2937 TEST_REQUIRES_ARM_NEON;
2938 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002939 for (uint32_t n = 1; n <= 16; n++) {
2940 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002941 GemmMicrokernelTester()
2942 .mr(3)
2943 .nr(16)
2944 .kr(2)
2945 .sr(1)
2946 .m(m)
2947 .n(n)
2948 .k(k)
2949 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002950 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002951 }
2952 }
2953 }
2954 }
2955
2956 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_div_16) {
2957 TEST_REQUIRES_ARM_NEON;
2958 for (size_t k = 32; k <= 160; k += 16) {
2959 GemmMicrokernelTester()
2960 .mr(3)
2961 .nr(16)
2962 .kr(2)
2963 .sr(1)
2964 .m(3)
2965 .n(16)
2966 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002967 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002968 }
2969 }
2970
2971 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_div_16_strided_a) {
2972 TEST_REQUIRES_ARM_NEON;
2973 for (size_t k = 32; k <= 160; k += 16) {
2974 GemmMicrokernelTester()
2975 .mr(3)
2976 .nr(16)
2977 .kr(2)
2978 .sr(1)
2979 .m(3)
2980 .n(16)
2981 .k(k)
2982 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08002983 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002984 }
2985 }
2986
2987 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, k_div_16_subtile) {
2988 TEST_REQUIRES_ARM_NEON;
2989 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002990 for (uint32_t n = 1; n <= 16; n++) {
2991 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002992 GemmMicrokernelTester()
2993 .mr(3)
2994 .nr(16)
2995 .kr(2)
2996 .sr(1)
2997 .m(m)
2998 .n(n)
2999 .k(k)
3000 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003001 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003002 }
3003 }
3004 }
3005 }
3006
3007 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, n_gt_16) {
3008 TEST_REQUIRES_ARM_NEON;
3009 for (uint32_t n = 17; n < 32; n++) {
3010 for (size_t k = 1; k <= 80; k += 17) {
3011 GemmMicrokernelTester()
3012 .mr(3)
3013 .nr(16)
3014 .kr(2)
3015 .sr(1)
3016 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003017 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003018 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003019 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003020 }
3021 }
3022 }
3023
3024 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, n_gt_16_strided_cn) {
3025 TEST_REQUIRES_ARM_NEON;
3026 for (uint32_t n = 17; n < 32; n++) {
3027 for (size_t k = 1; k <= 80; k += 17) {
3028 GemmMicrokernelTester()
3029 .mr(3)
3030 .nr(16)
3031 .kr(2)
3032 .sr(1)
3033 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003034 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003035 .k(k)
3036 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003037 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003038 }
3039 }
3040 }
3041
3042 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, n_gt_16_strided_a) {
3043 TEST_REQUIRES_ARM_NEON;
3044 for (uint32_t n = 17; n < 32; n++) {
3045 for (size_t k = 1; k <= 80; k += 17) {
3046 GemmMicrokernelTester()
3047 .mr(3)
3048 .nr(16)
3049 .kr(2)
3050 .sr(1)
3051 .m(3)
3052 .n(n)
3053 .k(k)
3054 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003055 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003056 }
3057 }
3058 }
3059
3060 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, n_gt_16_subtile) {
3061 TEST_REQUIRES_ARM_NEON;
3062 for (uint32_t n = 17; n < 32; n++) {
3063 for (size_t k = 1; k <= 80; k += 17) {
3064 for (uint32_t m = 1; m <= 3; m++) {
3065 GemmMicrokernelTester()
3066 .mr(3)
3067 .nr(16)
3068 .kr(2)
3069 .sr(1)
3070 .m(m)
3071 .n(n)
3072 .k(k)
3073 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003074 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003075 }
3076 }
3077 }
3078 }
3079
3080 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, n_div_16) {
3081 TEST_REQUIRES_ARM_NEON;
3082 for (uint32_t n = 32; n <= 48; n += 16) {
3083 for (size_t k = 1; k <= 80; k += 17) {
3084 GemmMicrokernelTester()
3085 .mr(3)
3086 .nr(16)
3087 .kr(2)
3088 .sr(1)
3089 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003090 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003091 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003092 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003093 }
3094 }
3095 }
3096
3097 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, n_div_16_strided_cn) {
3098 TEST_REQUIRES_ARM_NEON;
3099 for (uint32_t n = 32; n <= 48; n += 16) {
3100 for (size_t k = 1; k <= 80; k += 17) {
3101 GemmMicrokernelTester()
3102 .mr(3)
3103 .nr(16)
3104 .kr(2)
3105 .sr(1)
3106 .m(3)
3107 .n(n)
3108 .k(k)
3109 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003110 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003111 }
3112 }
3113 }
3114
3115 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, n_div_16_strided_a) {
3116 TEST_REQUIRES_ARM_NEON;
3117 for (uint32_t n = 32; n <= 48; n += 16) {
3118 for (size_t k = 1; k <= 80; k += 17) {
3119 GemmMicrokernelTester()
3120 .mr(3)
3121 .nr(16)
3122 .kr(2)
3123 .sr(1)
3124 .m(3)
3125 .n(n)
3126 .k(k)
3127 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003128 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003129 }
3130 }
3131 }
3132
3133 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, n_div_16_subtile) {
3134 TEST_REQUIRES_ARM_NEON;
3135 for (uint32_t n = 32; n <= 48; n += 16) {
3136 for (size_t k = 1; k <= 80; k += 17) {
3137 for (uint32_t m = 1; m <= 3; m++) {
3138 GemmMicrokernelTester()
3139 .mr(3)
3140 .nr(16)
3141 .kr(2)
3142 .sr(1)
3143 .m(m)
3144 .n(n)
3145 .k(k)
3146 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003147 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003148 }
3149 }
3150 }
3151 }
3152
3153 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, strided_cm_subtile) {
3154 TEST_REQUIRES_ARM_NEON;
3155 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003156 for (uint32_t n = 1; n <= 16; n++) {
3157 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003158 GemmMicrokernelTester()
3159 .mr(3)
3160 .nr(16)
3161 .kr(2)
3162 .sr(1)
3163 .m(m)
3164 .n(n)
3165 .k(k)
3166 .cm_stride(19)
3167 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003168 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003169 }
3170 }
3171 }
3172 }
3173
3174 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, qmin) {
3175 TEST_REQUIRES_ARM_NEON;
3176 GemmMicrokernelTester()
3177 .mr(3)
3178 .nr(16)
3179 .kr(2)
3180 .sr(1)
3181 .m(3)
3182 .n(16)
3183 .k(16)
3184 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003185 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003186 }
3187
3188 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, qmax) {
3189 TEST_REQUIRES_ARM_NEON;
3190 GemmMicrokernelTester()
3191 .mr(3)
3192 .nr(16)
3193 .kr(2)
3194 .sr(1)
3195 .m(3)
3196 .n(16)
3197 .k(16)
3198 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003199 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003200 }
3201
3202 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD1R, strided_cm) {
3203 TEST_REQUIRES_ARM_NEON;
3204 GemmMicrokernelTester()
3205 .mr(3)
3206 .nr(16)
3207 .kr(2)
3208 .sr(1)
3209 .m(3)
3210 .n(16)
3211 .k(16)
3212 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003213 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003214 }
3215#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3216
3217
3218#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3219 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_eq_8) {
3220 TEST_REQUIRES_ARM_NEON;
3221 GemmMicrokernelTester()
3222 .mr(3)
3223 .nr(8)
3224 .kr(2)
3225 .sr(1)
3226 .m(3)
3227 .n(8)
3228 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003229 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003230 }
3231
3232 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, strided_cn) {
3233 TEST_REQUIRES_ARM_NEON;
3234 GemmMicrokernelTester()
3235 .mr(3)
3236 .nr(8)
3237 .kr(2)
3238 .sr(1)
3239 .m(3)
3240 .n(8)
3241 .k(8)
3242 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003243 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003244 }
3245
3246 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_eq_8_strided_a) {
3247 TEST_REQUIRES_ARM_NEON;
3248 GemmMicrokernelTester()
3249 .mr(3)
3250 .nr(8)
3251 .kr(2)
3252 .sr(1)
3253 .m(3)
3254 .n(8)
3255 .k(8)
3256 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003257 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003258 }
3259
3260 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_eq_8_subtile) {
3261 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003262 for (uint32_t n = 1; n <= 8; n++) {
3263 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003264 GemmMicrokernelTester()
3265 .mr(3)
3266 .nr(8)
3267 .kr(2)
3268 .sr(1)
3269 .m(m)
3270 .n(n)
3271 .k(8)
3272 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003273 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003274 }
3275 }
3276 }
3277
3278 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
3279 TEST_REQUIRES_ARM_NEON;
3280 for (uint32_t m = 1; m <= 3; m++) {
3281 GemmMicrokernelTester()
3282 .mr(3)
3283 .nr(8)
3284 .kr(2)
3285 .sr(1)
3286 .m(m)
3287 .n(8)
3288 .k(8)
3289 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003290 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003291 }
3292 }
3293
3294 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
3295 TEST_REQUIRES_ARM_NEON;
3296 for (uint32_t n = 1; n <= 8; n++) {
3297 GemmMicrokernelTester()
3298 .mr(3)
3299 .nr(8)
3300 .kr(2)
3301 .sr(1)
3302 .m(3)
3303 .n(n)
3304 .k(8)
3305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003306 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003307 }
3308 }
3309
3310 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_lt_8) {
3311 TEST_REQUIRES_ARM_NEON;
3312 for (size_t k = 1; k < 8; k++) {
3313 GemmMicrokernelTester()
3314 .mr(3)
3315 .nr(8)
3316 .kr(2)
3317 .sr(1)
3318 .m(3)
3319 .n(8)
3320 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003321 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003322 }
3323 }
3324
3325 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_lt_8_strided_a) {
3326 TEST_REQUIRES_ARM_NEON;
3327 for (size_t k = 1; k < 8; k++) {
3328 GemmMicrokernelTester()
3329 .mr(3)
3330 .nr(8)
3331 .kr(2)
3332 .sr(1)
3333 .m(3)
3334 .n(8)
3335 .k(k)
3336 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003337 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003338 }
3339 }
3340
3341 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_lt_8_subtile) {
3342 TEST_REQUIRES_ARM_NEON;
3343 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003344 for (uint32_t n = 1; n <= 8; n++) {
3345 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003346 GemmMicrokernelTester()
3347 .mr(3)
3348 .nr(8)
3349 .kr(2)
3350 .sr(1)
3351 .m(m)
3352 .n(n)
3353 .k(k)
3354 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003355 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003356 }
3357 }
3358 }
3359 }
3360
3361 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_gt_8) {
3362 TEST_REQUIRES_ARM_NEON;
3363 for (size_t k = 9; k < 16; k++) {
3364 GemmMicrokernelTester()
3365 .mr(3)
3366 .nr(8)
3367 .kr(2)
3368 .sr(1)
3369 .m(3)
3370 .n(8)
3371 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003372 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003373 }
3374 }
3375
3376 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_gt_8_strided_a) {
3377 TEST_REQUIRES_ARM_NEON;
3378 for (size_t k = 9; k < 16; k++) {
3379 GemmMicrokernelTester()
3380 .mr(3)
3381 .nr(8)
3382 .kr(2)
3383 .sr(1)
3384 .m(3)
3385 .n(8)
3386 .k(k)
3387 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003388 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003389 }
3390 }
3391
3392 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_gt_8_subtile) {
3393 TEST_REQUIRES_ARM_NEON;
3394 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003395 for (uint32_t n = 1; n <= 8; n++) {
3396 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003397 GemmMicrokernelTester()
3398 .mr(3)
3399 .nr(8)
3400 .kr(2)
3401 .sr(1)
3402 .m(m)
3403 .n(n)
3404 .k(k)
3405 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003406 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003407 }
3408 }
3409 }
3410 }
3411
3412 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_div_8) {
3413 TEST_REQUIRES_ARM_NEON;
3414 for (size_t k = 16; k <= 80; k += 8) {
3415 GemmMicrokernelTester()
3416 .mr(3)
3417 .nr(8)
3418 .kr(2)
3419 .sr(1)
3420 .m(3)
3421 .n(8)
3422 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003423 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003424 }
3425 }
3426
3427 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_div_8_strided_a) {
3428 TEST_REQUIRES_ARM_NEON;
3429 for (size_t k = 16; k <= 80; k += 8) {
3430 GemmMicrokernelTester()
3431 .mr(3)
3432 .nr(8)
3433 .kr(2)
3434 .sr(1)
3435 .m(3)
3436 .n(8)
3437 .k(k)
3438 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003439 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003440 }
3441 }
3442
3443 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, k_div_8_subtile) {
3444 TEST_REQUIRES_ARM_NEON;
3445 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003446 for (uint32_t n = 1; n <= 8; n++) {
3447 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003448 GemmMicrokernelTester()
3449 .mr(3)
3450 .nr(8)
3451 .kr(2)
3452 .sr(1)
3453 .m(m)
3454 .n(n)
3455 .k(k)
3456 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003457 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003458 }
3459 }
3460 }
3461 }
3462
3463 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, n_gt_8) {
3464 TEST_REQUIRES_ARM_NEON;
3465 for (uint32_t n = 9; n < 16; n++) {
3466 for (size_t k = 1; k <= 40; k += 9) {
3467 GemmMicrokernelTester()
3468 .mr(3)
3469 .nr(8)
3470 .kr(2)
3471 .sr(1)
3472 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003473 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003474 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003475 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003476 }
3477 }
3478 }
3479
3480 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, n_gt_8_strided_cn) {
3481 TEST_REQUIRES_ARM_NEON;
3482 for (uint32_t n = 9; n < 16; n++) {
3483 for (size_t k = 1; k <= 40; k += 9) {
3484 GemmMicrokernelTester()
3485 .mr(3)
3486 .nr(8)
3487 .kr(2)
3488 .sr(1)
3489 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003490 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003491 .k(k)
3492 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003493 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003494 }
3495 }
3496 }
3497
3498 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, n_gt_8_strided_a) {
3499 TEST_REQUIRES_ARM_NEON;
3500 for (uint32_t n = 9; n < 16; n++) {
3501 for (size_t k = 1; k <= 40; k += 9) {
3502 GemmMicrokernelTester()
3503 .mr(3)
3504 .nr(8)
3505 .kr(2)
3506 .sr(1)
3507 .m(3)
3508 .n(n)
3509 .k(k)
3510 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003511 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003512 }
3513 }
3514 }
3515
3516 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, n_gt_8_subtile) {
3517 TEST_REQUIRES_ARM_NEON;
3518 for (uint32_t n = 9; n < 16; n++) {
3519 for (size_t k = 1; k <= 40; k += 9) {
3520 for (uint32_t m = 1; m <= 3; m++) {
3521 GemmMicrokernelTester()
3522 .mr(3)
3523 .nr(8)
3524 .kr(2)
3525 .sr(1)
3526 .m(m)
3527 .n(n)
3528 .k(k)
3529 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003530 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003531 }
3532 }
3533 }
3534 }
3535
3536 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, n_div_8) {
3537 TEST_REQUIRES_ARM_NEON;
3538 for (uint32_t n = 16; n <= 24; n += 8) {
3539 for (size_t k = 1; k <= 40; k += 9) {
3540 GemmMicrokernelTester()
3541 .mr(3)
3542 .nr(8)
3543 .kr(2)
3544 .sr(1)
3545 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003546 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003547 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003548 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003549 }
3550 }
3551 }
3552
3553 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, n_div_8_strided_cn) {
3554 TEST_REQUIRES_ARM_NEON;
3555 for (uint32_t n = 16; n <= 24; n += 8) {
3556 for (size_t k = 1; k <= 40; k += 9) {
3557 GemmMicrokernelTester()
3558 .mr(3)
3559 .nr(8)
3560 .kr(2)
3561 .sr(1)
3562 .m(3)
3563 .n(n)
3564 .k(k)
3565 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003566 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003567 }
3568 }
3569 }
3570
3571 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, n_div_8_strided_a) {
3572 TEST_REQUIRES_ARM_NEON;
3573 for (uint32_t n = 16; n <= 24; n += 8) {
3574 for (size_t k = 1; k <= 40; k += 9) {
3575 GemmMicrokernelTester()
3576 .mr(3)
3577 .nr(8)
3578 .kr(2)
3579 .sr(1)
3580 .m(3)
3581 .n(n)
3582 .k(k)
3583 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003584 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003585 }
3586 }
3587 }
3588
3589 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, n_div_8_subtile) {
3590 TEST_REQUIRES_ARM_NEON;
3591 for (uint32_t n = 16; n <= 24; n += 8) {
3592 for (size_t k = 1; k <= 40; k += 9) {
3593 for (uint32_t m = 1; m <= 3; m++) {
3594 GemmMicrokernelTester()
3595 .mr(3)
3596 .nr(8)
3597 .kr(2)
3598 .sr(1)
3599 .m(m)
3600 .n(n)
3601 .k(k)
3602 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003603 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003604 }
3605 }
3606 }
3607 }
3608
3609 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, strided_cm_subtile) {
3610 TEST_REQUIRES_ARM_NEON;
3611 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003612 for (uint32_t n = 1; n <= 8; n++) {
3613 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003614 GemmMicrokernelTester()
3615 .mr(3)
3616 .nr(8)
3617 .kr(2)
3618 .sr(1)
3619 .m(m)
3620 .n(n)
3621 .k(k)
3622 .cm_stride(11)
3623 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003624 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003625 }
3626 }
3627 }
3628 }
3629
3630 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, qmin) {
3631 TEST_REQUIRES_ARM_NEON;
3632 GemmMicrokernelTester()
3633 .mr(3)
3634 .nr(8)
3635 .kr(2)
3636 .sr(1)
3637 .m(3)
3638 .n(8)
3639 .k(8)
3640 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003641 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003642 }
3643
3644 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, qmax) {
3645 TEST_REQUIRES_ARM_NEON;
3646 GemmMicrokernelTester()
3647 .mr(3)
3648 .nr(8)
3649 .kr(2)
3650 .sr(1)
3651 .m(3)
3652 .n(8)
3653 .k(8)
3654 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003655 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003656 }
3657
3658 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD2R, strided_cm) {
3659 TEST_REQUIRES_ARM_NEON;
3660 GemmMicrokernelTester()
3661 .mr(3)
3662 .nr(8)
3663 .kr(2)
3664 .sr(1)
3665 .m(3)
3666 .n(8)
3667 .k(8)
3668 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003669 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003670 }
3671#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3672
3673
3674#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3675 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_eq_16) {
3676 TEST_REQUIRES_ARM_NEON;
3677 GemmMicrokernelTester()
3678 .mr(2)
3679 .nr(8)
3680 .kr(2)
3681 .sr(1)
3682 .m(2)
3683 .n(8)
3684 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08003685 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003686 }
3687
3688 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, strided_cn) {
3689 TEST_REQUIRES_ARM_NEON;
3690 GemmMicrokernelTester()
3691 .mr(2)
3692 .nr(8)
3693 .kr(2)
3694 .sr(1)
3695 .m(2)
3696 .n(8)
3697 .k(16)
3698 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003699 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003700 }
3701
3702 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_eq_16_strided_a) {
3703 TEST_REQUIRES_ARM_NEON;
3704 GemmMicrokernelTester()
3705 .mr(2)
3706 .nr(8)
3707 .kr(2)
3708 .sr(1)
3709 .m(2)
3710 .n(8)
3711 .k(16)
3712 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003713 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003714 }
3715
3716 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
3717 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003718 for (uint32_t n = 1; n <= 8; n++) {
3719 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003720 GemmMicrokernelTester()
3721 .mr(2)
3722 .nr(8)
3723 .kr(2)
3724 .sr(1)
3725 .m(m)
3726 .n(n)
3727 .k(16)
3728 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003729 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003730 }
3731 }
3732 }
3733
3734 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
3735 TEST_REQUIRES_ARM_NEON;
3736 for (uint32_t m = 1; m <= 2; m++) {
3737 GemmMicrokernelTester()
3738 .mr(2)
3739 .nr(8)
3740 .kr(2)
3741 .sr(1)
3742 .m(m)
3743 .n(8)
3744 .k(16)
3745 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003746 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003747 }
3748 }
3749
3750 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
3751 TEST_REQUIRES_ARM_NEON;
3752 for (uint32_t n = 1; n <= 8; n++) {
3753 GemmMicrokernelTester()
3754 .mr(2)
3755 .nr(8)
3756 .kr(2)
3757 .sr(1)
3758 .m(2)
3759 .n(n)
3760 .k(16)
3761 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003762 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003763 }
3764 }
3765
3766 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_lt_16) {
3767 TEST_REQUIRES_ARM_NEON;
3768 for (size_t k = 1; k < 16; k++) {
3769 GemmMicrokernelTester()
3770 .mr(2)
3771 .nr(8)
3772 .kr(2)
3773 .sr(1)
3774 .m(2)
3775 .n(8)
3776 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003777 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003778 }
3779 }
3780
3781 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_lt_16_strided_a) {
3782 TEST_REQUIRES_ARM_NEON;
3783 for (size_t k = 1; k < 16; k++) {
3784 GemmMicrokernelTester()
3785 .mr(2)
3786 .nr(8)
3787 .kr(2)
3788 .sr(1)
3789 .m(2)
3790 .n(8)
3791 .k(k)
3792 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003793 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003794 }
3795 }
3796
3797 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
3798 TEST_REQUIRES_ARM_NEON;
3799 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003800 for (uint32_t n = 1; n <= 8; n++) {
3801 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003802 GemmMicrokernelTester()
3803 .mr(2)
3804 .nr(8)
3805 .kr(2)
3806 .sr(1)
3807 .m(m)
3808 .n(n)
3809 .k(k)
3810 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003811 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003812 }
3813 }
3814 }
3815 }
3816
3817 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_gt_16) {
3818 TEST_REQUIRES_ARM_NEON;
3819 for (size_t k = 17; k < 32; k++) {
3820 GemmMicrokernelTester()
3821 .mr(2)
3822 .nr(8)
3823 .kr(2)
3824 .sr(1)
3825 .m(2)
3826 .n(8)
3827 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003828 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003829 }
3830 }
3831
3832 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_gt_16_strided_a) {
3833 TEST_REQUIRES_ARM_NEON;
3834 for (size_t k = 17; k < 32; k++) {
3835 GemmMicrokernelTester()
3836 .mr(2)
3837 .nr(8)
3838 .kr(2)
3839 .sr(1)
3840 .m(2)
3841 .n(8)
3842 .k(k)
3843 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08003844 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003845 }
3846 }
3847
3848 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
3849 TEST_REQUIRES_ARM_NEON;
3850 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003851 for (uint32_t n = 1; n <= 8; n++) {
3852 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003853 GemmMicrokernelTester()
3854 .mr(2)
3855 .nr(8)
3856 .kr(2)
3857 .sr(1)
3858 .m(m)
3859 .n(n)
3860 .k(k)
3861 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003862 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003863 }
3864 }
3865 }
3866 }
3867
3868 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_div_16) {
3869 TEST_REQUIRES_ARM_NEON;
3870 for (size_t k = 32; k <= 160; k += 16) {
3871 GemmMicrokernelTester()
3872 .mr(2)
3873 .nr(8)
3874 .kr(2)
3875 .sr(1)
3876 .m(2)
3877 .n(8)
3878 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003879 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003880 }
3881 }
3882
3883 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_div_16_strided_a) {
3884 TEST_REQUIRES_ARM_NEON;
3885 for (size_t k = 32; k <= 160; k += 16) {
3886 GemmMicrokernelTester()
3887 .mr(2)
3888 .nr(8)
3889 .kr(2)
3890 .sr(1)
3891 .m(2)
3892 .n(8)
3893 .k(k)
3894 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08003895 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003896 }
3897 }
3898
3899 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
3900 TEST_REQUIRES_ARM_NEON;
3901 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003902 for (uint32_t n = 1; n <= 8; n++) {
3903 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003904 GemmMicrokernelTester()
3905 .mr(2)
3906 .nr(8)
3907 .kr(2)
3908 .sr(1)
3909 .m(m)
3910 .n(n)
3911 .k(k)
3912 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003913 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003914 }
3915 }
3916 }
3917 }
3918
3919 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_gt_8) {
3920 TEST_REQUIRES_ARM_NEON;
3921 for (uint32_t n = 9; n < 16; n++) {
3922 for (size_t k = 1; k <= 80; k += 17) {
3923 GemmMicrokernelTester()
3924 .mr(2)
3925 .nr(8)
3926 .kr(2)
3927 .sr(1)
3928 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003929 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003930 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003931 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003932 }
3933 }
3934 }
3935
3936 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
3937 TEST_REQUIRES_ARM_NEON;
3938 for (uint32_t n = 9; n < 16; n++) {
3939 for (size_t k = 1; k <= 80; k += 17) {
3940 GemmMicrokernelTester()
3941 .mr(2)
3942 .nr(8)
3943 .kr(2)
3944 .sr(1)
3945 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003946 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003947 .k(k)
3948 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003949 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003950 }
3951 }
3952 }
3953
3954 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_gt_8_strided_a) {
3955 TEST_REQUIRES_ARM_NEON;
3956 for (uint32_t n = 9; n < 16; n++) {
3957 for (size_t k = 1; k <= 80; k += 17) {
3958 GemmMicrokernelTester()
3959 .mr(2)
3960 .nr(8)
3961 .kr(2)
3962 .sr(1)
3963 .m(2)
3964 .n(n)
3965 .k(k)
3966 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003967 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003968 }
3969 }
3970 }
3971
3972 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
3973 TEST_REQUIRES_ARM_NEON;
3974 for (uint32_t n = 9; n < 16; n++) {
3975 for (size_t k = 1; k <= 80; k += 17) {
3976 for (uint32_t m = 1; m <= 2; m++) {
3977 GemmMicrokernelTester()
3978 .mr(2)
3979 .nr(8)
3980 .kr(2)
3981 .sr(1)
3982 .m(m)
3983 .n(n)
3984 .k(k)
3985 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003986 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003987 }
3988 }
3989 }
3990 }
3991
3992 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_div_8) {
3993 TEST_REQUIRES_ARM_NEON;
3994 for (uint32_t n = 16; n <= 24; n += 8) {
3995 for (size_t k = 1; k <= 80; k += 17) {
3996 GemmMicrokernelTester()
3997 .mr(2)
3998 .nr(8)
3999 .kr(2)
4000 .sr(1)
4001 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004002 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004003 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004004 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004005 }
4006 }
4007 }
4008
4009 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
4010 TEST_REQUIRES_ARM_NEON;
4011 for (uint32_t n = 16; n <= 24; n += 8) {
4012 for (size_t k = 1; k <= 80; k += 17) {
4013 GemmMicrokernelTester()
4014 .mr(2)
4015 .nr(8)
4016 .kr(2)
4017 .sr(1)
4018 .m(2)
4019 .n(n)
4020 .k(k)
4021 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004022 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004023 }
4024 }
4025 }
4026
4027 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_div_8_strided_a) {
4028 TEST_REQUIRES_ARM_NEON;
4029 for (uint32_t n = 16; n <= 24; n += 8) {
4030 for (size_t k = 1; k <= 80; k += 17) {
4031 GemmMicrokernelTester()
4032 .mr(2)
4033 .nr(8)
4034 .kr(2)
4035 .sr(1)
4036 .m(2)
4037 .n(n)
4038 .k(k)
4039 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004040 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004041 }
4042 }
4043 }
4044
4045 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
4046 TEST_REQUIRES_ARM_NEON;
4047 for (uint32_t n = 16; n <= 24; n += 8) {
4048 for (size_t k = 1; k <= 80; k += 17) {
4049 for (uint32_t m = 1; m <= 2; m++) {
4050 GemmMicrokernelTester()
4051 .mr(2)
4052 .nr(8)
4053 .kr(2)
4054 .sr(1)
4055 .m(m)
4056 .n(n)
4057 .k(k)
4058 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004059 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004060 }
4061 }
4062 }
4063 }
4064
4065 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
4066 TEST_REQUIRES_ARM_NEON;
4067 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004068 for (uint32_t n = 1; n <= 8; n++) {
4069 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004070 GemmMicrokernelTester()
4071 .mr(2)
4072 .nr(8)
4073 .kr(2)
4074 .sr(1)
4075 .m(m)
4076 .n(n)
4077 .k(k)
4078 .cm_stride(11)
4079 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004080 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004081 }
4082 }
4083 }
4084 }
4085
4086 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, qmin) {
4087 TEST_REQUIRES_ARM_NEON;
4088 GemmMicrokernelTester()
4089 .mr(2)
4090 .nr(8)
4091 .kr(2)
4092 .sr(1)
4093 .m(2)
4094 .n(8)
4095 .k(16)
4096 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004097 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004098 }
4099
4100 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, qmax) {
4101 TEST_REQUIRES_ARM_NEON;
4102 GemmMicrokernelTester()
4103 .mr(2)
4104 .nr(8)
4105 .kr(2)
4106 .sr(1)
4107 .m(2)
4108 .n(8)
4109 .k(16)
4110 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004111 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004112 }
4113
4114 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, strided_cm) {
4115 TEST_REQUIRES_ARM_NEON;
4116 GemmMicrokernelTester()
4117 .mr(2)
4118 .nr(8)
4119 .kr(2)
4120 .sr(1)
4121 .m(2)
4122 .n(8)
4123 .k(16)
4124 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004125 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004126 }
4127#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4128
4129
4130#if XNN_ARCH_ARM || XNN_ARCH_ARM64
4131 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_eq_16) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08004132 TEST_REQUIRES_ARM_NEON;
4133 GemmMicrokernelTester()
4134 .mr(4)
4135 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004136 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08004137 .sr(1)
4138 .m(4)
4139 .n(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004140 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004141 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08004142 }
4143
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004144 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, strided_cn) {
4145 TEST_REQUIRES_ARM_NEON;
4146 GemmMicrokernelTester()
4147 .mr(4)
4148 .nr(8)
4149 .kr(2)
4150 .sr(1)
4151 .m(4)
4152 .n(8)
4153 .k(16)
4154 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004155 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004156 }
4157
4158 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_eq_16_strided_a) {
4159 TEST_REQUIRES_ARM_NEON;
4160 GemmMicrokernelTester()
4161 .mr(4)
4162 .nr(8)
4163 .kr(2)
4164 .sr(1)
4165 .m(4)
4166 .n(8)
4167 .k(16)
4168 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004169 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004170 }
4171
4172 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08004173 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004174 for (uint32_t n = 1; n <= 8; n++) {
4175 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08004176 GemmMicrokernelTester()
4177 .mr(4)
4178 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004179 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08004180 .sr(1)
4181 .m(m)
4182 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004183 .k(16)
Frank Barcharde4d3f762021-12-23 15:31:43 -08004184 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004185 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08004186 }
4187 }
4188 }
4189
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004190 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08004191 TEST_REQUIRES_ARM_NEON;
4192 for (uint32_t m = 1; m <= 4; m++) {
4193 GemmMicrokernelTester()
4194 .mr(4)
4195 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004196 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08004197 .sr(1)
4198 .m(m)
4199 .n(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004200 .k(16)
Frank Barcharde4d3f762021-12-23 15:31:43 -08004201 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004202 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08004203 }
4204 }
4205
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004206 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08004207 TEST_REQUIRES_ARM_NEON;
4208 for (uint32_t n = 1; n <= 8; n++) {
4209 GemmMicrokernelTester()
4210 .mr(4)
4211 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004212 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08004213 .sr(1)
4214 .m(4)
4215 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004216 .k(16)
Frank Barcharde4d3f762021-12-23 15:31:43 -08004217 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004218 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08004219 }
4220 }
4221
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004222 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_lt_16) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08004223 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004224 for (size_t k = 1; k < 16; k++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08004225 GemmMicrokernelTester()
4226 .mr(4)
4227 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004228 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08004229 .sr(1)
4230 .m(4)
4231 .n(8)
4232 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004233 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08004234 }
4235 }
4236
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004237 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_lt_16_strided_a) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08004238 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004239 for (size_t k = 1; k < 16; k++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08004240 GemmMicrokernelTester()
4241 .mr(4)
4242 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004243 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08004244 .sr(1)
4245 .m(4)
4246 .n(8)
4247 .k(k)
4248 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004249 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08004250 }
4251 }
4252
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004253 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
4254 TEST_REQUIRES_ARM_NEON;
4255 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004256 for (uint32_t n = 1; n <= 8; n++) {
4257 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004258 GemmMicrokernelTester()
4259 .mr(4)
4260 .nr(8)
4261 .kr(2)
4262 .sr(1)
4263 .m(m)
4264 .n(n)
4265 .k(k)
4266 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004267 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004268 }
4269 }
4270 }
4271 }
4272
4273 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_gt_16) {
4274 TEST_REQUIRES_ARM_NEON;
4275 for (size_t k = 17; k < 32; k++) {
4276 GemmMicrokernelTester()
4277 .mr(4)
4278 .nr(8)
4279 .kr(2)
4280 .sr(1)
4281 .m(4)
4282 .n(8)
4283 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004284 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004285 }
4286 }
4287
4288 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_gt_16_strided_a) {
4289 TEST_REQUIRES_ARM_NEON;
4290 for (size_t k = 17; k < 32; k++) {
4291 GemmMicrokernelTester()
4292 .mr(4)
4293 .nr(8)
4294 .kr(2)
4295 .sr(1)
4296 .m(4)
4297 .n(8)
4298 .k(k)
4299 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08004300 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004301 }
4302 }
4303
4304 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
4305 TEST_REQUIRES_ARM_NEON;
4306 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004307 for (uint32_t n = 1; n <= 8; n++) {
4308 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004309 GemmMicrokernelTester()
4310 .mr(4)
4311 .nr(8)
4312 .kr(2)
4313 .sr(1)
4314 .m(m)
4315 .n(n)
4316 .k(k)
4317 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004318 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004319 }
4320 }
4321 }
4322 }
4323
4324 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_div_16) {
4325 TEST_REQUIRES_ARM_NEON;
4326 for (size_t k = 32; k <= 160; k += 16) {
4327 GemmMicrokernelTester()
4328 .mr(4)
4329 .nr(8)
4330 .kr(2)
4331 .sr(1)
4332 .m(4)
4333 .n(8)
4334 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004335 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004336 }
4337 }
4338
4339 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_div_16_strided_a) {
4340 TEST_REQUIRES_ARM_NEON;
4341 for (size_t k = 32; k <= 160; k += 16) {
4342 GemmMicrokernelTester()
4343 .mr(4)
4344 .nr(8)
4345 .kr(2)
4346 .sr(1)
4347 .m(4)
4348 .n(8)
4349 .k(k)
4350 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08004351 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004352 }
4353 }
4354
4355 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
4356 TEST_REQUIRES_ARM_NEON;
4357 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004358 for (uint32_t n = 1; n <= 8; n++) {
4359 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004360 GemmMicrokernelTester()
4361 .mr(4)
4362 .nr(8)
4363 .kr(2)
4364 .sr(1)
4365 .m(m)
4366 .n(n)
4367 .k(k)
4368 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004369 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004370 }
4371 }
4372 }
4373 }
4374
4375 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, n_gt_8) {
4376 TEST_REQUIRES_ARM_NEON;
4377 for (uint32_t n = 9; n < 16; n++) {
4378 for (size_t k = 1; k <= 80; k += 17) {
4379 GemmMicrokernelTester()
4380 .mr(4)
4381 .nr(8)
4382 .kr(2)
4383 .sr(1)
4384 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004385 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004386 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004387 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004388 }
4389 }
4390 }
4391
4392 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
4393 TEST_REQUIRES_ARM_NEON;
4394 for (uint32_t n = 9; n < 16; n++) {
4395 for (size_t k = 1; k <= 80; k += 17) {
4396 GemmMicrokernelTester()
4397 .mr(4)
4398 .nr(8)
4399 .kr(2)
4400 .sr(1)
4401 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004402 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004403 .k(k)
4404 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004405 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004406 }
4407 }
4408 }
4409
4410 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, n_gt_8_strided_a) {
4411 TEST_REQUIRES_ARM_NEON;
4412 for (uint32_t n = 9; n < 16; n++) {
4413 for (size_t k = 1; k <= 80; k += 17) {
4414 GemmMicrokernelTester()
4415 .mr(4)
4416 .nr(8)
4417 .kr(2)
4418 .sr(1)
4419 .m(4)
4420 .n(n)
4421 .k(k)
4422 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004423 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004424 }
4425 }
4426 }
4427
4428 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
4429 TEST_REQUIRES_ARM_NEON;
4430 for (uint32_t n = 9; n < 16; n++) {
4431 for (size_t k = 1; k <= 80; k += 17) {
4432 for (uint32_t m = 1; m <= 4; m++) {
4433 GemmMicrokernelTester()
4434 .mr(4)
4435 .nr(8)
4436 .kr(2)
4437 .sr(1)
4438 .m(m)
4439 .n(n)
4440 .k(k)
4441 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004442 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004443 }
4444 }
4445 }
4446 }
4447
4448 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, n_div_8) {
4449 TEST_REQUIRES_ARM_NEON;
4450 for (uint32_t n = 16; n <= 24; n += 8) {
4451 for (size_t k = 1; k <= 80; k += 17) {
4452 GemmMicrokernelTester()
4453 .mr(4)
4454 .nr(8)
4455 .kr(2)
4456 .sr(1)
4457 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004458 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004459 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004460 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004461 }
4462 }
4463 }
4464
4465 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
4466 TEST_REQUIRES_ARM_NEON;
4467 for (uint32_t n = 16; n <= 24; n += 8) {
4468 for (size_t k = 1; k <= 80; k += 17) {
4469 GemmMicrokernelTester()
4470 .mr(4)
4471 .nr(8)
4472 .kr(2)
4473 .sr(1)
4474 .m(4)
4475 .n(n)
4476 .k(k)
4477 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004478 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004479 }
4480 }
4481 }
4482
4483 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, n_div_8_strided_a) {
4484 TEST_REQUIRES_ARM_NEON;
4485 for (uint32_t n = 16; n <= 24; n += 8) {
4486 for (size_t k = 1; k <= 80; k += 17) {
4487 GemmMicrokernelTester()
4488 .mr(4)
4489 .nr(8)
4490 .kr(2)
4491 .sr(1)
4492 .m(4)
4493 .n(n)
4494 .k(k)
4495 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004496 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004497 }
4498 }
4499 }
4500
4501 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
4502 TEST_REQUIRES_ARM_NEON;
4503 for (uint32_t n = 16; n <= 24; n += 8) {
4504 for (size_t k = 1; k <= 80; k += 17) {
4505 for (uint32_t m = 1; m <= 4; m++) {
4506 GemmMicrokernelTester()
4507 .mr(4)
4508 .nr(8)
4509 .kr(2)
4510 .sr(1)
4511 .m(m)
4512 .n(n)
4513 .k(k)
4514 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004515 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004516 }
4517 }
4518 }
4519 }
4520
4521 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
4522 TEST_REQUIRES_ARM_NEON;
4523 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004524 for (uint32_t n = 1; n <= 8; n++) {
4525 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004526 GemmMicrokernelTester()
4527 .mr(4)
4528 .nr(8)
4529 .kr(2)
4530 .sr(1)
4531 .m(m)
4532 .n(n)
4533 .k(k)
4534 .cm_stride(11)
4535 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004536 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004537 }
4538 }
4539 }
4540 }
4541
4542 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, qmin) {
4543 TEST_REQUIRES_ARM_NEON;
4544 GemmMicrokernelTester()
4545 .mr(4)
4546 .nr(8)
4547 .kr(2)
4548 .sr(1)
4549 .m(4)
4550 .n(8)
4551 .k(16)
4552 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004553 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004554 }
4555
4556 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, qmax) {
4557 TEST_REQUIRES_ARM_NEON;
4558 GemmMicrokernelTester()
4559 .mr(4)
4560 .nr(8)
4561 .kr(2)
4562 .sr(1)
4563 .m(4)
4564 .n(8)
4565 .k(16)
4566 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004567 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004568 }
4569
4570 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD2R, strided_cm) {
4571 TEST_REQUIRES_ARM_NEON;
4572 GemmMicrokernelTester()
4573 .mr(4)
4574 .nr(8)
4575 .kr(2)
4576 .sr(1)
4577 .m(4)
4578 .n(8)
4579 .k(16)
4580 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004581 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004582 }
4583#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4584
4585
4586#if XNN_ARCH_ARM || XNN_ARCH_ARM64
4587 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_eq_8) {
4588 TEST_REQUIRES_ARM_NEON;
4589 GemmMicrokernelTester()
4590 .mr(3)
4591 .nr(8)
4592 .kr(2)
4593 .sr(1)
4594 .m(3)
4595 .n(8)
4596 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004597 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004598 }
4599
4600 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, strided_cn) {
4601 TEST_REQUIRES_ARM_NEON;
4602 GemmMicrokernelTester()
4603 .mr(3)
4604 .nr(8)
4605 .kr(2)
4606 .sr(1)
4607 .m(3)
4608 .n(8)
4609 .k(8)
4610 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004611 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004612 }
4613
4614 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_eq_8_strided_a) {
4615 TEST_REQUIRES_ARM_NEON;
4616 GemmMicrokernelTester()
4617 .mr(3)
4618 .nr(8)
4619 .kr(2)
4620 .sr(1)
4621 .m(3)
4622 .n(8)
4623 .k(8)
4624 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004625 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004626 }
4627
4628 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_eq_8_subtile) {
4629 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004630 for (uint32_t n = 1; n <= 8; n++) {
4631 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004632 GemmMicrokernelTester()
4633 .mr(3)
4634 .nr(8)
4635 .kr(2)
4636 .sr(1)
4637 .m(m)
4638 .n(n)
4639 .k(8)
4640 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004641 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004642 }
4643 }
4644 }
4645
4646 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
4647 TEST_REQUIRES_ARM_NEON;
4648 for (uint32_t m = 1; m <= 3; m++) {
4649 GemmMicrokernelTester()
4650 .mr(3)
4651 .nr(8)
4652 .kr(2)
4653 .sr(1)
4654 .m(m)
4655 .n(8)
4656 .k(8)
4657 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004658 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004659 }
4660 }
4661
4662 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
4663 TEST_REQUIRES_ARM_NEON;
4664 for (uint32_t n = 1; n <= 8; n++) {
4665 GemmMicrokernelTester()
4666 .mr(3)
4667 .nr(8)
4668 .kr(2)
4669 .sr(1)
4670 .m(3)
4671 .n(n)
4672 .k(8)
4673 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004674 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004675 }
4676 }
4677
4678 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_lt_8) {
4679 TEST_REQUIRES_ARM_NEON;
4680 for (size_t k = 1; k < 8; k++) {
4681 GemmMicrokernelTester()
4682 .mr(3)
4683 .nr(8)
4684 .kr(2)
4685 .sr(1)
4686 .m(3)
4687 .n(8)
4688 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004689 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004690 }
4691 }
4692
4693 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_lt_8_strided_a) {
4694 TEST_REQUIRES_ARM_NEON;
4695 for (size_t k = 1; k < 8; k++) {
4696 GemmMicrokernelTester()
4697 .mr(3)
4698 .nr(8)
4699 .kr(2)
4700 .sr(1)
4701 .m(3)
4702 .n(8)
4703 .k(k)
4704 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004705 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004706 }
4707 }
4708
4709 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_lt_8_subtile) {
4710 TEST_REQUIRES_ARM_NEON;
4711 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004712 for (uint32_t n = 1; n <= 8; n++) {
4713 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004714 GemmMicrokernelTester()
4715 .mr(3)
4716 .nr(8)
4717 .kr(2)
4718 .sr(1)
4719 .m(m)
4720 .n(n)
4721 .k(k)
4722 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004723 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004724 }
4725 }
4726 }
4727 }
4728
4729 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_gt_8) {
4730 TEST_REQUIRES_ARM_NEON;
4731 for (size_t k = 9; k < 16; k++) {
4732 GemmMicrokernelTester()
4733 .mr(3)
4734 .nr(8)
4735 .kr(2)
4736 .sr(1)
4737 .m(3)
4738 .n(8)
4739 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004740 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004741 }
4742 }
4743
4744 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_gt_8_strided_a) {
4745 TEST_REQUIRES_ARM_NEON;
4746 for (size_t k = 9; k < 16; k++) {
4747 GemmMicrokernelTester()
4748 .mr(3)
4749 .nr(8)
4750 .kr(2)
4751 .sr(1)
4752 .m(3)
4753 .n(8)
4754 .k(k)
4755 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004756 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004757 }
4758 }
4759
4760 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_gt_8_subtile) {
4761 TEST_REQUIRES_ARM_NEON;
4762 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004763 for (uint32_t n = 1; n <= 8; n++) {
4764 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004765 GemmMicrokernelTester()
4766 .mr(3)
4767 .nr(8)
4768 .kr(2)
4769 .sr(1)
4770 .m(m)
4771 .n(n)
4772 .k(k)
4773 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004774 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004775 }
4776 }
4777 }
4778 }
4779
4780 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_div_8) {
4781 TEST_REQUIRES_ARM_NEON;
4782 for (size_t k = 16; k <= 80; k += 8) {
4783 GemmMicrokernelTester()
4784 .mr(3)
4785 .nr(8)
4786 .kr(2)
4787 .sr(1)
4788 .m(3)
4789 .n(8)
4790 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004791 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004792 }
4793 }
4794
4795 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_div_8_strided_a) {
4796 TEST_REQUIRES_ARM_NEON;
4797 for (size_t k = 16; k <= 80; k += 8) {
4798 GemmMicrokernelTester()
4799 .mr(3)
4800 .nr(8)
4801 .kr(2)
4802 .sr(1)
4803 .m(3)
4804 .n(8)
4805 .k(k)
4806 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004807 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004808 }
4809 }
4810
4811 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_div_8_subtile) {
4812 TEST_REQUIRES_ARM_NEON;
4813 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004814 for (uint32_t n = 1; n <= 8; n++) {
4815 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004816 GemmMicrokernelTester()
4817 .mr(3)
4818 .nr(8)
4819 .kr(2)
4820 .sr(1)
4821 .m(m)
4822 .n(n)
4823 .k(k)
4824 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004825 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004826 }
4827 }
4828 }
4829 }
4830
4831 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_gt_8) {
4832 TEST_REQUIRES_ARM_NEON;
4833 for (uint32_t n = 9; n < 16; n++) {
4834 for (size_t k = 1; k <= 40; k += 9) {
4835 GemmMicrokernelTester()
4836 .mr(3)
4837 .nr(8)
4838 .kr(2)
4839 .sr(1)
4840 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004841 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004842 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004843 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004844 }
4845 }
4846 }
4847
4848 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_gt_8_strided_cn) {
4849 TEST_REQUIRES_ARM_NEON;
4850 for (uint32_t n = 9; n < 16; n++) {
4851 for (size_t k = 1; k <= 40; k += 9) {
4852 GemmMicrokernelTester()
4853 .mr(3)
4854 .nr(8)
4855 .kr(2)
4856 .sr(1)
4857 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004858 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004859 .k(k)
4860 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004861 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004862 }
4863 }
4864 }
4865
4866 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_gt_8_strided_a) {
4867 TEST_REQUIRES_ARM_NEON;
4868 for (uint32_t n = 9; n < 16; n++) {
4869 for (size_t k = 1; k <= 40; k += 9) {
4870 GemmMicrokernelTester()
4871 .mr(3)
4872 .nr(8)
4873 .kr(2)
4874 .sr(1)
4875 .m(3)
4876 .n(n)
4877 .k(k)
4878 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004879 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004880 }
4881 }
4882 }
4883
4884 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_gt_8_subtile) {
4885 TEST_REQUIRES_ARM_NEON;
4886 for (uint32_t n = 9; n < 16; n++) {
4887 for (size_t k = 1; k <= 40; k += 9) {
4888 for (uint32_t m = 1; m <= 3; m++) {
4889 GemmMicrokernelTester()
4890 .mr(3)
4891 .nr(8)
4892 .kr(2)
4893 .sr(1)
4894 .m(m)
4895 .n(n)
4896 .k(k)
4897 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004898 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004899 }
4900 }
4901 }
4902 }
4903
4904 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_div_8) {
4905 TEST_REQUIRES_ARM_NEON;
4906 for (uint32_t n = 16; n <= 24; n += 8) {
4907 for (size_t k = 1; k <= 40; k += 9) {
4908 GemmMicrokernelTester()
4909 .mr(3)
4910 .nr(8)
4911 .kr(2)
4912 .sr(1)
4913 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004914 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004915 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004916 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004917 }
4918 }
4919 }
4920
4921 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_div_8_strided_cn) {
4922 TEST_REQUIRES_ARM_NEON;
4923 for (uint32_t n = 16; n <= 24; n += 8) {
4924 for (size_t k = 1; k <= 40; k += 9) {
4925 GemmMicrokernelTester()
4926 .mr(3)
4927 .nr(8)
4928 .kr(2)
4929 .sr(1)
4930 .m(3)
4931 .n(n)
4932 .k(k)
4933 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004934 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004935 }
4936 }
4937 }
4938
4939 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_div_8_strided_a) {
4940 TEST_REQUIRES_ARM_NEON;
4941 for (uint32_t n = 16; n <= 24; n += 8) {
4942 for (size_t k = 1; k <= 40; k += 9) {
4943 GemmMicrokernelTester()
4944 .mr(3)
4945 .nr(8)
4946 .kr(2)
4947 .sr(1)
4948 .m(3)
4949 .n(n)
4950 .k(k)
4951 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004952 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004953 }
4954 }
4955 }
4956
4957 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_div_8_subtile) {
4958 TEST_REQUIRES_ARM_NEON;
4959 for (uint32_t n = 16; n <= 24; n += 8) {
4960 for (size_t k = 1; k <= 40; k += 9) {
4961 for (uint32_t m = 1; m <= 3; m++) {
4962 GemmMicrokernelTester()
4963 .mr(3)
4964 .nr(8)
4965 .kr(2)
4966 .sr(1)
4967 .m(m)
4968 .n(n)
4969 .k(k)
4970 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004971 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004972 }
4973 }
4974 }
4975 }
4976
4977 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, strided_cm_subtile) {
4978 TEST_REQUIRES_ARM_NEON;
4979 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004980 for (uint32_t n = 1; n <= 8; n++) {
4981 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004982 GemmMicrokernelTester()
4983 .mr(3)
4984 .nr(8)
4985 .kr(2)
4986 .sr(1)
4987 .m(m)
4988 .n(n)
4989 .k(k)
4990 .cm_stride(11)
4991 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004992 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004993 }
4994 }
4995 }
4996 }
4997
4998 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, qmin) {
4999 TEST_REQUIRES_ARM_NEON;
5000 GemmMicrokernelTester()
5001 .mr(3)
5002 .nr(8)
5003 .kr(2)
5004 .sr(1)
5005 .m(3)
5006 .n(8)
5007 .k(8)
5008 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005009 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005010 }
5011
5012 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, qmax) {
5013 TEST_REQUIRES_ARM_NEON;
5014 GemmMicrokernelTester()
5015 .mr(3)
5016 .nr(8)
5017 .kr(2)
5018 .sr(1)
5019 .m(3)
5020 .n(8)
5021 .k(8)
5022 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005023 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005024 }
5025
5026 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, strided_cm) {
5027 TEST_REQUIRES_ARM_NEON;
5028 GemmMicrokernelTester()
5029 .mr(3)
5030 .nr(8)
5031 .kr(2)
5032 .sr(1)
5033 .m(3)
5034 .n(8)
5035 .k(8)
5036 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005037 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005038 }
5039#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5040
5041
5042#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5043 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_eq_8) {
5044 TEST_REQUIRES_ARM_NEON;
5045 GemmMicrokernelTester()
5046 .mr(4)
5047 .nr(8)
5048 .kr(2)
5049 .sr(1)
5050 .m(4)
5051 .n(8)
5052 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005053 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005054 }
5055
5056 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, strided_cn) {
5057 TEST_REQUIRES_ARM_NEON;
5058 GemmMicrokernelTester()
5059 .mr(4)
5060 .nr(8)
5061 .kr(2)
5062 .sr(1)
5063 .m(4)
5064 .n(8)
5065 .k(8)
5066 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005067 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005068 }
5069
5070 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_eq_8_strided_a) {
5071 TEST_REQUIRES_ARM_NEON;
5072 GemmMicrokernelTester()
5073 .mr(4)
5074 .nr(8)
5075 .kr(2)
5076 .sr(1)
5077 .m(4)
5078 .n(8)
5079 .k(8)
5080 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005081 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005082 }
5083
5084 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_eq_8_subtile) {
5085 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005086 for (uint32_t n = 1; n <= 8; n++) {
5087 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005088 GemmMicrokernelTester()
5089 .mr(4)
5090 .nr(8)
5091 .kr(2)
5092 .sr(1)
5093 .m(m)
5094 .n(n)
5095 .k(8)
5096 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005097 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005098 }
5099 }
5100 }
5101
5102 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
5103 TEST_REQUIRES_ARM_NEON;
5104 for (uint32_t m = 1; m <= 4; m++) {
5105 GemmMicrokernelTester()
5106 .mr(4)
5107 .nr(8)
5108 .kr(2)
5109 .sr(1)
5110 .m(m)
5111 .n(8)
5112 .k(8)
5113 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005114 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005115 }
5116 }
5117
5118 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
5119 TEST_REQUIRES_ARM_NEON;
5120 for (uint32_t n = 1; n <= 8; n++) {
5121 GemmMicrokernelTester()
5122 .mr(4)
5123 .nr(8)
5124 .kr(2)
5125 .sr(1)
5126 .m(4)
5127 .n(n)
5128 .k(8)
5129 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005130 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005131 }
5132 }
5133
5134 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_lt_8) {
5135 TEST_REQUIRES_ARM_NEON;
5136 for (size_t k = 1; k < 8; k++) {
5137 GemmMicrokernelTester()
5138 .mr(4)
5139 .nr(8)
5140 .kr(2)
5141 .sr(1)
5142 .m(4)
5143 .n(8)
5144 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005145 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005146 }
5147 }
5148
5149 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_lt_8_strided_a) {
5150 TEST_REQUIRES_ARM_NEON;
5151 for (size_t k = 1; k < 8; k++) {
5152 GemmMicrokernelTester()
5153 .mr(4)
5154 .nr(8)
5155 .kr(2)
5156 .sr(1)
5157 .m(4)
5158 .n(8)
5159 .k(k)
5160 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005161 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005162 }
5163 }
5164
5165 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_lt_8_subtile) {
5166 TEST_REQUIRES_ARM_NEON;
5167 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005168 for (uint32_t n = 1; n <= 8; n++) {
5169 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005170 GemmMicrokernelTester()
5171 .mr(4)
5172 .nr(8)
5173 .kr(2)
5174 .sr(1)
5175 .m(m)
5176 .n(n)
5177 .k(k)
5178 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005179 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005180 }
5181 }
5182 }
5183 }
5184
5185 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_gt_8) {
5186 TEST_REQUIRES_ARM_NEON;
5187 for (size_t k = 9; k < 16; k++) {
5188 GemmMicrokernelTester()
5189 .mr(4)
5190 .nr(8)
5191 .kr(2)
5192 .sr(1)
5193 .m(4)
5194 .n(8)
5195 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005196 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005197 }
5198 }
5199
5200 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_gt_8_strided_a) {
5201 TEST_REQUIRES_ARM_NEON;
5202 for (size_t k = 9; k < 16; k++) {
5203 GemmMicrokernelTester()
5204 .mr(4)
5205 .nr(8)
5206 .kr(2)
5207 .sr(1)
5208 .m(4)
5209 .n(8)
5210 .k(k)
5211 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005212 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005213 }
5214 }
5215
5216 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_gt_8_subtile) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005217 TEST_REQUIRES_ARM_NEON;
5218 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005219 for (uint32_t n = 1; n <= 8; n++) {
5220 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005221 GemmMicrokernelTester()
5222 .mr(4)
5223 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005224 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005225 .sr(1)
5226 .m(m)
5227 .n(n)
5228 .k(k)
5229 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005230 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005231 }
5232 }
5233 }
5234 }
5235
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005236 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_div_8) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005237 TEST_REQUIRES_ARM_NEON;
5238 for (size_t k = 16; k <= 80; k += 8) {
5239 GemmMicrokernelTester()
5240 .mr(4)
5241 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005242 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005243 .sr(1)
5244 .m(4)
5245 .n(8)
5246 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005248 }
5249 }
5250
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005251 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_div_8_strided_a) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005252 TEST_REQUIRES_ARM_NEON;
5253 for (size_t k = 16; k <= 80; k += 8) {
5254 GemmMicrokernelTester()
5255 .mr(4)
5256 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005257 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005258 .sr(1)
5259 .m(4)
5260 .n(8)
5261 .k(k)
5262 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005263 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005264 }
5265 }
5266
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005267 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_div_8_subtile) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005268 TEST_REQUIRES_ARM_NEON;
5269 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005270 for (uint32_t n = 1; n <= 8; n++) {
5271 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005272 GemmMicrokernelTester()
5273 .mr(4)
5274 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005275 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005276 .sr(1)
5277 .m(m)
5278 .n(n)
5279 .k(k)
5280 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005281 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005282 }
5283 }
5284 }
5285 }
5286
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005287 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_gt_8) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005288 TEST_REQUIRES_ARM_NEON;
5289 for (uint32_t n = 9; n < 16; n++) {
5290 for (size_t k = 1; k <= 40; k += 9) {
5291 GemmMicrokernelTester()
5292 .mr(4)
5293 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005294 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005295 .sr(1)
5296 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005297 .n(n)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005298 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005299 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005300 }
5301 }
5302 }
5303
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005304 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_gt_8_strided_cn) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005305 TEST_REQUIRES_ARM_NEON;
5306 for (uint32_t n = 9; n < 16; n++) {
5307 for (size_t k = 1; k <= 40; k += 9) {
5308 GemmMicrokernelTester()
5309 .mr(4)
5310 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005311 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005312 .sr(1)
5313 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005314 .n(n)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005315 .k(k)
5316 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005317 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005318 }
5319 }
5320 }
5321
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005322 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_gt_8_strided_a) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005323 TEST_REQUIRES_ARM_NEON;
5324 for (uint32_t n = 9; n < 16; n++) {
5325 for (size_t k = 1; k <= 40; k += 9) {
5326 GemmMicrokernelTester()
5327 .mr(4)
5328 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005329 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005330 .sr(1)
5331 .m(4)
5332 .n(n)
5333 .k(k)
5334 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005335 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005336 }
5337 }
5338 }
5339
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005340 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_gt_8_subtile) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005341 TEST_REQUIRES_ARM_NEON;
5342 for (uint32_t n = 9; n < 16; n++) {
5343 for (size_t k = 1; k <= 40; k += 9) {
5344 for (uint32_t m = 1; m <= 4; m++) {
5345 GemmMicrokernelTester()
5346 .mr(4)
5347 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005348 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005349 .sr(1)
5350 .m(m)
5351 .n(n)
5352 .k(k)
5353 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005354 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005355 }
5356 }
5357 }
5358 }
5359
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005360 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_div_8) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005361 TEST_REQUIRES_ARM_NEON;
5362 for (uint32_t n = 16; n <= 24; n += 8) {
5363 for (size_t k = 1; k <= 40; k += 9) {
5364 GemmMicrokernelTester()
5365 .mr(4)
5366 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005367 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005368 .sr(1)
5369 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005370 .n(n)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005371 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005372 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005373 }
5374 }
5375 }
5376
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005377 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_div_8_strided_cn) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005378 TEST_REQUIRES_ARM_NEON;
5379 for (uint32_t n = 16; n <= 24; n += 8) {
5380 for (size_t k = 1; k <= 40; k += 9) {
5381 GemmMicrokernelTester()
5382 .mr(4)
5383 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005384 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005385 .sr(1)
5386 .m(4)
5387 .n(n)
5388 .k(k)
5389 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005390 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005391 }
5392 }
5393 }
5394
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005395 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_div_8_strided_a) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005396 TEST_REQUIRES_ARM_NEON;
5397 for (uint32_t n = 16; n <= 24; n += 8) {
5398 for (size_t k = 1; k <= 40; k += 9) {
5399 GemmMicrokernelTester()
5400 .mr(4)
5401 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005402 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005403 .sr(1)
5404 .m(4)
5405 .n(n)
5406 .k(k)
5407 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005408 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005409 }
5410 }
5411 }
5412
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005413 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_div_8_subtile) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005414 TEST_REQUIRES_ARM_NEON;
5415 for (uint32_t n = 16; n <= 24; n += 8) {
5416 for (size_t k = 1; k <= 40; k += 9) {
5417 for (uint32_t m = 1; m <= 4; m++) {
5418 GemmMicrokernelTester()
5419 .mr(4)
5420 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005421 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005422 .sr(1)
5423 .m(m)
5424 .n(n)
5425 .k(k)
5426 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005427 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005428 }
5429 }
5430 }
5431 }
5432
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005433 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, strided_cm_subtile) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005434 TEST_REQUIRES_ARM_NEON;
5435 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005436 for (uint32_t n = 1; n <= 8; n++) {
5437 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005438 GemmMicrokernelTester()
5439 .mr(4)
5440 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005441 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005442 .sr(1)
5443 .m(m)
5444 .n(n)
5445 .k(k)
5446 .cm_stride(11)
5447 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005448 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005449 }
5450 }
5451 }
5452 }
5453
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005454 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, qmin) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005455 TEST_REQUIRES_ARM_NEON;
5456 GemmMicrokernelTester()
5457 .mr(4)
5458 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005459 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005460 .sr(1)
5461 .m(4)
5462 .n(8)
5463 .k(8)
5464 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005465 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005466 }
5467
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005468 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, qmax) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005469 TEST_REQUIRES_ARM_NEON;
5470 GemmMicrokernelTester()
5471 .mr(4)
5472 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005473 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005474 .sr(1)
5475 .m(4)
5476 .n(8)
5477 .k(8)
5478 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005479 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005480 }
5481
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005482 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, strided_cm) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08005483 TEST_REQUIRES_ARM_NEON;
5484 GemmMicrokernelTester()
5485 .mr(4)
5486 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005487 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08005488 .sr(1)
5489 .m(4)
5490 .n(8)
5491 .k(8)
5492 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005493 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08005494 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005495#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde4d3f762021-12-23 15:31:43 -08005496
5497
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005498#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5499 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_eq_8) {
5500 TEST_REQUIRES_ARM_NEON;
5501 GemmMicrokernelTester()
5502 .mr(3)
5503 .nr(16)
5504 .kr(2)
5505 .sr(1)
5506 .m(3)
5507 .n(16)
5508 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005509 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005510 }
5511
5512 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, strided_cn) {
5513 TEST_REQUIRES_ARM_NEON;
5514 GemmMicrokernelTester()
5515 .mr(3)
5516 .nr(16)
5517 .kr(2)
5518 .sr(1)
5519 .m(3)
5520 .n(16)
5521 .k(8)
5522 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005523 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005524 }
5525
5526 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_eq_8_strided_a) {
5527 TEST_REQUIRES_ARM_NEON;
5528 GemmMicrokernelTester()
5529 .mr(3)
5530 .nr(16)
5531 .kr(2)
5532 .sr(1)
5533 .m(3)
5534 .n(16)
5535 .k(8)
5536 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005537 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005538 }
5539
5540 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_eq_8_subtile) {
5541 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005542 for (uint32_t n = 1; n <= 16; n++) {
5543 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005544 GemmMicrokernelTester()
5545 .mr(3)
5546 .nr(16)
5547 .kr(2)
5548 .sr(1)
5549 .m(m)
5550 .n(n)
5551 .k(8)
5552 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005553 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005554 }
5555 }
5556 }
5557
5558 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
5559 TEST_REQUIRES_ARM_NEON;
5560 for (uint32_t m = 1; m <= 3; m++) {
5561 GemmMicrokernelTester()
5562 .mr(3)
5563 .nr(16)
5564 .kr(2)
5565 .sr(1)
5566 .m(m)
5567 .n(16)
5568 .k(8)
5569 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005570 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005571 }
5572 }
5573
5574 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
5575 TEST_REQUIRES_ARM_NEON;
5576 for (uint32_t n = 1; n <= 16; n++) {
5577 GemmMicrokernelTester()
5578 .mr(3)
5579 .nr(16)
5580 .kr(2)
5581 .sr(1)
5582 .m(3)
5583 .n(n)
5584 .k(8)
5585 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005586 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005587 }
5588 }
5589
5590 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_lt_8) {
5591 TEST_REQUIRES_ARM_NEON;
5592 for (size_t k = 1; k < 8; k++) {
5593 GemmMicrokernelTester()
5594 .mr(3)
5595 .nr(16)
5596 .kr(2)
5597 .sr(1)
5598 .m(3)
5599 .n(16)
5600 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005601 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005602 }
5603 }
5604
5605 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_lt_8_strided_a) {
5606 TEST_REQUIRES_ARM_NEON;
5607 for (size_t k = 1; k < 8; k++) {
5608 GemmMicrokernelTester()
5609 .mr(3)
5610 .nr(16)
5611 .kr(2)
5612 .sr(1)
5613 .m(3)
5614 .n(16)
5615 .k(k)
5616 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005617 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005618 }
5619 }
5620
5621 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_lt_8_subtile) {
5622 TEST_REQUIRES_ARM_NEON;
5623 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005624 for (uint32_t n = 1; n <= 16; n++) {
5625 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005626 GemmMicrokernelTester()
5627 .mr(3)
5628 .nr(16)
5629 .kr(2)
5630 .sr(1)
5631 .m(m)
5632 .n(n)
5633 .k(k)
5634 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005635 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005636 }
5637 }
5638 }
5639 }
5640
5641 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_gt_8) {
5642 TEST_REQUIRES_ARM_NEON;
5643 for (size_t k = 9; k < 16; k++) {
5644 GemmMicrokernelTester()
5645 .mr(3)
5646 .nr(16)
5647 .kr(2)
5648 .sr(1)
5649 .m(3)
5650 .n(16)
5651 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005652 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005653 }
5654 }
5655
5656 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_gt_8_strided_a) {
5657 TEST_REQUIRES_ARM_NEON;
5658 for (size_t k = 9; k < 16; k++) {
5659 GemmMicrokernelTester()
5660 .mr(3)
5661 .nr(16)
5662 .kr(2)
5663 .sr(1)
5664 .m(3)
5665 .n(16)
5666 .k(k)
5667 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005668 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005669 }
5670 }
5671
5672 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_gt_8_subtile) {
5673 TEST_REQUIRES_ARM_NEON;
5674 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005675 for (uint32_t n = 1; n <= 16; n++) {
5676 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005677 GemmMicrokernelTester()
5678 .mr(3)
5679 .nr(16)
5680 .kr(2)
5681 .sr(1)
5682 .m(m)
5683 .n(n)
5684 .k(k)
5685 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005686 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005687 }
5688 }
5689 }
5690 }
5691
5692 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_div_8) {
5693 TEST_REQUIRES_ARM_NEON;
5694 for (size_t k = 16; k <= 80; k += 8) {
5695 GemmMicrokernelTester()
5696 .mr(3)
5697 .nr(16)
5698 .kr(2)
5699 .sr(1)
5700 .m(3)
5701 .n(16)
5702 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005703 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005704 }
5705 }
5706
5707 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_div_8_strided_a) {
5708 TEST_REQUIRES_ARM_NEON;
5709 for (size_t k = 16; k <= 80; k += 8) {
5710 GemmMicrokernelTester()
5711 .mr(3)
5712 .nr(16)
5713 .kr(2)
5714 .sr(1)
5715 .m(3)
5716 .n(16)
5717 .k(k)
5718 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005719 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005720 }
5721 }
5722
5723 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_div_8_subtile) {
5724 TEST_REQUIRES_ARM_NEON;
5725 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005726 for (uint32_t n = 1; n <= 16; n++) {
5727 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005728 GemmMicrokernelTester()
5729 .mr(3)
5730 .nr(16)
5731 .kr(2)
5732 .sr(1)
5733 .m(m)
5734 .n(n)
5735 .k(k)
5736 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005737 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005738 }
5739 }
5740 }
5741 }
5742
5743 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_gt_16) {
5744 TEST_REQUIRES_ARM_NEON;
5745 for (uint32_t n = 17; n < 32; n++) {
5746 for (size_t k = 1; k <= 40; k += 9) {
5747 GemmMicrokernelTester()
5748 .mr(3)
5749 .nr(16)
5750 .kr(2)
5751 .sr(1)
5752 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005753 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005754 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005755 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005756 }
5757 }
5758 }
5759
5760 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_gt_16_strided_cn) {
5761 TEST_REQUIRES_ARM_NEON;
5762 for (uint32_t n = 17; n < 32; n++) {
5763 for (size_t k = 1; k <= 40; k += 9) {
5764 GemmMicrokernelTester()
5765 .mr(3)
5766 .nr(16)
5767 .kr(2)
5768 .sr(1)
5769 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005770 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005771 .k(k)
5772 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005773 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005774 }
5775 }
5776 }
5777
5778 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_gt_16_strided_a) {
5779 TEST_REQUIRES_ARM_NEON;
5780 for (uint32_t n = 17; n < 32; n++) {
5781 for (size_t k = 1; k <= 40; k += 9) {
5782 GemmMicrokernelTester()
5783 .mr(3)
5784 .nr(16)
5785 .kr(2)
5786 .sr(1)
5787 .m(3)
5788 .n(n)
5789 .k(k)
5790 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005791 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005792 }
5793 }
5794 }
5795
5796 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_gt_16_subtile) {
5797 TEST_REQUIRES_ARM_NEON;
5798 for (uint32_t n = 17; n < 32; n++) {
5799 for (size_t k = 1; k <= 40; k += 9) {
5800 for (uint32_t m = 1; m <= 3; m++) {
5801 GemmMicrokernelTester()
5802 .mr(3)
5803 .nr(16)
5804 .kr(2)
5805 .sr(1)
5806 .m(m)
5807 .n(n)
5808 .k(k)
5809 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005810 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005811 }
5812 }
5813 }
5814 }
5815
5816 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_div_16) {
5817 TEST_REQUIRES_ARM_NEON;
5818 for (uint32_t n = 32; n <= 48; n += 16) {
5819 for (size_t k = 1; k <= 40; k += 9) {
5820 GemmMicrokernelTester()
5821 .mr(3)
5822 .nr(16)
5823 .kr(2)
5824 .sr(1)
5825 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005826 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005827 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005828 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005829 }
5830 }
5831 }
5832
5833 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_div_16_strided_cn) {
5834 TEST_REQUIRES_ARM_NEON;
5835 for (uint32_t n = 32; n <= 48; n += 16) {
5836 for (size_t k = 1; k <= 40; k += 9) {
5837 GemmMicrokernelTester()
5838 .mr(3)
5839 .nr(16)
5840 .kr(2)
5841 .sr(1)
5842 .m(3)
5843 .n(n)
5844 .k(k)
5845 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005846 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005847 }
5848 }
5849 }
5850
5851 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_div_16_strided_a) {
5852 TEST_REQUIRES_ARM_NEON;
5853 for (uint32_t n = 32; n <= 48; n += 16) {
5854 for (size_t k = 1; k <= 40; k += 9) {
5855 GemmMicrokernelTester()
5856 .mr(3)
5857 .nr(16)
5858 .kr(2)
5859 .sr(1)
5860 .m(3)
5861 .n(n)
5862 .k(k)
5863 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005864 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005865 }
5866 }
5867 }
5868
5869 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_div_16_subtile) {
5870 TEST_REQUIRES_ARM_NEON;
5871 for (uint32_t n = 32; n <= 48; n += 16) {
5872 for (size_t k = 1; k <= 40; k += 9) {
5873 for (uint32_t m = 1; m <= 3; m++) {
5874 GemmMicrokernelTester()
5875 .mr(3)
5876 .nr(16)
5877 .kr(2)
5878 .sr(1)
5879 .m(m)
5880 .n(n)
5881 .k(k)
5882 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005883 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005884 }
5885 }
5886 }
5887 }
5888
5889 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, strided_cm_subtile) {
5890 TEST_REQUIRES_ARM_NEON;
5891 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005892 for (uint32_t n = 1; n <= 16; n++) {
5893 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005894 GemmMicrokernelTester()
5895 .mr(3)
5896 .nr(16)
5897 .kr(2)
5898 .sr(1)
5899 .m(m)
5900 .n(n)
5901 .k(k)
5902 .cm_stride(19)
5903 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005904 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005905 }
5906 }
5907 }
5908 }
5909
5910 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, qmin) {
5911 TEST_REQUIRES_ARM_NEON;
5912 GemmMicrokernelTester()
5913 .mr(3)
5914 .nr(16)
5915 .kr(2)
5916 .sr(1)
5917 .m(3)
5918 .n(16)
5919 .k(8)
5920 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005921 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005922 }
5923
5924 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, qmax) {
5925 TEST_REQUIRES_ARM_NEON;
5926 GemmMicrokernelTester()
5927 .mr(3)
5928 .nr(16)
5929 .kr(2)
5930 .sr(1)
5931 .m(3)
5932 .n(16)
5933 .k(8)
5934 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005935 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005936 }
5937
5938 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, strided_cm) {
5939 TEST_REQUIRES_ARM_NEON;
5940 GemmMicrokernelTester()
5941 .mr(3)
5942 .nr(16)
5943 .kr(2)
5944 .sr(1)
5945 .m(3)
5946 .n(16)
5947 .k(8)
5948 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005949 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005950 }
5951#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5952
5953
5954#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5955 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_eq_16) {
5956 TEST_REQUIRES_ARM_NEON;
5957 GemmMicrokernelTester()
5958 .mr(1)
5959 .nr(8)
5960 .kr(2)
5961 .sr(1)
5962 .m(1)
5963 .n(8)
5964 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005965 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005966 }
5967
5968 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, strided_cn) {
5969 TEST_REQUIRES_ARM_NEON;
5970 GemmMicrokernelTester()
5971 .mr(1)
5972 .nr(8)
5973 .kr(2)
5974 .sr(1)
5975 .m(1)
5976 .n(8)
5977 .k(16)
5978 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005979 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005980 }
5981
5982 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_eq_16_strided_a) {
5983 TEST_REQUIRES_ARM_NEON;
5984 GemmMicrokernelTester()
5985 .mr(1)
5986 .nr(8)
5987 .kr(2)
5988 .sr(1)
5989 .m(1)
5990 .n(8)
5991 .k(16)
5992 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005993 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005994 }
5995
5996 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
5997 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005998 for (uint32_t n = 1; n <= 8; n++) {
5999 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006000 GemmMicrokernelTester()
6001 .mr(1)
6002 .nr(8)
6003 .kr(2)
6004 .sr(1)
6005 .m(m)
6006 .n(n)
6007 .k(16)
6008 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006009 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006010 }
6011 }
6012 }
6013
6014 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
6015 TEST_REQUIRES_ARM_NEON;
6016 for (uint32_t m = 1; m <= 1; m++) {
6017 GemmMicrokernelTester()
6018 .mr(1)
6019 .nr(8)
6020 .kr(2)
6021 .sr(1)
6022 .m(m)
6023 .n(8)
6024 .k(16)
6025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006026 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006027 }
6028 }
6029
6030 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
6031 TEST_REQUIRES_ARM_NEON;
6032 for (uint32_t n = 1; n <= 8; n++) {
6033 GemmMicrokernelTester()
6034 .mr(1)
6035 .nr(8)
6036 .kr(2)
6037 .sr(1)
6038 .m(1)
6039 .n(n)
6040 .k(16)
6041 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006042 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006043 }
6044 }
6045
6046 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_lt_16) {
6047 TEST_REQUIRES_ARM_NEON;
6048 for (size_t k = 1; k < 16; k++) {
6049 GemmMicrokernelTester()
6050 .mr(1)
6051 .nr(8)
6052 .kr(2)
6053 .sr(1)
6054 .m(1)
6055 .n(8)
6056 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006057 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006058 }
6059 }
6060
6061 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_lt_16_strided_a) {
6062 TEST_REQUIRES_ARM_NEON;
6063 for (size_t k = 1; k < 16; k++) {
6064 GemmMicrokernelTester()
6065 .mr(1)
6066 .nr(8)
6067 .kr(2)
6068 .sr(1)
6069 .m(1)
6070 .n(8)
6071 .k(k)
6072 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006073 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006074 }
6075 }
6076
6077 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
6078 TEST_REQUIRES_ARM_NEON;
6079 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006080 for (uint32_t n = 1; n <= 8; n++) {
6081 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006082 GemmMicrokernelTester()
6083 .mr(1)
6084 .nr(8)
6085 .kr(2)
6086 .sr(1)
6087 .m(m)
6088 .n(n)
6089 .k(k)
6090 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006091 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006092 }
6093 }
6094 }
6095 }
6096
6097 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_gt_16) {
6098 TEST_REQUIRES_ARM_NEON;
6099 for (size_t k = 17; k < 32; k++) {
6100 GemmMicrokernelTester()
6101 .mr(1)
6102 .nr(8)
6103 .kr(2)
6104 .sr(1)
6105 .m(1)
6106 .n(8)
6107 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006108 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006109 }
6110 }
6111
6112 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_gt_16_strided_a) {
6113 TEST_REQUIRES_ARM_NEON;
6114 for (size_t k = 17; k < 32; k++) {
6115 GemmMicrokernelTester()
6116 .mr(1)
6117 .nr(8)
6118 .kr(2)
6119 .sr(1)
6120 .m(1)
6121 .n(8)
6122 .k(k)
6123 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08006124 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006125 }
6126 }
6127
6128 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
6129 TEST_REQUIRES_ARM_NEON;
6130 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006131 for (uint32_t n = 1; n <= 8; n++) {
6132 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006133 GemmMicrokernelTester()
6134 .mr(1)
6135 .nr(8)
6136 .kr(2)
6137 .sr(1)
6138 .m(m)
6139 .n(n)
6140 .k(k)
6141 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006142 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006143 }
6144 }
6145 }
6146 }
6147
6148 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_div_16) {
6149 TEST_REQUIRES_ARM_NEON;
6150 for (size_t k = 32; k <= 160; k += 16) {
6151 GemmMicrokernelTester()
6152 .mr(1)
6153 .nr(8)
6154 .kr(2)
6155 .sr(1)
6156 .m(1)
6157 .n(8)
6158 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006159 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006160 }
6161 }
6162
6163 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_div_16_strided_a) {
6164 TEST_REQUIRES_ARM_NEON;
6165 for (size_t k = 32; k <= 160; k += 16) {
6166 GemmMicrokernelTester()
6167 .mr(1)
6168 .nr(8)
6169 .kr(2)
6170 .sr(1)
6171 .m(1)
6172 .n(8)
6173 .k(k)
6174 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08006175 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006176 }
6177 }
6178
6179 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_div_16_subtile) {
6180 TEST_REQUIRES_ARM_NEON;
6181 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006182 for (uint32_t n = 1; n <= 8; n++) {
6183 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006184 GemmMicrokernelTester()
6185 .mr(1)
6186 .nr(8)
6187 .kr(2)
6188 .sr(1)
6189 .m(m)
6190 .n(n)
6191 .k(k)
6192 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006193 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006194 }
6195 }
6196 }
6197 }
6198
6199 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_gt_8) {
6200 TEST_REQUIRES_ARM_NEON;
6201 for (uint32_t n = 9; n < 16; n++) {
6202 for (size_t k = 1; k <= 80; k += 17) {
6203 GemmMicrokernelTester()
6204 .mr(1)
6205 .nr(8)
6206 .kr(2)
6207 .sr(1)
6208 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006209 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006210 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006211 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006212 }
6213 }
6214 }
6215
6216 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
6217 TEST_REQUIRES_ARM_NEON;
6218 for (uint32_t n = 9; n < 16; n++) {
6219 for (size_t k = 1; k <= 80; k += 17) {
6220 GemmMicrokernelTester()
6221 .mr(1)
6222 .nr(8)
6223 .kr(2)
6224 .sr(1)
6225 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006226 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006227 .k(k)
6228 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006229 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006230 }
6231 }
6232 }
6233
6234 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_gt_8_strided_a) {
6235 TEST_REQUIRES_ARM_NEON;
6236 for (uint32_t n = 9; n < 16; n++) {
6237 for (size_t k = 1; k <= 80; k += 17) {
6238 GemmMicrokernelTester()
6239 .mr(1)
6240 .nr(8)
6241 .kr(2)
6242 .sr(1)
6243 .m(1)
6244 .n(n)
6245 .k(k)
6246 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006248 }
6249 }
6250 }
6251
6252 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
6253 TEST_REQUIRES_ARM_NEON;
6254 for (uint32_t n = 9; n < 16; n++) {
6255 for (size_t k = 1; k <= 80; k += 17) {
6256 for (uint32_t m = 1; m <= 1; m++) {
6257 GemmMicrokernelTester()
6258 .mr(1)
6259 .nr(8)
6260 .kr(2)
6261 .sr(1)
6262 .m(m)
6263 .n(n)
6264 .k(k)
6265 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006266 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006267 }
6268 }
6269 }
6270 }
6271
6272 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_div_8) {
6273 TEST_REQUIRES_ARM_NEON;
6274 for (uint32_t n = 16; n <= 24; n += 8) {
6275 for (size_t k = 1; k <= 80; k += 17) {
6276 GemmMicrokernelTester()
6277 .mr(1)
6278 .nr(8)
6279 .kr(2)
6280 .sr(1)
6281 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006282 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006283 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006284 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006285 }
6286 }
6287 }
6288
6289 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
6290 TEST_REQUIRES_ARM_NEON;
6291 for (uint32_t n = 16; n <= 24; n += 8) {
6292 for (size_t k = 1; k <= 80; k += 17) {
6293 GemmMicrokernelTester()
6294 .mr(1)
6295 .nr(8)
6296 .kr(2)
6297 .sr(1)
6298 .m(1)
6299 .n(n)
6300 .k(k)
6301 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006302 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006303 }
6304 }
6305 }
6306
6307 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_div_8_strided_a) {
6308 TEST_REQUIRES_ARM_NEON;
6309 for (uint32_t n = 16; n <= 24; n += 8) {
6310 for (size_t k = 1; k <= 80; k += 17) {
6311 GemmMicrokernelTester()
6312 .mr(1)
6313 .nr(8)
6314 .kr(2)
6315 .sr(1)
6316 .m(1)
6317 .n(n)
6318 .k(k)
6319 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006320 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006321 }
6322 }
6323 }
6324
6325 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
6326 TEST_REQUIRES_ARM_NEON;
6327 for (uint32_t n = 16; n <= 24; n += 8) {
6328 for (size_t k = 1; k <= 80; k += 17) {
6329 for (uint32_t m = 1; m <= 1; m++) {
6330 GemmMicrokernelTester()
6331 .mr(1)
6332 .nr(8)
6333 .kr(2)
6334 .sr(1)
6335 .m(m)
6336 .n(n)
6337 .k(k)
6338 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006339 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006340 }
6341 }
6342 }
6343 }
6344
6345 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, strided_cm_subtile) {
6346 TEST_REQUIRES_ARM_NEON;
6347 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006348 for (uint32_t n = 1; n <= 8; n++) {
6349 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006350 GemmMicrokernelTester()
6351 .mr(1)
6352 .nr(8)
6353 .kr(2)
6354 .sr(1)
6355 .m(m)
6356 .n(n)
6357 .k(k)
6358 .cm_stride(11)
6359 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006360 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006361 }
6362 }
6363 }
6364 }
6365
6366 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, qmin) {
6367 TEST_REQUIRES_ARM_NEON;
6368 GemmMicrokernelTester()
6369 .mr(1)
6370 .nr(8)
6371 .kr(2)
6372 .sr(1)
6373 .m(1)
6374 .n(8)
6375 .k(16)
6376 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006377 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006378 }
6379
6380 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, qmax) {
6381 TEST_REQUIRES_ARM_NEON;
6382 GemmMicrokernelTester()
6383 .mr(1)
6384 .nr(8)
6385 .kr(2)
6386 .sr(1)
6387 .m(1)
6388 .n(8)
6389 .k(16)
6390 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006391 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006392 }
6393
6394 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, strided_cm) {
6395 TEST_REQUIRES_ARM_NEON;
6396 GemmMicrokernelTester()
6397 .mr(1)
6398 .nr(8)
6399 .kr(2)
6400 .sr(1)
6401 .m(1)
6402 .n(8)
6403 .k(16)
6404 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006405 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006406 }
6407#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6408
6409
6410#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6411 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16) {
6412 TEST_REQUIRES_ARM_NEON;
6413 GemmMicrokernelTester()
6414 .mr(3)
6415 .nr(16)
6416 .kr(2)
6417 .sr(1)
6418 .m(3)
6419 .n(16)
6420 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08006421 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006422 }
6423
6424 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cn) {
6425 TEST_REQUIRES_ARM_NEON;
6426 GemmMicrokernelTester()
6427 .mr(3)
6428 .nr(16)
6429 .kr(2)
6430 .sr(1)
6431 .m(3)
6432 .n(16)
6433 .k(16)
6434 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006435 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006436 }
6437
6438 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_strided_a) {
6439 TEST_REQUIRES_ARM_NEON;
6440 GemmMicrokernelTester()
6441 .mr(3)
6442 .nr(16)
6443 .kr(2)
6444 .sr(1)
6445 .m(3)
6446 .n(16)
6447 .k(16)
6448 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006449 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006450 }
6451
6452 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
6453 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006454 for (uint32_t n = 1; n <= 16; n++) {
6455 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006456 GemmMicrokernelTester()
6457 .mr(3)
6458 .nr(16)
6459 .kr(2)
6460 .sr(1)
6461 .m(m)
6462 .n(n)
6463 .k(16)
6464 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006465 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006466 }
6467 }
6468 }
6469
6470 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
6471 TEST_REQUIRES_ARM_NEON;
6472 for (uint32_t m = 1; m <= 3; m++) {
6473 GemmMicrokernelTester()
6474 .mr(3)
6475 .nr(16)
6476 .kr(2)
6477 .sr(1)
6478 .m(m)
6479 .n(16)
6480 .k(16)
6481 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006482 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006483 }
6484 }
6485
6486 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
6487 TEST_REQUIRES_ARM_NEON;
6488 for (uint32_t n = 1; n <= 16; n++) {
6489 GemmMicrokernelTester()
6490 .mr(3)
6491 .nr(16)
6492 .kr(2)
6493 .sr(1)
6494 .m(3)
6495 .n(n)
6496 .k(16)
6497 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006498 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006499 }
6500 }
6501
6502 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_lt_16) {
6503 TEST_REQUIRES_ARM_NEON;
6504 for (size_t k = 1; k < 16; k++) {
6505 GemmMicrokernelTester()
6506 .mr(3)
6507 .nr(16)
6508 .kr(2)
6509 .sr(1)
6510 .m(3)
6511 .n(16)
6512 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006513 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006514 }
6515 }
6516
6517 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_lt_16_strided_a) {
6518 TEST_REQUIRES_ARM_NEON;
6519 for (size_t k = 1; k < 16; k++) {
6520 GemmMicrokernelTester()
6521 .mr(3)
6522 .nr(16)
6523 .kr(2)
6524 .sr(1)
6525 .m(3)
6526 .n(16)
6527 .k(k)
6528 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006529 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006530 }
6531 }
6532
6533 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
6534 TEST_REQUIRES_ARM_NEON;
6535 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006536 for (uint32_t n = 1; n <= 16; n++) {
6537 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006538 GemmMicrokernelTester()
6539 .mr(3)
6540 .nr(16)
6541 .kr(2)
6542 .sr(1)
6543 .m(m)
6544 .n(n)
6545 .k(k)
6546 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006547 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006548 }
6549 }
6550 }
6551 }
6552
6553 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_gt_16) {
6554 TEST_REQUIRES_ARM_NEON;
6555 for (size_t k = 17; k < 32; k++) {
6556 GemmMicrokernelTester()
6557 .mr(3)
6558 .nr(16)
6559 .kr(2)
6560 .sr(1)
6561 .m(3)
6562 .n(16)
6563 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006564 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006565 }
6566 }
6567
6568 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_gt_16_strided_a) {
6569 TEST_REQUIRES_ARM_NEON;
6570 for (size_t k = 17; k < 32; k++) {
6571 GemmMicrokernelTester()
6572 .mr(3)
6573 .nr(16)
6574 .kr(2)
6575 .sr(1)
6576 .m(3)
6577 .n(16)
6578 .k(k)
6579 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08006580 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006581 }
6582 }
6583
6584 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
6585 TEST_REQUIRES_ARM_NEON;
6586 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006587 for (uint32_t n = 1; n <= 16; n++) {
6588 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006589 GemmMicrokernelTester()
6590 .mr(3)
6591 .nr(16)
6592 .kr(2)
6593 .sr(1)
6594 .m(m)
6595 .n(n)
6596 .k(k)
6597 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006598 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006599 }
6600 }
6601 }
6602 }
6603
6604 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_div_16) {
6605 TEST_REQUIRES_ARM_NEON;
6606 for (size_t k = 32; k <= 160; k += 16) {
6607 GemmMicrokernelTester()
6608 .mr(3)
6609 .nr(16)
6610 .kr(2)
6611 .sr(1)
6612 .m(3)
6613 .n(16)
6614 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006615 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006616 }
6617 }
6618
6619 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_div_16_strided_a) {
6620 TEST_REQUIRES_ARM_NEON;
6621 for (size_t k = 32; k <= 160; k += 16) {
6622 GemmMicrokernelTester()
6623 .mr(3)
6624 .nr(16)
6625 .kr(2)
6626 .sr(1)
6627 .m(3)
6628 .n(16)
6629 .k(k)
6630 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08006631 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006632 }
6633 }
6634
6635 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
6636 TEST_REQUIRES_ARM_NEON;
6637 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006638 for (uint32_t n = 1; n <= 16; n++) {
6639 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006640 GemmMicrokernelTester()
6641 .mr(3)
6642 .nr(16)
6643 .kr(2)
6644 .sr(1)
6645 .m(m)
6646 .n(n)
6647 .k(k)
6648 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006649 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006650 }
6651 }
6652 }
6653 }
6654
6655 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16) {
6656 TEST_REQUIRES_ARM_NEON;
6657 for (uint32_t n = 17; n < 32; n++) {
6658 for (size_t k = 1; k <= 80; k += 17) {
6659 GemmMicrokernelTester()
6660 .mr(3)
6661 .nr(16)
6662 .kr(2)
6663 .sr(1)
6664 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006665 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006666 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006667 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006668 }
6669 }
6670 }
6671
6672 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
6673 TEST_REQUIRES_ARM_NEON;
6674 for (uint32_t n = 17; n < 32; n++) {
6675 for (size_t k = 1; k <= 80; k += 17) {
6676 GemmMicrokernelTester()
6677 .mr(3)
6678 .nr(16)
6679 .kr(2)
6680 .sr(1)
6681 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006682 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006683 .k(k)
6684 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006685 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006686 }
6687 }
6688 }
6689
6690 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_strided_a) {
6691 TEST_REQUIRES_ARM_NEON;
6692 for (uint32_t n = 17; n < 32; n++) {
6693 for (size_t k = 1; k <= 80; k += 17) {
6694 GemmMicrokernelTester()
6695 .mr(3)
6696 .nr(16)
6697 .kr(2)
6698 .sr(1)
6699 .m(3)
6700 .n(n)
6701 .k(k)
6702 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006703 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006704 }
6705 }
6706 }
6707
6708 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
6709 TEST_REQUIRES_ARM_NEON;
6710 for (uint32_t n = 17; n < 32; n++) {
6711 for (size_t k = 1; k <= 80; k += 17) {
6712 for (uint32_t m = 1; m <= 3; m++) {
6713 GemmMicrokernelTester()
6714 .mr(3)
6715 .nr(16)
6716 .kr(2)
6717 .sr(1)
6718 .m(m)
6719 .n(n)
6720 .k(k)
6721 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006722 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006723 }
6724 }
6725 }
6726 }
6727
6728 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16) {
6729 TEST_REQUIRES_ARM_NEON;
6730 for (uint32_t n = 32; n <= 48; n += 16) {
6731 for (size_t k = 1; k <= 80; k += 17) {
6732 GemmMicrokernelTester()
6733 .mr(3)
6734 .nr(16)
6735 .kr(2)
6736 .sr(1)
6737 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006738 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006739 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006740 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006741 }
6742 }
6743 }
6744
6745 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
6746 TEST_REQUIRES_ARM_NEON;
6747 for (uint32_t n = 32; n <= 48; n += 16) {
6748 for (size_t k = 1; k <= 80; k += 17) {
6749 GemmMicrokernelTester()
6750 .mr(3)
6751 .nr(16)
6752 .kr(2)
6753 .sr(1)
6754 .m(3)
6755 .n(n)
6756 .k(k)
6757 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006758 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006759 }
6760 }
6761 }
6762
6763 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_strided_a) {
6764 TEST_REQUIRES_ARM_NEON;
6765 for (uint32_t n = 32; n <= 48; n += 16) {
6766 for (size_t k = 1; k <= 80; k += 17) {
6767 GemmMicrokernelTester()
6768 .mr(3)
6769 .nr(16)
6770 .kr(2)
6771 .sr(1)
6772 .m(3)
6773 .n(n)
6774 .k(k)
6775 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006776 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006777 }
6778 }
6779 }
6780
6781 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
6782 TEST_REQUIRES_ARM_NEON;
6783 for (uint32_t n = 32; n <= 48; n += 16) {
6784 for (size_t k = 1; k <= 80; k += 17) {
6785 for (uint32_t m = 1; m <= 3; m++) {
6786 GemmMicrokernelTester()
6787 .mr(3)
6788 .nr(16)
6789 .kr(2)
6790 .sr(1)
6791 .m(m)
6792 .n(n)
6793 .k(k)
6794 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006795 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006796 }
6797 }
6798 }
6799 }
6800
6801 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
6802 TEST_REQUIRES_ARM_NEON;
6803 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006804 for (uint32_t n = 1; n <= 16; n++) {
6805 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006806 GemmMicrokernelTester()
6807 .mr(3)
6808 .nr(16)
6809 .kr(2)
6810 .sr(1)
6811 .m(m)
6812 .n(n)
6813 .k(k)
6814 .cm_stride(19)
6815 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006816 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006817 }
6818 }
6819 }
6820 }
6821
6822 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, qmin) {
6823 TEST_REQUIRES_ARM_NEON;
6824 GemmMicrokernelTester()
6825 .mr(3)
6826 .nr(16)
6827 .kr(2)
6828 .sr(1)
6829 .m(3)
6830 .n(16)
6831 .k(16)
6832 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006833 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006834 }
6835
6836 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, qmax) {
6837 TEST_REQUIRES_ARM_NEON;
6838 GemmMicrokernelTester()
6839 .mr(3)
6840 .nr(16)
6841 .kr(2)
6842 .sr(1)
6843 .m(3)
6844 .n(16)
6845 .k(16)
6846 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006847 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006848 }
6849
6850 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cm) {
6851 TEST_REQUIRES_ARM_NEON;
6852 GemmMicrokernelTester()
6853 .mr(3)
6854 .nr(16)
6855 .kr(2)
6856 .sr(1)
6857 .m(3)
6858 .n(16)
6859 .k(16)
6860 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006861 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006862 }
6863#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6864
6865
6866#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6867 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08006868 TEST_REQUIRES_ARM_NEON;
6869 GemmMicrokernelTester()
6870 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006871 .nr(16)
6872 .kr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08006873 .sr(1)
6874 .m(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006875 .n(16)
6876 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08006877 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006878 }
6879
6880 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, strided_cn) {
6881 TEST_REQUIRES_ARM_NEON;
6882 GemmMicrokernelTester()
6883 .mr(4)
6884 .nr(16)
6885 .kr(2)
6886 .sr(1)
6887 .m(4)
6888 .n(16)
6889 .k(16)
6890 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006891 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006892 }
6893
6894 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16_strided_a) {
6895 TEST_REQUIRES_ARM_NEON;
6896 GemmMicrokernelTester()
6897 .mr(4)
6898 .nr(16)
6899 .kr(2)
6900 .sr(1)
6901 .m(4)
6902 .n(16)
6903 .k(16)
6904 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006905 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006906 }
6907
6908 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
6909 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006910 for (uint32_t n = 1; n <= 16; n++) {
6911 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006912 GemmMicrokernelTester()
6913 .mr(4)
6914 .nr(16)
6915 .kr(2)
6916 .sr(1)
6917 .m(m)
6918 .n(n)
6919 .k(16)
6920 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006921 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006922 }
6923 }
6924 }
6925
6926 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
6927 TEST_REQUIRES_ARM_NEON;
6928 for (uint32_t m = 1; m <= 4; m++) {
6929 GemmMicrokernelTester()
6930 .mr(4)
6931 .nr(16)
6932 .kr(2)
6933 .sr(1)
6934 .m(m)
6935 .n(16)
6936 .k(16)
6937 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006938 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006939 }
6940 }
6941
6942 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
6943 TEST_REQUIRES_ARM_NEON;
6944 for (uint32_t n = 1; n <= 16; n++) {
6945 GemmMicrokernelTester()
6946 .mr(4)
6947 .nr(16)
6948 .kr(2)
6949 .sr(1)
6950 .m(4)
6951 .n(n)
6952 .k(16)
6953 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006954 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006955 }
6956 }
6957
6958 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_lt_16) {
6959 TEST_REQUIRES_ARM_NEON;
6960 for (size_t k = 1; k < 16; k++) {
6961 GemmMicrokernelTester()
6962 .mr(4)
6963 .nr(16)
6964 .kr(2)
6965 .sr(1)
6966 .m(4)
6967 .n(16)
6968 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006969 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006970 }
6971 }
6972
6973 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_lt_16_strided_a) {
6974 TEST_REQUIRES_ARM_NEON;
6975 for (size_t k = 1; k < 16; k++) {
6976 GemmMicrokernelTester()
6977 .mr(4)
6978 .nr(16)
6979 .kr(2)
6980 .sr(1)
6981 .m(4)
6982 .n(16)
6983 .k(k)
6984 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006985 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006986 }
6987 }
6988
6989 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
6990 TEST_REQUIRES_ARM_NEON;
6991 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006992 for (uint32_t n = 1; n <= 16; n++) {
6993 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006994 GemmMicrokernelTester()
6995 .mr(4)
6996 .nr(16)
6997 .kr(2)
6998 .sr(1)
6999 .m(m)
7000 .n(n)
7001 .k(k)
7002 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007003 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007004 }
7005 }
7006 }
7007 }
7008
7009 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_gt_16) {
7010 TEST_REQUIRES_ARM_NEON;
7011 for (size_t k = 17; k < 32; k++) {
7012 GemmMicrokernelTester()
7013 .mr(4)
7014 .nr(16)
7015 .kr(2)
7016 .sr(1)
7017 .m(4)
7018 .n(16)
7019 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007020 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007021 }
7022 }
7023
7024 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_gt_16_strided_a) {
7025 TEST_REQUIRES_ARM_NEON;
7026 for (size_t k = 17; k < 32; k++) {
7027 GemmMicrokernelTester()
7028 .mr(4)
7029 .nr(16)
7030 .kr(2)
7031 .sr(1)
7032 .m(4)
7033 .n(16)
7034 .k(k)
7035 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08007036 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007037 }
7038 }
7039
7040 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
7041 TEST_REQUIRES_ARM_NEON;
7042 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007043 for (uint32_t n = 1; n <= 16; n++) {
7044 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007045 GemmMicrokernelTester()
7046 .mr(4)
7047 .nr(16)
7048 .kr(2)
7049 .sr(1)
7050 .m(m)
7051 .n(n)
7052 .k(k)
7053 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007054 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007055 }
7056 }
7057 }
7058 }
7059
7060 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_div_16) {
7061 TEST_REQUIRES_ARM_NEON;
7062 for (size_t k = 32; k <= 160; k += 16) {
7063 GemmMicrokernelTester()
7064 .mr(4)
7065 .nr(16)
7066 .kr(2)
7067 .sr(1)
7068 .m(4)
7069 .n(16)
7070 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007071 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007072 }
7073 }
7074
7075 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_div_16_strided_a) {
7076 TEST_REQUIRES_ARM_NEON;
7077 for (size_t k = 32; k <= 160; k += 16) {
7078 GemmMicrokernelTester()
7079 .mr(4)
7080 .nr(16)
7081 .kr(2)
7082 .sr(1)
7083 .m(4)
7084 .n(16)
7085 .k(k)
7086 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08007087 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007088 }
7089 }
7090
7091 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
7092 TEST_REQUIRES_ARM_NEON;
7093 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007094 for (uint32_t n = 1; n <= 16; n++) {
7095 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007096 GemmMicrokernelTester()
7097 .mr(4)
7098 .nr(16)
7099 .kr(2)
7100 .sr(1)
7101 .m(m)
7102 .n(n)
7103 .k(k)
7104 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007105 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007106 }
7107 }
7108 }
7109 }
7110
7111 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_gt_16) {
7112 TEST_REQUIRES_ARM_NEON;
7113 for (uint32_t n = 17; n < 32; n++) {
7114 for (size_t k = 1; k <= 80; k += 17) {
7115 GemmMicrokernelTester()
7116 .mr(4)
7117 .nr(16)
7118 .kr(2)
7119 .sr(1)
7120 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007121 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007122 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007123 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007124 }
7125 }
7126 }
7127
7128 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
7129 TEST_REQUIRES_ARM_NEON;
7130 for (uint32_t n = 17; n < 32; n++) {
7131 for (size_t k = 1; k <= 80; k += 17) {
7132 GemmMicrokernelTester()
7133 .mr(4)
7134 .nr(16)
7135 .kr(2)
7136 .sr(1)
7137 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007138 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007139 .k(k)
7140 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007141 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007142 }
7143 }
7144 }
7145
7146 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_gt_16_strided_a) {
7147 TEST_REQUIRES_ARM_NEON;
7148 for (uint32_t n = 17; n < 32; n++) {
7149 for (size_t k = 1; k <= 80; k += 17) {
7150 GemmMicrokernelTester()
7151 .mr(4)
7152 .nr(16)
7153 .kr(2)
7154 .sr(1)
7155 .m(4)
7156 .n(n)
7157 .k(k)
7158 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007159 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007160 }
7161 }
7162 }
7163
7164 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
7165 TEST_REQUIRES_ARM_NEON;
7166 for (uint32_t n = 17; n < 32; n++) {
7167 for (size_t k = 1; k <= 80; k += 17) {
7168 for (uint32_t m = 1; m <= 4; m++) {
7169 GemmMicrokernelTester()
7170 .mr(4)
7171 .nr(16)
7172 .kr(2)
7173 .sr(1)
7174 .m(m)
7175 .n(n)
7176 .k(k)
7177 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007178 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007179 }
7180 }
7181 }
7182 }
7183
7184 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_div_16) {
7185 TEST_REQUIRES_ARM_NEON;
7186 for (uint32_t n = 32; n <= 48; n += 16) {
7187 for (size_t k = 1; k <= 80; k += 17) {
7188 GemmMicrokernelTester()
7189 .mr(4)
7190 .nr(16)
7191 .kr(2)
7192 .sr(1)
7193 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007194 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007195 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007196 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007197 }
7198 }
7199 }
7200
7201 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
7202 TEST_REQUIRES_ARM_NEON;
7203 for (uint32_t n = 32; n <= 48; n += 16) {
7204 for (size_t k = 1; k <= 80; k += 17) {
7205 GemmMicrokernelTester()
7206 .mr(4)
7207 .nr(16)
7208 .kr(2)
7209 .sr(1)
7210 .m(4)
7211 .n(n)
7212 .k(k)
7213 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007214 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007215 }
7216 }
7217 }
7218
7219 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_div_16_strided_a) {
7220 TEST_REQUIRES_ARM_NEON;
7221 for (uint32_t n = 32; n <= 48; n += 16) {
7222 for (size_t k = 1; k <= 80; k += 17) {
7223 GemmMicrokernelTester()
7224 .mr(4)
7225 .nr(16)
7226 .kr(2)
7227 .sr(1)
7228 .m(4)
7229 .n(n)
7230 .k(k)
7231 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007232 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007233 }
7234 }
7235 }
7236
7237 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
7238 TEST_REQUIRES_ARM_NEON;
7239 for (uint32_t n = 32; n <= 48; n += 16) {
7240 for (size_t k = 1; k <= 80; k += 17) {
7241 for (uint32_t m = 1; m <= 4; m++) {
7242 GemmMicrokernelTester()
7243 .mr(4)
7244 .nr(16)
7245 .kr(2)
7246 .sr(1)
7247 .m(m)
7248 .n(n)
7249 .k(k)
7250 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007251 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007252 }
7253 }
7254 }
7255 }
7256
7257 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
7258 TEST_REQUIRES_ARM_NEON;
7259 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007260 for (uint32_t n = 1; n <= 16; n++) {
7261 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007262 GemmMicrokernelTester()
7263 .mr(4)
7264 .nr(16)
7265 .kr(2)
7266 .sr(1)
7267 .m(m)
7268 .n(n)
7269 .k(k)
7270 .cm_stride(19)
7271 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007272 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007273 }
7274 }
7275 }
7276 }
7277
7278 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, qmin) {
7279 TEST_REQUIRES_ARM_NEON;
7280 GemmMicrokernelTester()
7281 .mr(4)
7282 .nr(16)
7283 .kr(2)
7284 .sr(1)
7285 .m(4)
7286 .n(16)
7287 .k(16)
7288 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007289 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007290 }
7291
7292 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, qmax) {
7293 TEST_REQUIRES_ARM_NEON;
7294 GemmMicrokernelTester()
7295 .mr(4)
7296 .nr(16)
7297 .kr(2)
7298 .sr(1)
7299 .m(4)
7300 .n(16)
7301 .k(16)
7302 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007303 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007304 }
7305
7306 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, strided_cm) {
7307 TEST_REQUIRES_ARM_NEON;
7308 GemmMicrokernelTester()
7309 .mr(4)
7310 .nr(16)
7311 .kr(2)
7312 .sr(1)
7313 .m(4)
7314 .n(16)
7315 .k(16)
7316 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007317 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007318 }
7319#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7320
7321
7322#if XNN_ARCH_ARM || XNN_ARCH_ARM64
7323 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8) {
7324 TEST_REQUIRES_ARM_NEON;
7325 GemmMicrokernelTester()
7326 .mr(3)
7327 .nr(8)
7328 .kr(4)
7329 .sr(2)
7330 .m(3)
Frank Barcharde4d3f762021-12-23 15:31:43 -08007331 .n(8)
7332 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007333 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08007334 }
7335
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007336 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, strided_cn) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08007337 TEST_REQUIRES_ARM_NEON;
7338 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007339 .mr(3)
Frank Barcharde4d3f762021-12-23 15:31:43 -08007340 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007341 .kr(4)
7342 .sr(2)
7343 .m(3)
Frank Barcharde4d3f762021-12-23 15:31:43 -08007344 .n(8)
7345 .k(8)
7346 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007347 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08007348 }
7349
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007350 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8_strided_a) {
7351 TEST_REQUIRES_ARM_NEON;
7352 GemmMicrokernelTester()
7353 .mr(3)
7354 .nr(8)
7355 .kr(4)
7356 .sr(2)
7357 .m(3)
7358 .n(8)
7359 .k(8)
7360 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007361 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007362 }
7363
7364 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8_subtile) {
7365 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007366 for (uint32_t n = 1; n <= 8; n++) {
7367 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007368 GemmMicrokernelTester()
7369 .mr(3)
7370 .nr(8)
7371 .kr(4)
7372 .sr(2)
7373 .m(m)
7374 .n(n)
7375 .k(8)
7376 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007377 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007378 }
7379 }
7380 }
7381
7382 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8_subtile_m) {
7383 TEST_REQUIRES_ARM_NEON;
7384 for (uint32_t m = 1; m <= 3; m++) {
7385 GemmMicrokernelTester()
7386 .mr(3)
7387 .nr(8)
7388 .kr(4)
7389 .sr(2)
7390 .m(m)
7391 .n(8)
7392 .k(8)
7393 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007394 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007395 }
7396 }
7397
7398 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8_subtile_n) {
7399 TEST_REQUIRES_ARM_NEON;
7400 for (uint32_t n = 1; n <= 8; n++) {
7401 GemmMicrokernelTester()
7402 .mr(3)
7403 .nr(8)
7404 .kr(4)
7405 .sr(2)
7406 .m(3)
7407 .n(n)
7408 .k(8)
7409 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007410 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007411 }
7412 }
7413
7414 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_lt_8) {
7415 TEST_REQUIRES_ARM_NEON;
7416 for (size_t k = 1; k < 8; k++) {
7417 GemmMicrokernelTester()
7418 .mr(3)
7419 .nr(8)
7420 .kr(4)
7421 .sr(2)
7422 .m(3)
7423 .n(8)
7424 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007425 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007426 }
7427 }
7428
7429 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_lt_8_strided_a) {
7430 TEST_REQUIRES_ARM_NEON;
7431 for (size_t k = 1; k < 8; k++) {
7432 GemmMicrokernelTester()
7433 .mr(3)
7434 .nr(8)
7435 .kr(4)
7436 .sr(2)
7437 .m(3)
7438 .n(8)
7439 .k(k)
7440 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007441 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007442 }
7443 }
7444
7445 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_lt_8_subtile) {
7446 TEST_REQUIRES_ARM_NEON;
7447 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007448 for (uint32_t n = 1; n <= 8; n++) {
7449 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007450 GemmMicrokernelTester()
7451 .mr(3)
7452 .nr(8)
7453 .kr(4)
7454 .sr(2)
7455 .m(m)
7456 .n(n)
7457 .k(k)
7458 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007459 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007460 }
7461 }
7462 }
7463 }
7464
7465 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_gt_8) {
7466 TEST_REQUIRES_ARM_NEON;
7467 for (size_t k = 9; k < 16; k++) {
7468 GemmMicrokernelTester()
7469 .mr(3)
7470 .nr(8)
7471 .kr(4)
7472 .sr(2)
7473 .m(3)
7474 .n(8)
7475 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007476 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007477 }
7478 }
7479
7480 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_gt_8_strided_a) {
7481 TEST_REQUIRES_ARM_NEON;
7482 for (size_t k = 9; k < 16; k++) {
7483 GemmMicrokernelTester()
7484 .mr(3)
7485 .nr(8)
7486 .kr(4)
7487 .sr(2)
7488 .m(3)
7489 .n(8)
7490 .k(k)
7491 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007492 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007493 }
7494 }
7495
7496 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_gt_8_subtile) {
7497 TEST_REQUIRES_ARM_NEON;
7498 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007499 for (uint32_t n = 1; n <= 8; n++) {
7500 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007501 GemmMicrokernelTester()
7502 .mr(3)
7503 .nr(8)
7504 .kr(4)
7505 .sr(2)
7506 .m(m)
7507 .n(n)
7508 .k(k)
7509 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007510 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007511 }
7512 }
7513 }
7514 }
7515
7516 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_div_8) {
7517 TEST_REQUIRES_ARM_NEON;
7518 for (size_t k = 16; k <= 80; k += 8) {
7519 GemmMicrokernelTester()
7520 .mr(3)
7521 .nr(8)
7522 .kr(4)
7523 .sr(2)
7524 .m(3)
7525 .n(8)
7526 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007527 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007528 }
7529 }
7530
7531 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_div_8_strided_a) {
7532 TEST_REQUIRES_ARM_NEON;
7533 for (size_t k = 16; k <= 80; k += 8) {
7534 GemmMicrokernelTester()
7535 .mr(3)
7536 .nr(8)
7537 .kr(4)
7538 .sr(2)
7539 .m(3)
7540 .n(8)
7541 .k(k)
7542 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007543 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007544 }
7545 }
7546
7547 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_div_8_subtile) {
7548 TEST_REQUIRES_ARM_NEON;
7549 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007550 for (uint32_t n = 1; n <= 8; n++) {
7551 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007552 GemmMicrokernelTester()
7553 .mr(3)
7554 .nr(8)
7555 .kr(4)
7556 .sr(2)
7557 .m(m)
7558 .n(n)
7559 .k(k)
7560 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007561 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007562 }
7563 }
7564 }
7565 }
7566
7567 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_gt_8) {
7568 TEST_REQUIRES_ARM_NEON;
7569 for (uint32_t n = 9; n < 16; n++) {
7570 for (size_t k = 1; k <= 40; k += 9) {
7571 GemmMicrokernelTester()
7572 .mr(3)
7573 .nr(8)
7574 .kr(4)
7575 .sr(2)
7576 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007577 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007578 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007579 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007580 }
7581 }
7582 }
7583
7584 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_gt_8_strided_cn) {
7585 TEST_REQUIRES_ARM_NEON;
7586 for (uint32_t n = 9; n < 16; n++) {
7587 for (size_t k = 1; k <= 40; k += 9) {
7588 GemmMicrokernelTester()
7589 .mr(3)
7590 .nr(8)
7591 .kr(4)
7592 .sr(2)
7593 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007594 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007595 .k(k)
7596 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007597 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007598 }
7599 }
7600 }
7601
7602 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_gt_8_strided_a) {
7603 TEST_REQUIRES_ARM_NEON;
7604 for (uint32_t n = 9; n < 16; n++) {
7605 for (size_t k = 1; k <= 40; k += 9) {
7606 GemmMicrokernelTester()
7607 .mr(3)
7608 .nr(8)
7609 .kr(4)
7610 .sr(2)
7611 .m(3)
7612 .n(n)
7613 .k(k)
7614 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007615 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007616 }
7617 }
7618 }
7619
7620 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_gt_8_subtile) {
7621 TEST_REQUIRES_ARM_NEON;
7622 for (uint32_t n = 9; n < 16; n++) {
7623 for (size_t k = 1; k <= 40; k += 9) {
7624 for (uint32_t m = 1; m <= 3; m++) {
7625 GemmMicrokernelTester()
7626 .mr(3)
7627 .nr(8)
7628 .kr(4)
7629 .sr(2)
7630 .m(m)
7631 .n(n)
7632 .k(k)
7633 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007634 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007635 }
7636 }
7637 }
7638 }
7639
7640 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_div_8) {
7641 TEST_REQUIRES_ARM_NEON;
7642 for (uint32_t n = 16; n <= 24; n += 8) {
7643 for (size_t k = 1; k <= 40; k += 9) {
7644 GemmMicrokernelTester()
7645 .mr(3)
7646 .nr(8)
7647 .kr(4)
7648 .sr(2)
7649 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007650 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007651 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007652 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007653 }
7654 }
7655 }
7656
7657 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_div_8_strided_cn) {
7658 TEST_REQUIRES_ARM_NEON;
7659 for (uint32_t n = 16; n <= 24; n += 8) {
7660 for (size_t k = 1; k <= 40; k += 9) {
7661 GemmMicrokernelTester()
7662 .mr(3)
7663 .nr(8)
7664 .kr(4)
7665 .sr(2)
7666 .m(3)
7667 .n(n)
7668 .k(k)
7669 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007670 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007671 }
7672 }
7673 }
7674
7675 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_div_8_strided_a) {
7676 TEST_REQUIRES_ARM_NEON;
7677 for (uint32_t n = 16; n <= 24; n += 8) {
7678 for (size_t k = 1; k <= 40; k += 9) {
7679 GemmMicrokernelTester()
7680 .mr(3)
7681 .nr(8)
7682 .kr(4)
7683 .sr(2)
7684 .m(3)
7685 .n(n)
7686 .k(k)
7687 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007688 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007689 }
7690 }
7691 }
7692
7693 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_div_8_subtile) {
7694 TEST_REQUIRES_ARM_NEON;
7695 for (uint32_t n = 16; n <= 24; n += 8) {
7696 for (size_t k = 1; k <= 40; k += 9) {
7697 for (uint32_t m = 1; m <= 3; m++) {
7698 GemmMicrokernelTester()
7699 .mr(3)
7700 .nr(8)
7701 .kr(4)
7702 .sr(2)
7703 .m(m)
7704 .n(n)
7705 .k(k)
7706 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007707 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007708 }
7709 }
7710 }
7711 }
7712
7713 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, strided_cm_subtile) {
7714 TEST_REQUIRES_ARM_NEON;
7715 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007716 for (uint32_t n = 1; n <= 8; n++) {
7717 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007718 GemmMicrokernelTester()
7719 .mr(3)
7720 .nr(8)
7721 .kr(4)
7722 .sr(2)
7723 .m(m)
7724 .n(n)
7725 .k(k)
7726 .cm_stride(11)
7727 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007728 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007729 }
7730 }
7731 }
7732 }
7733
7734 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, qmin) {
7735 TEST_REQUIRES_ARM_NEON;
7736 GemmMicrokernelTester()
7737 .mr(3)
7738 .nr(8)
7739 .kr(4)
7740 .sr(2)
7741 .m(3)
7742 .n(8)
7743 .k(8)
7744 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007745 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007746 }
7747
7748 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, qmax) {
7749 TEST_REQUIRES_ARM_NEON;
7750 GemmMicrokernelTester()
7751 .mr(3)
7752 .nr(8)
7753 .kr(4)
7754 .sr(2)
7755 .m(3)
7756 .n(8)
7757 .k(8)
7758 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007759 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007760 }
7761
7762 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, strided_cm) {
7763 TEST_REQUIRES_ARM_NEON;
7764 GemmMicrokernelTester()
7765 .mr(3)
7766 .nr(8)
7767 .kr(4)
7768 .sr(2)
7769 .m(3)
7770 .n(8)
7771 .k(8)
7772 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007773 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007774 }
7775#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7776
7777
7778#if XNN_ARCH_ARM || XNN_ARCH_ARM64
7779 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_eq_8) {
7780 TEST_REQUIRES_ARM_NEON;
7781 GemmMicrokernelTester()
7782 .mr(2)
7783 .nr(16)
7784 .kr(4)
7785 .sr(2)
7786 .m(2)
7787 .n(16)
7788 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007789 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007790 }
7791
7792 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, strided_cn) {
7793 TEST_REQUIRES_ARM_NEON;
7794 GemmMicrokernelTester()
7795 .mr(2)
7796 .nr(16)
7797 .kr(4)
7798 .sr(2)
7799 .m(2)
7800 .n(16)
7801 .k(8)
7802 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007803 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007804 }
7805
7806 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_eq_8_strided_a) {
7807 TEST_REQUIRES_ARM_NEON;
7808 GemmMicrokernelTester()
7809 .mr(2)
7810 .nr(16)
7811 .kr(4)
7812 .sr(2)
7813 .m(2)
7814 .n(16)
7815 .k(8)
7816 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007817 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007818 }
7819
7820 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_eq_8_subtile) {
7821 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007822 for (uint32_t n = 1; n <= 16; n++) {
7823 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007824 GemmMicrokernelTester()
7825 .mr(2)
7826 .nr(16)
7827 .kr(4)
7828 .sr(2)
7829 .m(m)
7830 .n(n)
7831 .k(8)
7832 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007833 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007834 }
7835 }
7836 }
7837
7838 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_eq_8_subtile_m) {
7839 TEST_REQUIRES_ARM_NEON;
7840 for (uint32_t m = 1; m <= 2; m++) {
7841 GemmMicrokernelTester()
7842 .mr(2)
7843 .nr(16)
7844 .kr(4)
7845 .sr(2)
7846 .m(m)
7847 .n(16)
7848 .k(8)
7849 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007850 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007851 }
7852 }
7853
7854 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_eq_8_subtile_n) {
7855 TEST_REQUIRES_ARM_NEON;
7856 for (uint32_t n = 1; n <= 16; n++) {
7857 GemmMicrokernelTester()
7858 .mr(2)
7859 .nr(16)
7860 .kr(4)
7861 .sr(2)
7862 .m(2)
7863 .n(n)
7864 .k(8)
7865 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007866 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007867 }
7868 }
7869
7870 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_lt_8) {
7871 TEST_REQUIRES_ARM_NEON;
7872 for (size_t k = 1; k < 8; k++) {
7873 GemmMicrokernelTester()
7874 .mr(2)
7875 .nr(16)
7876 .kr(4)
7877 .sr(2)
7878 .m(2)
7879 .n(16)
7880 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007881 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007882 }
7883 }
7884
7885 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_lt_8_strided_a) {
7886 TEST_REQUIRES_ARM_NEON;
7887 for (size_t k = 1; k < 8; k++) {
7888 GemmMicrokernelTester()
7889 .mr(2)
7890 .nr(16)
7891 .kr(4)
7892 .sr(2)
7893 .m(2)
7894 .n(16)
7895 .k(k)
7896 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007897 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007898 }
7899 }
7900
7901 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_lt_8_subtile) {
7902 TEST_REQUIRES_ARM_NEON;
7903 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007904 for (uint32_t n = 1; n <= 16; n++) {
7905 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007906 GemmMicrokernelTester()
7907 .mr(2)
7908 .nr(16)
7909 .kr(4)
7910 .sr(2)
7911 .m(m)
7912 .n(n)
7913 .k(k)
7914 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007915 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007916 }
7917 }
7918 }
7919 }
7920
7921 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_gt_8) {
7922 TEST_REQUIRES_ARM_NEON;
7923 for (size_t k = 9; k < 16; k++) {
7924 GemmMicrokernelTester()
7925 .mr(2)
7926 .nr(16)
7927 .kr(4)
7928 .sr(2)
7929 .m(2)
7930 .n(16)
7931 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007932 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007933 }
7934 }
7935
7936 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_gt_8_strided_a) {
7937 TEST_REQUIRES_ARM_NEON;
7938 for (size_t k = 9; k < 16; k++) {
7939 GemmMicrokernelTester()
7940 .mr(2)
7941 .nr(16)
7942 .kr(4)
7943 .sr(2)
7944 .m(2)
7945 .n(16)
7946 .k(k)
7947 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007948 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007949 }
7950 }
7951
7952 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_gt_8_subtile) {
7953 TEST_REQUIRES_ARM_NEON;
7954 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007955 for (uint32_t n = 1; n <= 16; n++) {
7956 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007957 GemmMicrokernelTester()
7958 .mr(2)
7959 .nr(16)
7960 .kr(4)
7961 .sr(2)
7962 .m(m)
7963 .n(n)
7964 .k(k)
7965 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007966 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007967 }
7968 }
7969 }
7970 }
7971
7972 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_div_8) {
7973 TEST_REQUIRES_ARM_NEON;
7974 for (size_t k = 16; k <= 80; k += 8) {
7975 GemmMicrokernelTester()
7976 .mr(2)
7977 .nr(16)
7978 .kr(4)
7979 .sr(2)
7980 .m(2)
7981 .n(16)
7982 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007983 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007984 }
7985 }
7986
7987 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_div_8_strided_a) {
7988 TEST_REQUIRES_ARM_NEON;
7989 for (size_t k = 16; k <= 80; k += 8) {
7990 GemmMicrokernelTester()
7991 .mr(2)
7992 .nr(16)
7993 .kr(4)
7994 .sr(2)
7995 .m(2)
7996 .n(16)
7997 .k(k)
7998 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007999 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008000 }
8001 }
8002
8003 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_div_8_subtile) {
8004 TEST_REQUIRES_ARM_NEON;
8005 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008006 for (uint32_t n = 1; n <= 16; n++) {
8007 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008008 GemmMicrokernelTester()
8009 .mr(2)
8010 .nr(16)
8011 .kr(4)
8012 .sr(2)
8013 .m(m)
8014 .n(n)
8015 .k(k)
8016 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008017 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008018 }
8019 }
8020 }
8021 }
8022
8023 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_gt_16) {
8024 TEST_REQUIRES_ARM_NEON;
8025 for (uint32_t n = 17; n < 32; n++) {
8026 for (size_t k = 1; k <= 40; k += 9) {
8027 GemmMicrokernelTester()
8028 .mr(2)
8029 .nr(16)
8030 .kr(4)
8031 .sr(2)
8032 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008033 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008034 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008035 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008036 }
8037 }
8038 }
8039
8040 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_gt_16_strided_cn) {
8041 TEST_REQUIRES_ARM_NEON;
8042 for (uint32_t n = 17; n < 32; n++) {
8043 for (size_t k = 1; k <= 40; k += 9) {
8044 GemmMicrokernelTester()
8045 .mr(2)
8046 .nr(16)
8047 .kr(4)
8048 .sr(2)
8049 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008050 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008051 .k(k)
8052 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008053 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008054 }
8055 }
8056 }
8057
8058 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_gt_16_strided_a) {
8059 TEST_REQUIRES_ARM_NEON;
8060 for (uint32_t n = 17; n < 32; n++) {
8061 for (size_t k = 1; k <= 40; k += 9) {
8062 GemmMicrokernelTester()
8063 .mr(2)
8064 .nr(16)
8065 .kr(4)
8066 .sr(2)
8067 .m(2)
8068 .n(n)
8069 .k(k)
8070 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008071 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008072 }
8073 }
8074 }
8075
8076 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_gt_16_subtile) {
8077 TEST_REQUIRES_ARM_NEON;
8078 for (uint32_t n = 17; n < 32; n++) {
8079 for (size_t k = 1; k <= 40; k += 9) {
8080 for (uint32_t m = 1; m <= 2; m++) {
8081 GemmMicrokernelTester()
8082 .mr(2)
8083 .nr(16)
8084 .kr(4)
8085 .sr(2)
8086 .m(m)
8087 .n(n)
8088 .k(k)
8089 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008090 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008091 }
8092 }
8093 }
8094 }
8095
8096 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_div_16) {
8097 TEST_REQUIRES_ARM_NEON;
8098 for (uint32_t n = 32; n <= 48; n += 16) {
8099 for (size_t k = 1; k <= 40; k += 9) {
8100 GemmMicrokernelTester()
8101 .mr(2)
8102 .nr(16)
8103 .kr(4)
8104 .sr(2)
8105 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008106 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008107 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008108 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008109 }
8110 }
8111 }
8112
8113 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_div_16_strided_cn) {
8114 TEST_REQUIRES_ARM_NEON;
8115 for (uint32_t n = 32; n <= 48; n += 16) {
8116 for (size_t k = 1; k <= 40; k += 9) {
8117 GemmMicrokernelTester()
8118 .mr(2)
8119 .nr(16)
8120 .kr(4)
8121 .sr(2)
8122 .m(2)
8123 .n(n)
8124 .k(k)
8125 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008126 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008127 }
8128 }
8129 }
8130
8131 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_div_16_strided_a) {
8132 TEST_REQUIRES_ARM_NEON;
8133 for (uint32_t n = 32; n <= 48; n += 16) {
8134 for (size_t k = 1; k <= 40; k += 9) {
8135 GemmMicrokernelTester()
8136 .mr(2)
8137 .nr(16)
8138 .kr(4)
8139 .sr(2)
8140 .m(2)
8141 .n(n)
8142 .k(k)
8143 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008144 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008145 }
8146 }
8147 }
8148
8149 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_div_16_subtile) {
8150 TEST_REQUIRES_ARM_NEON;
8151 for (uint32_t n = 32; n <= 48; n += 16) {
8152 for (size_t k = 1; k <= 40; k += 9) {
8153 for (uint32_t m = 1; m <= 2; m++) {
8154 GemmMicrokernelTester()
8155 .mr(2)
8156 .nr(16)
8157 .kr(4)
8158 .sr(2)
8159 .m(m)
8160 .n(n)
8161 .k(k)
8162 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008163 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008164 }
8165 }
8166 }
8167 }
8168
8169 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, strided_cm_subtile) {
8170 TEST_REQUIRES_ARM_NEON;
8171 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008172 for (uint32_t n = 1; n <= 16; n++) {
8173 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008174 GemmMicrokernelTester()
8175 .mr(2)
8176 .nr(16)
8177 .kr(4)
8178 .sr(2)
8179 .m(m)
8180 .n(n)
8181 .k(k)
8182 .cm_stride(19)
8183 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008184 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008185 }
8186 }
8187 }
8188 }
8189
8190 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, qmin) {
8191 TEST_REQUIRES_ARM_NEON;
8192 GemmMicrokernelTester()
8193 .mr(2)
8194 .nr(16)
8195 .kr(4)
8196 .sr(2)
8197 .m(2)
8198 .n(16)
8199 .k(8)
8200 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008201 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008202 }
8203
8204 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, qmax) {
8205 TEST_REQUIRES_ARM_NEON;
8206 GemmMicrokernelTester()
8207 .mr(2)
8208 .nr(16)
8209 .kr(4)
8210 .sr(2)
8211 .m(2)
8212 .n(16)
8213 .k(8)
8214 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008215 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008216 }
8217
8218 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, strided_cm) {
8219 TEST_REQUIRES_ARM_NEON;
8220 GemmMicrokernelTester()
8221 .mr(2)
8222 .nr(16)
8223 .kr(4)
8224 .sr(2)
8225 .m(2)
8226 .n(16)
8227 .k(8)
8228 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008229 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008230 }
8231#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8232
8233
8234#if XNN_ARCH_ARM || XNN_ARCH_ARM64
8235 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_eq_16) {
8236 TEST_REQUIRES_ARM_NEON;
8237 GemmMicrokernelTester()
8238 .mr(2)
8239 .nr(8)
8240 .kr(4)
8241 .sr(2)
8242 .m(2)
8243 .n(8)
8244 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08008245 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008246 }
8247
8248 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, strided_cn) {
8249 TEST_REQUIRES_ARM_NEON;
8250 GemmMicrokernelTester()
8251 .mr(2)
8252 .nr(8)
8253 .kr(4)
8254 .sr(2)
8255 .m(2)
8256 .n(8)
8257 .k(16)
8258 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008259 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008260 }
8261
8262 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_eq_16_strided_a) {
8263 TEST_REQUIRES_ARM_NEON;
8264 GemmMicrokernelTester()
8265 .mr(2)
8266 .nr(8)
8267 .kr(4)
8268 .sr(2)
8269 .m(2)
8270 .n(8)
8271 .k(16)
8272 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008273 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008274 }
8275
8276 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_eq_16_subtile) {
8277 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008278 for (uint32_t n = 1; n <= 8; n++) {
8279 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008280 GemmMicrokernelTester()
8281 .mr(2)
8282 .nr(8)
8283 .kr(4)
8284 .sr(2)
8285 .m(m)
8286 .n(n)
8287 .k(16)
8288 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008289 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008290 }
8291 }
8292 }
8293
8294 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
8295 TEST_REQUIRES_ARM_NEON;
8296 for (uint32_t m = 1; m <= 2; m++) {
8297 GemmMicrokernelTester()
8298 .mr(2)
8299 .nr(8)
8300 .kr(4)
8301 .sr(2)
8302 .m(m)
8303 .n(8)
8304 .k(16)
8305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008306 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008307 }
8308 }
8309
8310 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
8311 TEST_REQUIRES_ARM_NEON;
8312 for (uint32_t n = 1; n <= 8; n++) {
8313 GemmMicrokernelTester()
8314 .mr(2)
8315 .nr(8)
8316 .kr(4)
8317 .sr(2)
8318 .m(2)
8319 .n(n)
8320 .k(16)
8321 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008322 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008323 }
8324 }
8325
8326 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_lt_16) {
8327 TEST_REQUIRES_ARM_NEON;
8328 for (size_t k = 1; k < 16; k++) {
8329 GemmMicrokernelTester()
8330 .mr(2)
8331 .nr(8)
8332 .kr(4)
8333 .sr(2)
8334 .m(2)
8335 .n(8)
8336 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008337 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008338 }
8339 }
8340
8341 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_lt_16_strided_a) {
8342 TEST_REQUIRES_ARM_NEON;
8343 for (size_t k = 1; k < 16; k++) {
8344 GemmMicrokernelTester()
8345 .mr(2)
8346 .nr(8)
8347 .kr(4)
8348 .sr(2)
8349 .m(2)
8350 .n(8)
8351 .k(k)
8352 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008353 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008354 }
8355 }
8356
8357 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_lt_16_subtile) {
8358 TEST_REQUIRES_ARM_NEON;
8359 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008360 for (uint32_t n = 1; n <= 8; n++) {
8361 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008362 GemmMicrokernelTester()
8363 .mr(2)
8364 .nr(8)
8365 .kr(4)
8366 .sr(2)
8367 .m(m)
8368 .n(n)
8369 .k(k)
8370 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008371 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008372 }
8373 }
8374 }
8375 }
8376
8377 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_gt_16) {
8378 TEST_REQUIRES_ARM_NEON;
8379 for (size_t k = 17; k < 32; k++) {
8380 GemmMicrokernelTester()
8381 .mr(2)
8382 .nr(8)
8383 .kr(4)
8384 .sr(2)
8385 .m(2)
8386 .n(8)
8387 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008388 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008389 }
8390 }
8391
8392 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_gt_16_strided_a) {
8393 TEST_REQUIRES_ARM_NEON;
8394 for (size_t k = 17; k < 32; k++) {
8395 GemmMicrokernelTester()
8396 .mr(2)
8397 .nr(8)
8398 .kr(4)
8399 .sr(2)
8400 .m(2)
8401 .n(8)
8402 .k(k)
8403 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08008404 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008405 }
8406 }
8407
8408 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_gt_16_subtile) {
8409 TEST_REQUIRES_ARM_NEON;
8410 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008411 for (uint32_t n = 1; n <= 8; n++) {
8412 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008413 GemmMicrokernelTester()
8414 .mr(2)
8415 .nr(8)
8416 .kr(4)
8417 .sr(2)
8418 .m(m)
8419 .n(n)
8420 .k(k)
8421 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008422 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008423 }
8424 }
8425 }
8426 }
8427
8428 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_div_16) {
8429 TEST_REQUIRES_ARM_NEON;
8430 for (size_t k = 32; k <= 160; k += 16) {
8431 GemmMicrokernelTester()
8432 .mr(2)
8433 .nr(8)
8434 .kr(4)
8435 .sr(2)
8436 .m(2)
8437 .n(8)
8438 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008439 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008440 }
8441 }
8442
8443 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_div_16_strided_a) {
8444 TEST_REQUIRES_ARM_NEON;
8445 for (size_t k = 32; k <= 160; k += 16) {
8446 GemmMicrokernelTester()
8447 .mr(2)
8448 .nr(8)
8449 .kr(4)
8450 .sr(2)
8451 .m(2)
8452 .n(8)
8453 .k(k)
8454 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08008455 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008456 }
8457 }
8458
8459 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_div_16_subtile) {
8460 TEST_REQUIRES_ARM_NEON;
8461 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008462 for (uint32_t n = 1; n <= 8; n++) {
8463 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008464 GemmMicrokernelTester()
8465 .mr(2)
8466 .nr(8)
8467 .kr(4)
8468 .sr(2)
8469 .m(m)
8470 .n(n)
8471 .k(k)
8472 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008473 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008474 }
8475 }
8476 }
8477 }
8478
8479 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_gt_8) {
8480 TEST_REQUIRES_ARM_NEON;
8481 for (uint32_t n = 9; n < 16; n++) {
8482 for (size_t k = 1; k <= 80; k += 17) {
8483 GemmMicrokernelTester()
8484 .mr(2)
8485 .nr(8)
8486 .kr(4)
8487 .sr(2)
8488 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008489 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008490 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008491 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008492 }
8493 }
8494 }
8495
8496 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
8497 TEST_REQUIRES_ARM_NEON;
8498 for (uint32_t n = 9; n < 16; n++) {
8499 for (size_t k = 1; k <= 80; k += 17) {
8500 GemmMicrokernelTester()
8501 .mr(2)
8502 .nr(8)
8503 .kr(4)
8504 .sr(2)
8505 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008506 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008507 .k(k)
8508 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008509 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008510 }
8511 }
8512 }
8513
8514 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_gt_8_strided_a) {
8515 TEST_REQUIRES_ARM_NEON;
8516 for (uint32_t n = 9; n < 16; n++) {
8517 for (size_t k = 1; k <= 80; k += 17) {
8518 GemmMicrokernelTester()
8519 .mr(2)
8520 .nr(8)
8521 .kr(4)
8522 .sr(2)
8523 .m(2)
8524 .n(n)
8525 .k(k)
8526 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008527 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008528 }
8529 }
8530 }
8531
8532 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_gt_8_subtile) {
8533 TEST_REQUIRES_ARM_NEON;
8534 for (uint32_t n = 9; n < 16; n++) {
8535 for (size_t k = 1; k <= 80; k += 17) {
8536 for (uint32_t m = 1; m <= 2; m++) {
8537 GemmMicrokernelTester()
8538 .mr(2)
8539 .nr(8)
8540 .kr(4)
8541 .sr(2)
8542 .m(m)
8543 .n(n)
8544 .k(k)
8545 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008546 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008547 }
8548 }
8549 }
8550 }
8551
8552 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_div_8) {
8553 TEST_REQUIRES_ARM_NEON;
8554 for (uint32_t n = 16; n <= 24; n += 8) {
8555 for (size_t k = 1; k <= 80; k += 17) {
8556 GemmMicrokernelTester()
8557 .mr(2)
8558 .nr(8)
8559 .kr(4)
8560 .sr(2)
8561 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008562 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008563 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008564 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008565 }
8566 }
8567 }
8568
8569 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
8570 TEST_REQUIRES_ARM_NEON;
8571 for (uint32_t n = 16; n <= 24; n += 8) {
8572 for (size_t k = 1; k <= 80; k += 17) {
8573 GemmMicrokernelTester()
8574 .mr(2)
8575 .nr(8)
8576 .kr(4)
8577 .sr(2)
8578 .m(2)
8579 .n(n)
8580 .k(k)
8581 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008582 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008583 }
8584 }
8585 }
8586
8587 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_div_8_strided_a) {
8588 TEST_REQUIRES_ARM_NEON;
8589 for (uint32_t n = 16; n <= 24; n += 8) {
8590 for (size_t k = 1; k <= 80; k += 17) {
8591 GemmMicrokernelTester()
8592 .mr(2)
8593 .nr(8)
8594 .kr(4)
8595 .sr(2)
8596 .m(2)
8597 .n(n)
8598 .k(k)
8599 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008600 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008601 }
8602 }
8603 }
8604
8605 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_div_8_subtile) {
8606 TEST_REQUIRES_ARM_NEON;
8607 for (uint32_t n = 16; n <= 24; n += 8) {
8608 for (size_t k = 1; k <= 80; k += 17) {
8609 for (uint32_t m = 1; m <= 2; m++) {
8610 GemmMicrokernelTester()
8611 .mr(2)
8612 .nr(8)
8613 .kr(4)
8614 .sr(2)
8615 .m(m)
8616 .n(n)
8617 .k(k)
8618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008619 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008620 }
8621 }
8622 }
8623 }
8624
8625 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, strided_cm_subtile) {
8626 TEST_REQUIRES_ARM_NEON;
8627 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008628 for (uint32_t n = 1; n <= 8; n++) {
8629 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008630 GemmMicrokernelTester()
8631 .mr(2)
8632 .nr(8)
8633 .kr(4)
8634 .sr(2)
8635 .m(m)
8636 .n(n)
8637 .k(k)
8638 .cm_stride(11)
8639 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008640 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008641 }
8642 }
8643 }
8644 }
8645
8646 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, qmin) {
8647 TEST_REQUIRES_ARM_NEON;
8648 GemmMicrokernelTester()
8649 .mr(2)
8650 .nr(8)
8651 .kr(4)
8652 .sr(2)
8653 .m(2)
8654 .n(8)
8655 .k(16)
8656 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008657 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008658 }
8659
8660 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, qmax) {
8661 TEST_REQUIRES_ARM_NEON;
8662 GemmMicrokernelTester()
8663 .mr(2)
8664 .nr(8)
8665 .kr(4)
8666 .sr(2)
8667 .m(2)
8668 .n(8)
8669 .k(16)
8670 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008671 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008672 }
8673
8674 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, strided_cm) {
8675 TEST_REQUIRES_ARM_NEON;
8676 GemmMicrokernelTester()
8677 .mr(2)
8678 .nr(8)
8679 .kr(4)
8680 .sr(2)
8681 .m(2)
8682 .n(8)
8683 .k(16)
8684 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008685 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008686 }
8687#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8688
8689
8690#if XNN_ARCH_ARM || XNN_ARCH_ARM64
8691 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08008692 TEST_REQUIRES_ARM_NEON;
8693 GemmMicrokernelTester()
8694 .mr(4)
8695 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008696 .kr(4)
8697 .sr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08008698 .m(4)
8699 .n(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008700 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08008701 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08008702 }
8703
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008704 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, strided_cn) {
8705 TEST_REQUIRES_ARM_NEON;
8706 GemmMicrokernelTester()
8707 .mr(4)
8708 .nr(8)
8709 .kr(4)
8710 .sr(2)
8711 .m(4)
8712 .n(8)
8713 .k(16)
8714 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008715 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008716 }
8717
8718 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16_strided_a) {
8719 TEST_REQUIRES_ARM_NEON;
8720 GemmMicrokernelTester()
8721 .mr(4)
8722 .nr(8)
8723 .kr(4)
8724 .sr(2)
8725 .m(4)
8726 .n(8)
8727 .k(16)
8728 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008729 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008730 }
8731
8732 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16_subtile) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08008733 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008734 for (uint32_t n = 1; n <= 8; n++) {
8735 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08008736 GemmMicrokernelTester()
8737 .mr(4)
8738 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008739 .kr(4)
8740 .sr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08008741 .m(m)
8742 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008743 .k(16)
Frank Barcharde4d3f762021-12-23 15:31:43 -08008744 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008745 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08008746 }
8747 }
8748 }
8749
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008750 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08008751 TEST_REQUIRES_ARM_NEON;
8752 for (uint32_t m = 1; m <= 4; m++) {
8753 GemmMicrokernelTester()
8754 .mr(4)
8755 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008756 .kr(4)
8757 .sr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08008758 .m(m)
8759 .n(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008760 .k(16)
Frank Barcharde4d3f762021-12-23 15:31:43 -08008761 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008762 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08008763 }
8764 }
8765
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008766 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08008767 TEST_REQUIRES_ARM_NEON;
8768 for (uint32_t n = 1; n <= 8; n++) {
8769 GemmMicrokernelTester()
8770 .mr(4)
8771 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008772 .kr(4)
8773 .sr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08008774 .m(4)
8775 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008776 .k(16)
Frank Barcharde4d3f762021-12-23 15:31:43 -08008777 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008778 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08008779 }
8780 }
8781
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008782 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_lt_16) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08008783 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008784 for (size_t k = 1; k < 16; k++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08008785 GemmMicrokernelTester()
8786 .mr(4)
8787 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008788 .kr(4)
8789 .sr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08008790 .m(4)
8791 .n(8)
8792 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008793 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08008794 }
8795 }
8796
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008797 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_lt_16_strided_a) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08008798 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008799 for (size_t k = 1; k < 16; k++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08008800 GemmMicrokernelTester()
8801 .mr(4)
8802 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008803 .kr(4)
8804 .sr(2)
Frank Barcharde4d3f762021-12-23 15:31:43 -08008805 .m(4)
8806 .n(8)
8807 .k(k)
8808 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008809 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08008810 }
8811 }
8812
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008813 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_lt_16_subtile) {
8814 TEST_REQUIRES_ARM_NEON;
8815 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008816 for (uint32_t n = 1; n <= 8; n++) {
8817 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008818 GemmMicrokernelTester()
8819 .mr(4)
8820 .nr(8)
8821 .kr(4)
8822 .sr(2)
8823 .m(m)
8824 .n(n)
8825 .k(k)
8826 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008827 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008828 }
8829 }
8830 }
8831 }
8832
8833 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_gt_16) {
8834 TEST_REQUIRES_ARM_NEON;
8835 for (size_t k = 17; k < 32; k++) {
8836 GemmMicrokernelTester()
8837 .mr(4)
8838 .nr(8)
8839 .kr(4)
8840 .sr(2)
8841 .m(4)
8842 .n(8)
8843 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008844 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008845 }
8846 }
8847
8848 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_gt_16_strided_a) {
8849 TEST_REQUIRES_ARM_NEON;
8850 for (size_t k = 17; k < 32; k++) {
8851 GemmMicrokernelTester()
8852 .mr(4)
8853 .nr(8)
8854 .kr(4)
8855 .sr(2)
8856 .m(4)
8857 .n(8)
8858 .k(k)
8859 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08008860 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008861 }
8862 }
8863
8864 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_gt_16_subtile) {
8865 TEST_REQUIRES_ARM_NEON;
8866 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008867 for (uint32_t n = 1; n <= 8; n++) {
8868 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008869 GemmMicrokernelTester()
8870 .mr(4)
8871 .nr(8)
8872 .kr(4)
8873 .sr(2)
8874 .m(m)
8875 .n(n)
8876 .k(k)
8877 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008878 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008879 }
8880 }
8881 }
8882 }
8883
8884 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_div_16) {
8885 TEST_REQUIRES_ARM_NEON;
8886 for (size_t k = 32; k <= 160; k += 16) {
8887 GemmMicrokernelTester()
8888 .mr(4)
8889 .nr(8)
8890 .kr(4)
8891 .sr(2)
8892 .m(4)
8893 .n(8)
8894 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008895 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008896 }
8897 }
8898
8899 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_div_16_strided_a) {
8900 TEST_REQUIRES_ARM_NEON;
8901 for (size_t k = 32; k <= 160; k += 16) {
8902 GemmMicrokernelTester()
8903 .mr(4)
8904 .nr(8)
8905 .kr(4)
8906 .sr(2)
8907 .m(4)
8908 .n(8)
8909 .k(k)
8910 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08008911 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008912 }
8913 }
8914
8915 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_div_16_subtile) {
8916 TEST_REQUIRES_ARM_NEON;
8917 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008918 for (uint32_t n = 1; n <= 8; n++) {
8919 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008920 GemmMicrokernelTester()
8921 .mr(4)
8922 .nr(8)
8923 .kr(4)
8924 .sr(2)
8925 .m(m)
8926 .n(n)
8927 .k(k)
8928 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008929 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008930 }
8931 }
8932 }
8933 }
8934
8935 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_gt_8) {
8936 TEST_REQUIRES_ARM_NEON;
8937 for (uint32_t n = 9; n < 16; n++) {
8938 for (size_t k = 1; k <= 80; k += 17) {
8939 GemmMicrokernelTester()
8940 .mr(4)
8941 .nr(8)
8942 .kr(4)
8943 .sr(2)
8944 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008945 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008946 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008947 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008948 }
8949 }
8950 }
8951
8952 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
8953 TEST_REQUIRES_ARM_NEON;
8954 for (uint32_t n = 9; n < 16; n++) {
8955 for (size_t k = 1; k <= 80; k += 17) {
8956 GemmMicrokernelTester()
8957 .mr(4)
8958 .nr(8)
8959 .kr(4)
8960 .sr(2)
8961 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008962 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008963 .k(k)
8964 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008965 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008966 }
8967 }
8968 }
8969
8970 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_gt_8_strided_a) {
8971 TEST_REQUIRES_ARM_NEON;
8972 for (uint32_t n = 9; n < 16; n++) {
8973 for (size_t k = 1; k <= 80; k += 17) {
8974 GemmMicrokernelTester()
8975 .mr(4)
8976 .nr(8)
8977 .kr(4)
8978 .sr(2)
8979 .m(4)
8980 .n(n)
8981 .k(k)
8982 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008983 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008984 }
8985 }
8986 }
8987
8988 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_gt_8_subtile) {
8989 TEST_REQUIRES_ARM_NEON;
8990 for (uint32_t n = 9; n < 16; n++) {
8991 for (size_t k = 1; k <= 80; k += 17) {
8992 for (uint32_t m = 1; m <= 4; m++) {
8993 GemmMicrokernelTester()
8994 .mr(4)
8995 .nr(8)
8996 .kr(4)
8997 .sr(2)
8998 .m(m)
8999 .n(n)
9000 .k(k)
9001 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009002 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009003 }
9004 }
9005 }
9006 }
9007
9008 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_div_8) {
9009 TEST_REQUIRES_ARM_NEON;
9010 for (uint32_t n = 16; n <= 24; n += 8) {
9011 for (size_t k = 1; k <= 80; k += 17) {
9012 GemmMicrokernelTester()
9013 .mr(4)
9014 .nr(8)
9015 .kr(4)
9016 .sr(2)
9017 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009018 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009019 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009020 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009021 }
9022 }
9023 }
9024
9025 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
9026 TEST_REQUIRES_ARM_NEON;
9027 for (uint32_t n = 16; n <= 24; n += 8) {
9028 for (size_t k = 1; k <= 80; k += 17) {
9029 GemmMicrokernelTester()
9030 .mr(4)
9031 .nr(8)
9032 .kr(4)
9033 .sr(2)
9034 .m(4)
9035 .n(n)
9036 .k(k)
9037 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009038 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009039 }
9040 }
9041 }
9042
9043 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_div_8_strided_a) {
9044 TEST_REQUIRES_ARM_NEON;
9045 for (uint32_t n = 16; n <= 24; n += 8) {
9046 for (size_t k = 1; k <= 80; k += 17) {
9047 GemmMicrokernelTester()
9048 .mr(4)
9049 .nr(8)
9050 .kr(4)
9051 .sr(2)
9052 .m(4)
9053 .n(n)
9054 .k(k)
9055 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009056 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009057 }
9058 }
9059 }
9060
9061 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_div_8_subtile) {
9062 TEST_REQUIRES_ARM_NEON;
9063 for (uint32_t n = 16; n <= 24; n += 8) {
9064 for (size_t k = 1; k <= 80; k += 17) {
9065 for (uint32_t m = 1; m <= 4; m++) {
9066 GemmMicrokernelTester()
9067 .mr(4)
9068 .nr(8)
9069 .kr(4)
9070 .sr(2)
9071 .m(m)
9072 .n(n)
9073 .k(k)
9074 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009075 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009076 }
9077 }
9078 }
9079 }
9080
9081 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, strided_cm_subtile) {
9082 TEST_REQUIRES_ARM_NEON;
9083 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009084 for (uint32_t n = 1; n <= 8; n++) {
9085 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009086 GemmMicrokernelTester()
9087 .mr(4)
9088 .nr(8)
9089 .kr(4)
9090 .sr(2)
9091 .m(m)
9092 .n(n)
9093 .k(k)
9094 .cm_stride(11)
9095 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009096 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009097 }
9098 }
9099 }
9100 }
9101
9102 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, qmin) {
9103 TEST_REQUIRES_ARM_NEON;
9104 GemmMicrokernelTester()
9105 .mr(4)
9106 .nr(8)
9107 .kr(4)
9108 .sr(2)
9109 .m(4)
9110 .n(8)
9111 .k(16)
9112 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009113 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009114 }
9115
9116 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, qmax) {
9117 TEST_REQUIRES_ARM_NEON;
9118 GemmMicrokernelTester()
9119 .mr(4)
9120 .nr(8)
9121 .kr(4)
9122 .sr(2)
9123 .m(4)
9124 .n(8)
9125 .k(16)
9126 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009127 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009128 }
9129
9130 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, strided_cm) {
9131 TEST_REQUIRES_ARM_NEON;
9132 GemmMicrokernelTester()
9133 .mr(4)
9134 .nr(8)
9135 .kr(4)
9136 .sr(2)
9137 .m(4)
9138 .n(8)
9139 .k(16)
9140 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009141 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009142 }
9143#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9144
9145
9146#if XNN_ARCH_ARM || XNN_ARCH_ARM64
9147 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_eq_8) {
9148 TEST_REQUIRES_ARM_NEON;
9149 GemmMicrokernelTester()
9150 .mr(4)
9151 .nr(16)
9152 .kr(2)
9153 .sr(4)
9154 .m(4)
9155 .n(16)
9156 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009157 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009158 }
9159
9160 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, strided_cn) {
9161 TEST_REQUIRES_ARM_NEON;
9162 GemmMicrokernelTester()
9163 .mr(4)
9164 .nr(16)
9165 .kr(2)
9166 .sr(4)
9167 .m(4)
9168 .n(16)
9169 .k(8)
9170 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009171 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009172 }
9173
9174 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_eq_8_strided_a) {
9175 TEST_REQUIRES_ARM_NEON;
9176 GemmMicrokernelTester()
9177 .mr(4)
9178 .nr(16)
9179 .kr(2)
9180 .sr(4)
9181 .m(4)
9182 .n(16)
9183 .k(8)
9184 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009185 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009186 }
9187
9188 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_eq_8_subtile) {
9189 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009190 for (uint32_t n = 1; n <= 16; n++) {
9191 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009192 GemmMicrokernelTester()
9193 .mr(4)
9194 .nr(16)
9195 .kr(2)
9196 .sr(4)
9197 .m(m)
9198 .n(n)
9199 .k(8)
9200 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009201 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009202 }
9203 }
9204 }
9205
9206 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_eq_8_subtile_m) {
9207 TEST_REQUIRES_ARM_NEON;
9208 for (uint32_t m = 1; m <= 4; m++) {
9209 GemmMicrokernelTester()
9210 .mr(4)
9211 .nr(16)
9212 .kr(2)
9213 .sr(4)
9214 .m(m)
9215 .n(16)
9216 .k(8)
9217 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009218 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009219 }
9220 }
9221
9222 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_eq_8_subtile_n) {
9223 TEST_REQUIRES_ARM_NEON;
9224 for (uint32_t n = 1; n <= 16; n++) {
9225 GemmMicrokernelTester()
9226 .mr(4)
9227 .nr(16)
9228 .kr(2)
9229 .sr(4)
9230 .m(4)
9231 .n(n)
9232 .k(8)
9233 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009234 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009235 }
9236 }
9237
9238 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_lt_8) {
9239 TEST_REQUIRES_ARM_NEON;
9240 for (size_t k = 1; k < 8; k++) {
9241 GemmMicrokernelTester()
9242 .mr(4)
9243 .nr(16)
9244 .kr(2)
9245 .sr(4)
9246 .m(4)
9247 .n(16)
9248 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009249 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009250 }
9251 }
9252
9253 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_lt_8_strided_a) {
9254 TEST_REQUIRES_ARM_NEON;
9255 for (size_t k = 1; k < 8; k++) {
9256 GemmMicrokernelTester()
9257 .mr(4)
9258 .nr(16)
9259 .kr(2)
9260 .sr(4)
9261 .m(4)
9262 .n(16)
9263 .k(k)
9264 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009265 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009266 }
9267 }
9268
9269 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_lt_8_subtile) {
9270 TEST_REQUIRES_ARM_NEON;
9271 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009272 for (uint32_t n = 1; n <= 16; n++) {
9273 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009274 GemmMicrokernelTester()
9275 .mr(4)
9276 .nr(16)
9277 .kr(2)
9278 .sr(4)
9279 .m(m)
9280 .n(n)
9281 .k(k)
9282 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009283 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009284 }
9285 }
9286 }
9287 }
9288
9289 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_gt_8) {
9290 TEST_REQUIRES_ARM_NEON;
9291 for (size_t k = 9; k < 16; k++) {
9292 GemmMicrokernelTester()
9293 .mr(4)
9294 .nr(16)
9295 .kr(2)
9296 .sr(4)
9297 .m(4)
9298 .n(16)
9299 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009300 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009301 }
9302 }
9303
9304 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_gt_8_strided_a) {
9305 TEST_REQUIRES_ARM_NEON;
9306 for (size_t k = 9; k < 16; k++) {
9307 GemmMicrokernelTester()
9308 .mr(4)
9309 .nr(16)
9310 .kr(2)
9311 .sr(4)
9312 .m(4)
9313 .n(16)
9314 .k(k)
9315 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009316 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009317 }
9318 }
9319
9320 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_gt_8_subtile) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009321 TEST_REQUIRES_ARM_NEON;
9322 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009323 for (uint32_t n = 1; n <= 16; n++) {
9324 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009325 GemmMicrokernelTester()
9326 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009327 .nr(16)
9328 .kr(2)
9329 .sr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009330 .m(m)
9331 .n(n)
9332 .k(k)
9333 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009334 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08009335 }
9336 }
9337 }
9338 }
9339
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009340 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_div_8) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009341 TEST_REQUIRES_ARM_NEON;
9342 for (size_t k = 16; k <= 80; k += 8) {
9343 GemmMicrokernelTester()
9344 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009345 .nr(16)
9346 .kr(2)
9347 .sr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009348 .m(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009349 .n(16)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009350 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009351 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08009352 }
9353 }
9354
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009355 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_div_8_strided_a) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009356 TEST_REQUIRES_ARM_NEON;
9357 for (size_t k = 16; k <= 80; k += 8) {
9358 GemmMicrokernelTester()
9359 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009360 .nr(16)
9361 .kr(2)
9362 .sr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009363 .m(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009364 .n(16)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009365 .k(k)
9366 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009367 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08009368 }
9369 }
9370
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009371 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, k_div_8_subtile) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009372 TEST_REQUIRES_ARM_NEON;
9373 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009374 for (uint32_t n = 1; n <= 16; n++) {
9375 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009376 GemmMicrokernelTester()
9377 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009378 .nr(16)
9379 .kr(2)
9380 .sr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009381 .m(m)
9382 .n(n)
9383 .k(k)
9384 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009385 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08009386 }
9387 }
9388 }
9389 }
9390
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009391 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, n_gt_16) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009392 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009393 for (uint32_t n = 17; n < 32; n++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009394 for (size_t k = 1; k <= 40; k += 9) {
9395 GemmMicrokernelTester()
9396 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009397 .nr(16)
9398 .kr(2)
9399 .sr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009400 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009401 .n(n)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009402 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009403 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08009404 }
9405 }
9406 }
9407
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009408 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, n_gt_16_strided_cn) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009409 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009410 for (uint32_t n = 17; n < 32; n++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009411 for (size_t k = 1; k <= 40; k += 9) {
9412 GemmMicrokernelTester()
9413 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009414 .nr(16)
9415 .kr(2)
9416 .sr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009417 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009418 .n(n)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009419 .k(k)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009420 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009421 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08009422 }
9423 }
9424 }
9425
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009426 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, n_gt_16_strided_a) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009427 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009428 for (uint32_t n = 17; n < 32; n++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009429 for (size_t k = 1; k <= 40; k += 9) {
9430 GemmMicrokernelTester()
9431 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009432 .nr(16)
9433 .kr(2)
9434 .sr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009435 .m(4)
9436 .n(n)
9437 .k(k)
9438 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009439 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08009440 }
9441 }
9442 }
9443
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009444 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, n_gt_16_subtile) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009445 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009446 for (uint32_t n = 17; n < 32; n++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009447 for (size_t k = 1; k <= 40; k += 9) {
9448 for (uint32_t m = 1; m <= 4; m++) {
9449 GemmMicrokernelTester()
9450 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009451 .nr(16)
9452 .kr(2)
9453 .sr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009454 .m(m)
9455 .n(n)
9456 .k(k)
9457 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009458 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08009459 }
9460 }
9461 }
9462 }
9463
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009464 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, n_div_16) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009465 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009466 for (uint32_t n = 32; n <= 48; n += 16) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009467 for (size_t k = 1; k <= 40; k += 9) {
9468 GemmMicrokernelTester()
9469 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009470 .nr(16)
9471 .kr(2)
9472 .sr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009473 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009474 .n(n)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009475 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009476 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08009477 }
9478 }
9479 }
9480
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009481 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, n_div_16_strided_cn) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009482 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009483 for (uint32_t n = 32; n <= 48; n += 16) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009484 for (size_t k = 1; k <= 40; k += 9) {
9485 GemmMicrokernelTester()
9486 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009487 .nr(16)
9488 .kr(2)
9489 .sr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009490 .m(4)
9491 .n(n)
9492 .k(k)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009493 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009494 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08009495 }
9496 }
9497 }
9498
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009499 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, n_div_16_strided_a) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009500 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009501 for (uint32_t n = 32; n <= 48; n += 16) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009502 for (size_t k = 1; k <= 40; k += 9) {
9503 GemmMicrokernelTester()
9504 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009505 .nr(16)
9506 .kr(2)
9507 .sr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009508 .m(4)
9509 .n(n)
9510 .k(k)
9511 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009512 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08009513 }
9514 }
9515 }
9516
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009517 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, n_div_16_subtile) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009518 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009519 for (uint32_t n = 32; n <= 48; n += 16) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009520 for (size_t k = 1; k <= 40; k += 9) {
9521 for (uint32_t m = 1; m <= 4; m++) {
9522 GemmMicrokernelTester()
9523 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009524 .nr(16)
9525 .kr(2)
9526 .sr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009527 .m(m)
9528 .n(n)
9529 .k(k)
9530 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009531 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -08009532 }
9533 }
9534 }
9535 }
9536
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009537 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, strided_cm_subtile) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009538 TEST_REQUIRES_ARM_NEON;
9539 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009540 for (uint32_t n = 1; n <= 16; n++) {
9541 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -08009542 GemmMicrokernelTester()
9543 .mr(4)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009544 .nr(16)
9545 .kr(2)
9546 .sr(4)
9547 .m(m)
9548 .n(n)
9549 .k(k)
9550 .cm_stride(19)
9551 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009552 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009553 }
9554 }
9555 }
9556 }
9557
9558 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, qmin) {
9559 TEST_REQUIRES_ARM_NEON;
9560 GemmMicrokernelTester()
9561 .mr(4)
9562 .nr(16)
9563 .kr(2)
9564 .sr(4)
9565 .m(4)
9566 .n(16)
9567 .k(8)
9568 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009569 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009570 }
9571
9572 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, qmax) {
9573 TEST_REQUIRES_ARM_NEON;
9574 GemmMicrokernelTester()
9575 .mr(4)
9576 .nr(16)
9577 .kr(2)
9578 .sr(4)
9579 .m(4)
9580 .n(16)
9581 .k(8)
9582 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009583 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009584 }
9585
9586 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2S4__NEON_MULL, strided_cm) {
9587 TEST_REQUIRES_ARM_NEON;
9588 GemmMicrokernelTester()
9589 .mr(4)
9590 .nr(16)
9591 .kr(2)
9592 .sr(4)
9593 .m(4)
9594 .n(16)
9595 .k(8)
9596 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009597 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009598 }
9599#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9600
9601
9602#if XNN_ARCH_ARM || XNN_ARCH_ARM64
9603 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8) {
9604 TEST_REQUIRES_ARM_NEON;
9605 GemmMicrokernelTester()
9606 .mr(1)
9607 .nr(8)
9608 .kr(4)
9609 .sr(1)
9610 .m(1)
9611 .n(8)
9612 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009613 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009614 }
9615
9616 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cn) {
9617 TEST_REQUIRES_ARM_NEON;
9618 GemmMicrokernelTester()
9619 .mr(1)
9620 .nr(8)
9621 .kr(4)
9622 .sr(1)
9623 .m(1)
9624 .n(8)
9625 .k(8)
9626 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009627 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009628 }
9629
9630 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_strided_a) {
9631 TEST_REQUIRES_ARM_NEON;
9632 GemmMicrokernelTester()
9633 .mr(1)
9634 .nr(8)
9635 .kr(4)
9636 .sr(1)
9637 .m(1)
9638 .n(8)
9639 .k(8)
9640 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009641 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009642 }
9643
9644 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile) {
9645 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009646 for (uint32_t n = 1; n <= 8; n++) {
9647 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009648 GemmMicrokernelTester()
9649 .mr(1)
9650 .nr(8)
9651 .kr(4)
9652 .sr(1)
9653 .m(m)
9654 .n(n)
9655 .k(8)
9656 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009657 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009658 }
9659 }
9660 }
9661
9662 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
9663 TEST_REQUIRES_ARM_NEON;
9664 for (uint32_t m = 1; m <= 1; m++) {
9665 GemmMicrokernelTester()
9666 .mr(1)
9667 .nr(8)
9668 .kr(4)
9669 .sr(1)
9670 .m(m)
9671 .n(8)
9672 .k(8)
9673 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009674 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009675 }
9676 }
9677
9678 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
9679 TEST_REQUIRES_ARM_NEON;
9680 for (uint32_t n = 1; n <= 8; n++) {
9681 GemmMicrokernelTester()
9682 .mr(1)
9683 .nr(8)
9684 .kr(4)
9685 .sr(1)
9686 .m(1)
9687 .n(n)
9688 .k(8)
9689 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009690 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009691 }
9692 }
9693
9694 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_lt_8) {
9695 TEST_REQUIRES_ARM_NEON;
9696 for (size_t k = 1; k < 8; k++) {
9697 GemmMicrokernelTester()
9698 .mr(1)
9699 .nr(8)
9700 .kr(4)
9701 .sr(1)
9702 .m(1)
9703 .n(8)
9704 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009705 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009706 }
9707 }
9708
9709 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_lt_8_strided_a) {
9710 TEST_REQUIRES_ARM_NEON;
9711 for (size_t k = 1; k < 8; k++) {
9712 GemmMicrokernelTester()
9713 .mr(1)
9714 .nr(8)
9715 .kr(4)
9716 .sr(1)
9717 .m(1)
9718 .n(8)
9719 .k(k)
9720 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009721 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009722 }
9723 }
9724
9725 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_lt_8_subtile) {
9726 TEST_REQUIRES_ARM_NEON;
9727 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009728 for (uint32_t n = 1; n <= 8; n++) {
9729 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009730 GemmMicrokernelTester()
9731 .mr(1)
Frank Barcharde4d3f762021-12-23 15:31:43 -08009732 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009733 .kr(4)
9734 .sr(1)
9735 .m(m)
9736 .n(n)
9737 .k(k)
9738 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009739 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009740 }
9741 }
9742 }
9743 }
9744
9745 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_gt_8) {
9746 TEST_REQUIRES_ARM_NEON;
9747 for (size_t k = 9; k < 16; k++) {
9748 GemmMicrokernelTester()
9749 .mr(1)
9750 .nr(8)
9751 .kr(4)
9752 .sr(1)
9753 .m(1)
9754 .n(8)
9755 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009756 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009757 }
9758 }
9759
9760 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_gt_8_strided_a) {
9761 TEST_REQUIRES_ARM_NEON;
9762 for (size_t k = 9; k < 16; k++) {
9763 GemmMicrokernelTester()
9764 .mr(1)
9765 .nr(8)
9766 .kr(4)
9767 .sr(1)
9768 .m(1)
9769 .n(8)
9770 .k(k)
9771 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009772 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009773 }
9774 }
9775
9776 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_gt_8_subtile) {
9777 TEST_REQUIRES_ARM_NEON;
9778 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009779 for (uint32_t n = 1; n <= 8; n++) {
9780 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009781 GemmMicrokernelTester()
9782 .mr(1)
9783 .nr(8)
9784 .kr(4)
9785 .sr(1)
9786 .m(m)
9787 .n(n)
9788 .k(k)
9789 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009790 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009791 }
9792 }
9793 }
9794 }
9795
9796 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_div_8) {
9797 TEST_REQUIRES_ARM_NEON;
9798 for (size_t k = 16; k <= 80; k += 8) {
9799 GemmMicrokernelTester()
9800 .mr(1)
9801 .nr(8)
9802 .kr(4)
9803 .sr(1)
9804 .m(1)
9805 .n(8)
9806 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009807 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009808 }
9809 }
9810
9811 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_div_8_strided_a) {
9812 TEST_REQUIRES_ARM_NEON;
9813 for (size_t k = 16; k <= 80; k += 8) {
9814 GemmMicrokernelTester()
9815 .mr(1)
9816 .nr(8)
9817 .kr(4)
9818 .sr(1)
9819 .m(1)
9820 .n(8)
9821 .k(k)
9822 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009823 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009824 }
9825 }
9826
9827 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_div_8_subtile) {
9828 TEST_REQUIRES_ARM_NEON;
9829 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009830 for (uint32_t n = 1; n <= 8; n++) {
9831 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009832 GemmMicrokernelTester()
9833 .mr(1)
9834 .nr(8)
9835 .kr(4)
9836 .sr(1)
9837 .m(m)
9838 .n(n)
9839 .k(k)
9840 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009841 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009842 }
9843 }
9844 }
9845 }
9846
9847 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8) {
9848 TEST_REQUIRES_ARM_NEON;
9849 for (uint32_t n = 9; n < 16; n++) {
9850 for (size_t k = 1; k <= 40; k += 9) {
9851 GemmMicrokernelTester()
9852 .mr(1)
9853 .nr(8)
9854 .kr(4)
9855 .sr(1)
9856 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009857 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009858 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009859 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009860 }
9861 }
9862 }
9863
9864 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_strided_cn) {
9865 TEST_REQUIRES_ARM_NEON;
9866 for (uint32_t n = 9; n < 16; n++) {
9867 for (size_t k = 1; k <= 40; k += 9) {
9868 GemmMicrokernelTester()
9869 .mr(1)
9870 .nr(8)
9871 .kr(4)
9872 .sr(1)
9873 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009874 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009875 .k(k)
9876 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009877 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009878 }
9879 }
9880 }
9881
9882 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_strided_a) {
9883 TEST_REQUIRES_ARM_NEON;
9884 for (uint32_t n = 9; n < 16; n++) {
9885 for (size_t k = 1; k <= 40; k += 9) {
9886 GemmMicrokernelTester()
9887 .mr(1)
9888 .nr(8)
9889 .kr(4)
9890 .sr(1)
9891 .m(1)
9892 .n(n)
9893 .k(k)
9894 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009895 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009896 }
9897 }
9898 }
9899
9900 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_subtile) {
9901 TEST_REQUIRES_ARM_NEON;
9902 for (uint32_t n = 9; n < 16; n++) {
9903 for (size_t k = 1; k <= 40; k += 9) {
9904 for (uint32_t m = 1; m <= 1; m++) {
9905 GemmMicrokernelTester()
9906 .mr(1)
9907 .nr(8)
9908 .kr(4)
9909 .sr(1)
9910 .m(m)
9911 .n(n)
9912 .k(k)
9913 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009914 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009915 }
9916 }
9917 }
9918 }
9919
9920 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8) {
9921 TEST_REQUIRES_ARM_NEON;
9922 for (uint32_t n = 16; n <= 24; n += 8) {
9923 for (size_t k = 1; k <= 40; k += 9) {
9924 GemmMicrokernelTester()
9925 .mr(1)
9926 .nr(8)
9927 .kr(4)
9928 .sr(1)
9929 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009930 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009931 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009932 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009933 }
9934 }
9935 }
9936
9937 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_strided_cn) {
9938 TEST_REQUIRES_ARM_NEON;
9939 for (uint32_t n = 16; n <= 24; n += 8) {
9940 for (size_t k = 1; k <= 40; k += 9) {
9941 GemmMicrokernelTester()
9942 .mr(1)
9943 .nr(8)
9944 .kr(4)
9945 .sr(1)
9946 .m(1)
9947 .n(n)
9948 .k(k)
9949 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009950 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009951 }
9952 }
9953 }
9954
9955 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_strided_a) {
9956 TEST_REQUIRES_ARM_NEON;
9957 for (uint32_t n = 16; n <= 24; n += 8) {
9958 for (size_t k = 1; k <= 40; k += 9) {
9959 GemmMicrokernelTester()
9960 .mr(1)
9961 .nr(8)
9962 .kr(4)
9963 .sr(1)
9964 .m(1)
9965 .n(n)
9966 .k(k)
9967 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009968 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009969 }
9970 }
9971 }
9972
9973 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_subtile) {
9974 TEST_REQUIRES_ARM_NEON;
9975 for (uint32_t n = 16; n <= 24; n += 8) {
9976 for (size_t k = 1; k <= 40; k += 9) {
9977 for (uint32_t m = 1; m <= 1; m++) {
9978 GemmMicrokernelTester()
9979 .mr(1)
9980 .nr(8)
9981 .kr(4)
9982 .sr(1)
9983 .m(m)
9984 .n(n)
9985 .k(k)
9986 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009987 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009988 }
9989 }
9990 }
9991 }
9992
9993 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cm_subtile) {
9994 TEST_REQUIRES_ARM_NEON;
9995 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009996 for (uint32_t n = 1; n <= 8; n++) {
9997 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009998 GemmMicrokernelTester()
9999 .mr(1)
10000 .nr(8)
10001 .kr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -080010002 .sr(1)
10003 .m(m)
10004 .n(n)
10005 .k(k)
10006 .cm_stride(11)
10007 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010008 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -080010009 }
10010 }
10011 }
10012 }
10013
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010014 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, qmin) {
Frank Barcharde4d3f762021-12-23 15:31:43 -080010015 TEST_REQUIRES_ARM_NEON;
10016 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010017 .mr(1)
Frank Barcharde4d3f762021-12-23 15:31:43 -080010018 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010019 .kr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -080010020 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010021 .m(1)
Frank Barcharde4d3f762021-12-23 15:31:43 -080010022 .n(8)
10023 .k(8)
10024 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010025 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -080010026 }
10027
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010028 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, qmax) {
Frank Barcharde4d3f762021-12-23 15:31:43 -080010029 TEST_REQUIRES_ARM_NEON;
10030 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010031 .mr(1)
Frank Barcharde4d3f762021-12-23 15:31:43 -080010032 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010033 .kr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -080010034 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010035 .m(1)
Frank Barcharde4d3f762021-12-23 15:31:43 -080010036 .n(8)
10037 .k(8)
10038 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010039 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -080010040 }
10041
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010042 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cm) {
10043 TEST_REQUIRES_ARM_NEON;
10044 GemmMicrokernelTester()
10045 .mr(1)
10046 .nr(8)
10047 .kr(4)
10048 .sr(1)
10049 .m(1)
10050 .n(8)
10051 .k(8)
10052 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010053 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010054 }
10055#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10056
10057
10058#if XNN_ARCH_ARM || XNN_ARCH_ARM64
10059 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_eq_8) {
10060 TEST_REQUIRES_ARM_NEON;
10061 GemmMicrokernelTester()
10062 .mr(2)
10063 .nr(16)
10064 .kr(4)
10065 .sr(1)
10066 .m(2)
10067 .n(16)
10068 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010069 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010070 }
10071
10072 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, strided_cn) {
10073 TEST_REQUIRES_ARM_NEON;
10074 GemmMicrokernelTester()
10075 .mr(2)
10076 .nr(16)
10077 .kr(4)
10078 .sr(1)
10079 .m(2)
10080 .n(16)
10081 .k(8)
10082 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010083 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010084 }
10085
10086 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_eq_8_strided_a) {
10087 TEST_REQUIRES_ARM_NEON;
10088 GemmMicrokernelTester()
10089 .mr(2)
10090 .nr(16)
10091 .kr(4)
10092 .sr(1)
10093 .m(2)
10094 .n(16)
10095 .k(8)
10096 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010097 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010098 }
10099
10100 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_eq_8_subtile) {
10101 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010102 for (uint32_t n = 1; n <= 16; n++) {
10103 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010104 GemmMicrokernelTester()
10105 .mr(2)
10106 .nr(16)
10107 .kr(4)
10108 .sr(1)
10109 .m(m)
10110 .n(n)
10111 .k(8)
10112 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010113 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010114 }
10115 }
10116 }
10117
10118 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
10119 TEST_REQUIRES_ARM_NEON;
10120 for (uint32_t m = 1; m <= 2; m++) {
10121 GemmMicrokernelTester()
10122 .mr(2)
10123 .nr(16)
10124 .kr(4)
10125 .sr(1)
10126 .m(m)
10127 .n(16)
10128 .k(8)
10129 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010130 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010131 }
10132 }
10133
10134 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
10135 TEST_REQUIRES_ARM_NEON;
10136 for (uint32_t n = 1; n <= 16; n++) {
10137 GemmMicrokernelTester()
10138 .mr(2)
10139 .nr(16)
10140 .kr(4)
10141 .sr(1)
10142 .m(2)
10143 .n(n)
10144 .k(8)
10145 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010146 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010147 }
10148 }
10149
10150 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_lt_8) {
10151 TEST_REQUIRES_ARM_NEON;
10152 for (size_t k = 1; k < 8; k++) {
10153 GemmMicrokernelTester()
10154 .mr(2)
10155 .nr(16)
10156 .kr(4)
10157 .sr(1)
10158 .m(2)
10159 .n(16)
10160 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010161 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010162 }
10163 }
10164
10165 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_lt_8_strided_a) {
10166 TEST_REQUIRES_ARM_NEON;
10167 for (size_t k = 1; k < 8; k++) {
10168 GemmMicrokernelTester()
10169 .mr(2)
10170 .nr(16)
10171 .kr(4)
10172 .sr(1)
10173 .m(2)
10174 .n(16)
10175 .k(k)
10176 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010177 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010178 }
10179 }
10180
10181 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_lt_8_subtile) {
10182 TEST_REQUIRES_ARM_NEON;
10183 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010184 for (uint32_t n = 1; n <= 16; n++) {
10185 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010186 GemmMicrokernelTester()
10187 .mr(2)
10188 .nr(16)
10189 .kr(4)
10190 .sr(1)
10191 .m(m)
10192 .n(n)
10193 .k(k)
10194 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010195 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010196 }
10197 }
10198 }
10199 }
10200
10201 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_gt_8) {
10202 TEST_REQUIRES_ARM_NEON;
10203 for (size_t k = 9; k < 16; k++) {
10204 GemmMicrokernelTester()
10205 .mr(2)
10206 .nr(16)
10207 .kr(4)
10208 .sr(1)
10209 .m(2)
10210 .n(16)
10211 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010212 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010213 }
10214 }
10215
10216 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_gt_8_strided_a) {
10217 TEST_REQUIRES_ARM_NEON;
10218 for (size_t k = 9; k < 16; k++) {
10219 GemmMicrokernelTester()
10220 .mr(2)
10221 .nr(16)
10222 .kr(4)
10223 .sr(1)
10224 .m(2)
10225 .n(16)
10226 .k(k)
10227 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010228 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010229 }
10230 }
10231
10232 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_gt_8_subtile) {
10233 TEST_REQUIRES_ARM_NEON;
10234 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010235 for (uint32_t n = 1; n <= 16; n++) {
10236 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010237 GemmMicrokernelTester()
10238 .mr(2)
10239 .nr(16)
10240 .kr(4)
10241 .sr(1)
10242 .m(m)
10243 .n(n)
10244 .k(k)
10245 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010246 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010247 }
10248 }
10249 }
10250 }
10251
10252 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_div_8) {
10253 TEST_REQUIRES_ARM_NEON;
10254 for (size_t k = 16; k <= 80; k += 8) {
10255 GemmMicrokernelTester()
10256 .mr(2)
10257 .nr(16)
10258 .kr(4)
10259 .sr(1)
10260 .m(2)
10261 .n(16)
10262 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010263 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010264 }
10265 }
10266
10267 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_div_8_strided_a) {
10268 TEST_REQUIRES_ARM_NEON;
10269 for (size_t k = 16; k <= 80; k += 8) {
10270 GemmMicrokernelTester()
10271 .mr(2)
10272 .nr(16)
10273 .kr(4)
10274 .sr(1)
10275 .m(2)
10276 .n(16)
10277 .k(k)
10278 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010279 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010280 }
10281 }
10282
10283 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_div_8_subtile) {
10284 TEST_REQUIRES_ARM_NEON;
10285 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010286 for (uint32_t n = 1; n <= 16; n++) {
10287 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010288 GemmMicrokernelTester()
10289 .mr(2)
10290 .nr(16)
10291 .kr(4)
10292 .sr(1)
10293 .m(m)
10294 .n(n)
10295 .k(k)
10296 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010297 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010298 }
10299 }
10300 }
10301 }
10302
10303 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_gt_16) {
10304 TEST_REQUIRES_ARM_NEON;
10305 for (uint32_t n = 17; n < 32; n++) {
10306 for (size_t k = 1; k <= 40; k += 9) {
10307 GemmMicrokernelTester()
10308 .mr(2)
10309 .nr(16)
10310 .kr(4)
10311 .sr(1)
10312 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010313 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010314 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010315 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010316 }
10317 }
10318 }
10319
10320 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_gt_16_strided_cn) {
10321 TEST_REQUIRES_ARM_NEON;
10322 for (uint32_t n = 17; n < 32; n++) {
10323 for (size_t k = 1; k <= 40; k += 9) {
10324 GemmMicrokernelTester()
10325 .mr(2)
10326 .nr(16)
10327 .kr(4)
10328 .sr(1)
10329 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010330 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010331 .k(k)
10332 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010333 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010334 }
10335 }
10336 }
10337
10338 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_gt_16_strided_a) {
10339 TEST_REQUIRES_ARM_NEON;
10340 for (uint32_t n = 17; n < 32; n++) {
10341 for (size_t k = 1; k <= 40; k += 9) {
10342 GemmMicrokernelTester()
10343 .mr(2)
10344 .nr(16)
10345 .kr(4)
10346 .sr(1)
10347 .m(2)
10348 .n(n)
10349 .k(k)
10350 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010351 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010352 }
10353 }
10354 }
10355
10356 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_gt_16_subtile) {
10357 TEST_REQUIRES_ARM_NEON;
10358 for (uint32_t n = 17; n < 32; n++) {
10359 for (size_t k = 1; k <= 40; k += 9) {
10360 for (uint32_t m = 1; m <= 2; m++) {
10361 GemmMicrokernelTester()
10362 .mr(2)
10363 .nr(16)
10364 .kr(4)
10365 .sr(1)
10366 .m(m)
10367 .n(n)
10368 .k(k)
10369 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010370 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010371 }
10372 }
10373 }
10374 }
10375
10376 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_div_16) {
10377 TEST_REQUIRES_ARM_NEON;
10378 for (uint32_t n = 32; n <= 48; n += 16) {
10379 for (size_t k = 1; k <= 40; k += 9) {
10380 GemmMicrokernelTester()
10381 .mr(2)
10382 .nr(16)
10383 .kr(4)
10384 .sr(1)
10385 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010386 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010387 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010388 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010389 }
10390 }
10391 }
10392
10393 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_div_16_strided_cn) {
10394 TEST_REQUIRES_ARM_NEON;
10395 for (uint32_t n = 32; n <= 48; n += 16) {
10396 for (size_t k = 1; k <= 40; k += 9) {
10397 GemmMicrokernelTester()
10398 .mr(2)
10399 .nr(16)
10400 .kr(4)
10401 .sr(1)
10402 .m(2)
10403 .n(n)
10404 .k(k)
10405 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010406 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010407 }
10408 }
10409 }
10410
10411 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_div_16_strided_a) {
10412 TEST_REQUIRES_ARM_NEON;
10413 for (uint32_t n = 32; n <= 48; n += 16) {
10414 for (size_t k = 1; k <= 40; k += 9) {
10415 GemmMicrokernelTester()
10416 .mr(2)
10417 .nr(16)
10418 .kr(4)
10419 .sr(1)
10420 .m(2)
10421 .n(n)
10422 .k(k)
10423 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010424 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010425 }
10426 }
10427 }
10428
10429 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_div_16_subtile) {
10430 TEST_REQUIRES_ARM_NEON;
10431 for (uint32_t n = 32; n <= 48; n += 16) {
10432 for (size_t k = 1; k <= 40; k += 9) {
10433 for (uint32_t m = 1; m <= 2; m++) {
10434 GemmMicrokernelTester()
10435 .mr(2)
10436 .nr(16)
10437 .kr(4)
10438 .sr(1)
10439 .m(m)
10440 .n(n)
10441 .k(k)
10442 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010443 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010444 }
10445 }
10446 }
10447 }
10448
10449 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, strided_cm_subtile) {
10450 TEST_REQUIRES_ARM_NEON;
10451 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010452 for (uint32_t n = 1; n <= 16; n++) {
10453 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010454 GemmMicrokernelTester()
10455 .mr(2)
10456 .nr(16)
10457 .kr(4)
10458 .sr(1)
10459 .m(m)
10460 .n(n)
10461 .k(k)
10462 .cm_stride(19)
10463 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010464 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010465 }
10466 }
10467 }
10468 }
10469
10470 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, qmin) {
10471 TEST_REQUIRES_ARM_NEON;
10472 GemmMicrokernelTester()
10473 .mr(2)
10474 .nr(16)
10475 .kr(4)
10476 .sr(1)
10477 .m(2)
10478 .n(16)
10479 .k(8)
10480 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010481 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010482 }
10483
10484 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, qmax) {
10485 TEST_REQUIRES_ARM_NEON;
10486 GemmMicrokernelTester()
10487 .mr(2)
10488 .nr(16)
10489 .kr(4)
10490 .sr(1)
10491 .m(2)
10492 .n(16)
10493 .k(8)
10494 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010495 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010496 }
10497
10498 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, strided_cm) {
10499 TEST_REQUIRES_ARM_NEON;
10500 GemmMicrokernelTester()
10501 .mr(2)
10502 .nr(16)
10503 .kr(4)
10504 .sr(1)
10505 .m(2)
10506 .n(16)
10507 .k(8)
10508 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010509 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010510 }
10511#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10512
10513
10514#if XNN_ARCH_ARM || XNN_ARCH_ARM64
10515 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_eq_16) {
10516 TEST_REQUIRES_ARM_NEON;
10517 GemmMicrokernelTester()
10518 .mr(2)
10519 .nr(8)
10520 .kr(4)
10521 .sr(1)
10522 .m(2)
10523 .n(8)
10524 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080010525 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010526 }
10527
10528 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, strided_cn) {
10529 TEST_REQUIRES_ARM_NEON;
10530 GemmMicrokernelTester()
10531 .mr(2)
10532 .nr(8)
10533 .kr(4)
10534 .sr(1)
10535 .m(2)
10536 .n(8)
10537 .k(16)
10538 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010539 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010540 }
10541
10542 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_eq_16_strided_a) {
10543 TEST_REQUIRES_ARM_NEON;
10544 GemmMicrokernelTester()
10545 .mr(2)
10546 .nr(8)
10547 .kr(4)
10548 .sr(1)
10549 .m(2)
10550 .n(8)
10551 .k(16)
10552 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010553 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010554 }
10555
10556 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
10557 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010558 for (uint32_t n = 1; n <= 8; n++) {
10559 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010560 GemmMicrokernelTester()
10561 .mr(2)
10562 .nr(8)
10563 .kr(4)
10564 .sr(1)
10565 .m(m)
10566 .n(n)
10567 .k(16)
10568 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010569 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010570 }
10571 }
10572 }
10573
10574 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
10575 TEST_REQUIRES_ARM_NEON;
10576 for (uint32_t m = 1; m <= 2; m++) {
10577 GemmMicrokernelTester()
10578 .mr(2)
10579 .nr(8)
10580 .kr(4)
10581 .sr(1)
10582 .m(m)
10583 .n(8)
10584 .k(16)
10585 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010586 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010587 }
10588 }
10589
10590 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
10591 TEST_REQUIRES_ARM_NEON;
10592 for (uint32_t n = 1; n <= 8; n++) {
10593 GemmMicrokernelTester()
10594 .mr(2)
10595 .nr(8)
10596 .kr(4)
10597 .sr(1)
10598 .m(2)
10599 .n(n)
10600 .k(16)
10601 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010602 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010603 }
10604 }
10605
10606 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_lt_16) {
10607 TEST_REQUIRES_ARM_NEON;
10608 for (size_t k = 1; k < 16; k++) {
10609 GemmMicrokernelTester()
10610 .mr(2)
10611 .nr(8)
10612 .kr(4)
10613 .sr(1)
10614 .m(2)
10615 .n(8)
10616 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010617 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010618 }
10619 }
10620
10621 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_lt_16_strided_a) {
10622 TEST_REQUIRES_ARM_NEON;
10623 for (size_t k = 1; k < 16; k++) {
10624 GemmMicrokernelTester()
10625 .mr(2)
10626 .nr(8)
10627 .kr(4)
10628 .sr(1)
10629 .m(2)
10630 .n(8)
10631 .k(k)
10632 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010633 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010634 }
10635 }
10636
10637 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
10638 TEST_REQUIRES_ARM_NEON;
10639 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010640 for (uint32_t n = 1; n <= 8; n++) {
10641 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010642 GemmMicrokernelTester()
10643 .mr(2)
10644 .nr(8)
10645 .kr(4)
10646 .sr(1)
10647 .m(m)
10648 .n(n)
10649 .k(k)
10650 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010651 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010652 }
10653 }
10654 }
10655 }
10656
10657 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_gt_16) {
10658 TEST_REQUIRES_ARM_NEON;
10659 for (size_t k = 17; k < 32; k++) {
10660 GemmMicrokernelTester()
10661 .mr(2)
10662 .nr(8)
10663 .kr(4)
10664 .sr(1)
10665 .m(2)
10666 .n(8)
10667 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010668 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010669 }
10670 }
10671
10672 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_gt_16_strided_a) {
10673 TEST_REQUIRES_ARM_NEON;
10674 for (size_t k = 17; k < 32; k++) {
10675 GemmMicrokernelTester()
10676 .mr(2)
10677 .nr(8)
10678 .kr(4)
10679 .sr(1)
10680 .m(2)
10681 .n(8)
10682 .k(k)
10683 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080010684 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010685 }
10686 }
10687
10688 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
10689 TEST_REQUIRES_ARM_NEON;
10690 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010691 for (uint32_t n = 1; n <= 8; n++) {
10692 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010693 GemmMicrokernelTester()
10694 .mr(2)
10695 .nr(8)
10696 .kr(4)
10697 .sr(1)
10698 .m(m)
10699 .n(n)
10700 .k(k)
10701 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010702 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010703 }
10704 }
10705 }
10706 }
10707
10708 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_div_16) {
10709 TEST_REQUIRES_ARM_NEON;
10710 for (size_t k = 32; k <= 160; k += 16) {
10711 GemmMicrokernelTester()
10712 .mr(2)
10713 .nr(8)
10714 .kr(4)
10715 .sr(1)
10716 .m(2)
10717 .n(8)
10718 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010719 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010720 }
10721 }
10722
10723 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_div_16_strided_a) {
10724 TEST_REQUIRES_ARM_NEON;
10725 for (size_t k = 32; k <= 160; k += 16) {
10726 GemmMicrokernelTester()
10727 .mr(2)
10728 .nr(8)
10729 .kr(4)
10730 .sr(1)
10731 .m(2)
10732 .n(8)
10733 .k(k)
10734 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080010735 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010736 }
10737 }
10738
10739 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
10740 TEST_REQUIRES_ARM_NEON;
10741 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010742 for (uint32_t n = 1; n <= 8; n++) {
10743 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010744 GemmMicrokernelTester()
10745 .mr(2)
10746 .nr(8)
10747 .kr(4)
10748 .sr(1)
10749 .m(m)
10750 .n(n)
10751 .k(k)
10752 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010753 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010754 }
10755 }
10756 }
10757 }
10758
10759 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_gt_8) {
10760 TEST_REQUIRES_ARM_NEON;
10761 for (uint32_t n = 9; n < 16; n++) {
10762 for (size_t k = 1; k <= 80; k += 17) {
10763 GemmMicrokernelTester()
10764 .mr(2)
10765 .nr(8)
10766 .kr(4)
10767 .sr(1)
10768 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010769 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010770 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010771 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010772 }
10773 }
10774 }
10775
10776 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
10777 TEST_REQUIRES_ARM_NEON;
10778 for (uint32_t n = 9; n < 16; n++) {
10779 for (size_t k = 1; k <= 80; k += 17) {
10780 GemmMicrokernelTester()
10781 .mr(2)
10782 .nr(8)
10783 .kr(4)
10784 .sr(1)
10785 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010786 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010787 .k(k)
10788 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010789 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010790 }
10791 }
10792 }
10793
10794 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_gt_8_strided_a) {
10795 TEST_REQUIRES_ARM_NEON;
10796 for (uint32_t n = 9; n < 16; n++) {
10797 for (size_t k = 1; k <= 80; k += 17) {
10798 GemmMicrokernelTester()
10799 .mr(2)
10800 .nr(8)
10801 .kr(4)
10802 .sr(1)
10803 .m(2)
10804 .n(n)
10805 .k(k)
10806 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010807 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010808 }
10809 }
10810 }
10811
10812 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
10813 TEST_REQUIRES_ARM_NEON;
10814 for (uint32_t n = 9; n < 16; n++) {
10815 for (size_t k = 1; k <= 80; k += 17) {
10816 for (uint32_t m = 1; m <= 2; m++) {
10817 GemmMicrokernelTester()
10818 .mr(2)
10819 .nr(8)
10820 .kr(4)
10821 .sr(1)
10822 .m(m)
10823 .n(n)
10824 .k(k)
10825 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010826 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010827 }
10828 }
10829 }
10830 }
10831
10832 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_div_8) {
10833 TEST_REQUIRES_ARM_NEON;
10834 for (uint32_t n = 16; n <= 24; n += 8) {
10835 for (size_t k = 1; k <= 80; k += 17) {
10836 GemmMicrokernelTester()
10837 .mr(2)
10838 .nr(8)
10839 .kr(4)
10840 .sr(1)
10841 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010842 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010843 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010844 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010845 }
10846 }
10847 }
10848
10849 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
10850 TEST_REQUIRES_ARM_NEON;
10851 for (uint32_t n = 16; n <= 24; n += 8) {
10852 for (size_t k = 1; k <= 80; k += 17) {
10853 GemmMicrokernelTester()
10854 .mr(2)
10855 .nr(8)
10856 .kr(4)
10857 .sr(1)
10858 .m(2)
10859 .n(n)
10860 .k(k)
10861 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010862 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010863 }
10864 }
10865 }
10866
10867 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_div_8_strided_a) {
10868 TEST_REQUIRES_ARM_NEON;
10869 for (uint32_t n = 16; n <= 24; n += 8) {
10870 for (size_t k = 1; k <= 80; k += 17) {
10871 GemmMicrokernelTester()
10872 .mr(2)
10873 .nr(8)
10874 .kr(4)
10875 .sr(1)
10876 .m(2)
10877 .n(n)
10878 .k(k)
10879 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010880 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010881 }
10882 }
10883 }
10884
10885 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
10886 TEST_REQUIRES_ARM_NEON;
10887 for (uint32_t n = 16; n <= 24; n += 8) {
10888 for (size_t k = 1; k <= 80; k += 17) {
10889 for (uint32_t m = 1; m <= 2; m++) {
10890 GemmMicrokernelTester()
10891 .mr(2)
10892 .nr(8)
10893 .kr(4)
10894 .sr(1)
10895 .m(m)
10896 .n(n)
10897 .k(k)
10898 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010899 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010900 }
10901 }
10902 }
10903 }
10904
10905 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
10906 TEST_REQUIRES_ARM_NEON;
10907 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010908 for (uint32_t n = 1; n <= 8; n++) {
10909 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010910 GemmMicrokernelTester()
10911 .mr(2)
10912 .nr(8)
10913 .kr(4)
10914 .sr(1)
10915 .m(m)
10916 .n(n)
10917 .k(k)
10918 .cm_stride(11)
10919 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010920 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010921 }
10922 }
10923 }
10924 }
10925
10926 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, qmin) {
10927 TEST_REQUIRES_ARM_NEON;
10928 GemmMicrokernelTester()
10929 .mr(2)
10930 .nr(8)
10931 .kr(4)
10932 .sr(1)
10933 .m(2)
10934 .n(8)
10935 .k(16)
10936 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010937 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010938 }
10939
10940 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, qmax) {
10941 TEST_REQUIRES_ARM_NEON;
10942 GemmMicrokernelTester()
10943 .mr(2)
10944 .nr(8)
10945 .kr(4)
10946 .sr(1)
10947 .m(2)
10948 .n(8)
10949 .k(16)
10950 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010951 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010952 }
10953
10954 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_DUP, strided_cm) {
10955 TEST_REQUIRES_ARM_NEON;
10956 GemmMicrokernelTester()
10957 .mr(2)
10958 .nr(8)
10959 .kr(4)
10960 .sr(1)
10961 .m(2)
10962 .n(8)
10963 .k(16)
10964 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010965 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010966 }
10967#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10968
10969
10970#if XNN_ARCH_ARM || XNN_ARCH_ARM64
10971 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8) {
10972 TEST_REQUIRES_ARM_NEON;
10973 GemmMicrokernelTester()
10974 .mr(1)
10975 .nr(8)
10976 .kr(4)
10977 .sr(1)
10978 .m(1)
10979 .n(8)
10980 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010981 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010982 }
10983
10984 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, strided_cn) {
10985 TEST_REQUIRES_ARM_NEON;
10986 GemmMicrokernelTester()
10987 .mr(1)
10988 .nr(8)
10989 .kr(4)
10990 .sr(1)
10991 .m(1)
10992 .n(8)
10993 .k(8)
10994 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010995 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010996 }
10997
10998 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8_strided_a) {
10999 TEST_REQUIRES_ARM_NEON;
11000 GemmMicrokernelTester()
11001 .mr(1)
11002 .nr(8)
11003 .kr(4)
11004 .sr(1)
11005 .m(1)
11006 .n(8)
11007 .k(8)
11008 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011009 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011010 }
11011
11012 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8_subtile) {
11013 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011014 for (uint32_t n = 1; n <= 8; n++) {
11015 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011016 GemmMicrokernelTester()
11017 .mr(1)
11018 .nr(8)
11019 .kr(4)
11020 .sr(1)
11021 .m(m)
11022 .n(n)
11023 .k(8)
11024 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011025 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011026 }
11027 }
11028 }
11029
11030 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
11031 TEST_REQUIRES_ARM_NEON;
11032 for (uint32_t m = 1; m <= 1; m++) {
11033 GemmMicrokernelTester()
11034 .mr(1)
11035 .nr(8)
11036 .kr(4)
11037 .sr(1)
11038 .m(m)
11039 .n(8)
11040 .k(8)
11041 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011042 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011043 }
11044 }
11045
11046 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
11047 TEST_REQUIRES_ARM_NEON;
11048 for (uint32_t n = 1; n <= 8; n++) {
11049 GemmMicrokernelTester()
11050 .mr(1)
11051 .nr(8)
11052 .kr(4)
11053 .sr(1)
11054 .m(1)
11055 .n(n)
11056 .k(8)
11057 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011058 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011059 }
11060 }
11061
11062 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_lt_8) {
11063 TEST_REQUIRES_ARM_NEON;
11064 for (size_t k = 1; k < 8; k++) {
11065 GemmMicrokernelTester()
11066 .mr(1)
11067 .nr(8)
11068 .kr(4)
11069 .sr(1)
11070 .m(1)
11071 .n(8)
11072 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011073 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011074 }
11075 }
11076
11077 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_lt_8_strided_a) {
11078 TEST_REQUIRES_ARM_NEON;
11079 for (size_t k = 1; k < 8; k++) {
11080 GemmMicrokernelTester()
11081 .mr(1)
11082 .nr(8)
11083 .kr(4)
11084 .sr(1)
11085 .m(1)
11086 .n(8)
11087 .k(k)
11088 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011089 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011090 }
11091 }
11092
11093 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_lt_8_subtile) {
11094 TEST_REQUIRES_ARM_NEON;
11095 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011096 for (uint32_t n = 1; n <= 8; n++) {
11097 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011098 GemmMicrokernelTester()
11099 .mr(1)
11100 .nr(8)
11101 .kr(4)
11102 .sr(1)
11103 .m(m)
11104 .n(n)
11105 .k(k)
11106 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011107 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011108 }
11109 }
11110 }
11111 }
11112
11113 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_gt_8) {
11114 TEST_REQUIRES_ARM_NEON;
11115 for (size_t k = 9; k < 16; k++) {
11116 GemmMicrokernelTester()
11117 .mr(1)
11118 .nr(8)
11119 .kr(4)
11120 .sr(1)
11121 .m(1)
11122 .n(8)
11123 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011124 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011125 }
11126 }
11127
11128 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_gt_8_strided_a) {
11129 TEST_REQUIRES_ARM_NEON;
11130 for (size_t k = 9; k < 16; k++) {
11131 GemmMicrokernelTester()
11132 .mr(1)
11133 .nr(8)
11134 .kr(4)
11135 .sr(1)
11136 .m(1)
11137 .n(8)
11138 .k(k)
11139 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011140 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011141 }
11142 }
11143
11144 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_gt_8_subtile) {
11145 TEST_REQUIRES_ARM_NEON;
11146 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011147 for (uint32_t n = 1; n <= 8; n++) {
11148 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011149 GemmMicrokernelTester()
11150 .mr(1)
11151 .nr(8)
11152 .kr(4)
11153 .sr(1)
11154 .m(m)
11155 .n(n)
11156 .k(k)
11157 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011158 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011159 }
11160 }
11161 }
11162 }
11163
11164 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_div_8) {
11165 TEST_REQUIRES_ARM_NEON;
11166 for (size_t k = 16; k <= 80; k += 8) {
11167 GemmMicrokernelTester()
11168 .mr(1)
11169 .nr(8)
11170 .kr(4)
11171 .sr(1)
11172 .m(1)
11173 .n(8)
11174 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011175 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011176 }
11177 }
11178
11179 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_div_8_strided_a) {
11180 TEST_REQUIRES_ARM_NEON;
11181 for (size_t k = 16; k <= 80; k += 8) {
11182 GemmMicrokernelTester()
11183 .mr(1)
11184 .nr(8)
11185 .kr(4)
11186 .sr(1)
11187 .m(1)
11188 .n(8)
11189 .k(k)
11190 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011191 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011192 }
11193 }
11194
11195 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_div_8_subtile) {
11196 TEST_REQUIRES_ARM_NEON;
11197 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011198 for (uint32_t n = 1; n <= 8; n++) {
11199 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011200 GemmMicrokernelTester()
11201 .mr(1)
11202 .nr(8)
11203 .kr(4)
11204 .sr(1)
11205 .m(m)
11206 .n(n)
11207 .k(k)
11208 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011209 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011210 }
11211 }
11212 }
11213 }
11214
11215 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8) {
11216 TEST_REQUIRES_ARM_NEON;
11217 for (uint32_t n = 9; n < 16; n++) {
11218 for (size_t k = 1; k <= 40; k += 9) {
11219 GemmMicrokernelTester()
11220 .mr(1)
11221 .nr(8)
11222 .kr(4)
11223 .sr(1)
11224 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011225 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011226 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011227 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011228 }
11229 }
11230 }
11231
11232 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) {
11233 TEST_REQUIRES_ARM_NEON;
11234 for (uint32_t n = 9; n < 16; n++) {
11235 for (size_t k = 1; k <= 40; k += 9) {
11236 GemmMicrokernelTester()
11237 .mr(1)
11238 .nr(8)
11239 .kr(4)
11240 .sr(1)
11241 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011242 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011243 .k(k)
11244 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011245 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011246 }
11247 }
11248 }
11249
11250 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8_strided_a) {
11251 TEST_REQUIRES_ARM_NEON;
11252 for (uint32_t n = 9; n < 16; n++) {
11253 for (size_t k = 1; k <= 40; k += 9) {
11254 GemmMicrokernelTester()
11255 .mr(1)
11256 .nr(8)
11257 .kr(4)
11258 .sr(1)
11259 .m(1)
11260 .n(n)
11261 .k(k)
11262 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011263 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011264 }
11265 }
11266 }
11267
11268 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8_subtile) {
11269 TEST_REQUIRES_ARM_NEON;
11270 for (uint32_t n = 9; n < 16; n++) {
11271 for (size_t k = 1; k <= 40; k += 9) {
11272 for (uint32_t m = 1; m <= 1; m++) {
11273 GemmMicrokernelTester()
11274 .mr(1)
11275 .nr(8)
11276 .kr(4)
11277 .sr(1)
11278 .m(m)
11279 .n(n)
11280 .k(k)
11281 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011282 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011283 }
11284 }
11285 }
11286 }
11287
11288 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8) {
11289 TEST_REQUIRES_ARM_NEON;
11290 for (uint32_t n = 16; n <= 24; n += 8) {
11291 for (size_t k = 1; k <= 40; k += 9) {
11292 GemmMicrokernelTester()
11293 .mr(1)
11294 .nr(8)
11295 .kr(4)
11296 .sr(1)
11297 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011298 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011299 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011300 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011301 }
11302 }
11303 }
11304
11305 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) {
11306 TEST_REQUIRES_ARM_NEON;
11307 for (uint32_t n = 16; n <= 24; n += 8) {
11308 for (size_t k = 1; k <= 40; k += 9) {
11309 GemmMicrokernelTester()
11310 .mr(1)
11311 .nr(8)
11312 .kr(4)
11313 .sr(1)
11314 .m(1)
11315 .n(n)
11316 .k(k)
11317 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011318 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011319 }
11320 }
11321 }
11322
11323 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8_strided_a) {
11324 TEST_REQUIRES_ARM_NEON;
11325 for (uint32_t n = 16; n <= 24; n += 8) {
11326 for (size_t k = 1; k <= 40; k += 9) {
11327 GemmMicrokernelTester()
11328 .mr(1)
11329 .nr(8)
11330 .kr(4)
11331 .sr(1)
11332 .m(1)
11333 .n(n)
11334 .k(k)
11335 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011336 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011337 }
11338 }
11339 }
11340
11341 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8_subtile) {
11342 TEST_REQUIRES_ARM_NEON;
11343 for (uint32_t n = 16; n <= 24; n += 8) {
11344 for (size_t k = 1; k <= 40; k += 9) {
11345 for (uint32_t m = 1; m <= 1; m++) {
11346 GemmMicrokernelTester()
11347 .mr(1)
11348 .nr(8)
11349 .kr(4)
11350 .sr(1)
11351 .m(m)
11352 .n(n)
11353 .k(k)
11354 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011355 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011356 }
11357 }
11358 }
11359 }
11360
11361 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, strided_cm_subtile) {
11362 TEST_REQUIRES_ARM_NEON;
11363 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011364 for (uint32_t n = 1; n <= 8; n++) {
11365 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011366 GemmMicrokernelTester()
11367 .mr(1)
11368 .nr(8)
11369 .kr(4)
11370 .sr(1)
11371 .m(m)
11372 .n(n)
11373 .k(k)
11374 .cm_stride(11)
11375 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011376 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011377 }
11378 }
11379 }
11380 }
11381
11382 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, qmin) {
11383 TEST_REQUIRES_ARM_NEON;
11384 GemmMicrokernelTester()
11385 .mr(1)
11386 .nr(8)
11387 .kr(4)
11388 .sr(1)
11389 .m(1)
11390 .n(8)
11391 .k(8)
11392 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011393 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011394 }
11395
11396 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, qmax) {
11397 TEST_REQUIRES_ARM_NEON;
11398 GemmMicrokernelTester()
11399 .mr(1)
11400 .nr(8)
11401 .kr(4)
11402 .sr(1)
11403 .m(1)
11404 .n(8)
11405 .k(8)
11406 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011407 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011408 }
11409
11410 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, strided_cm) {
11411 TEST_REQUIRES_ARM_NEON;
11412 GemmMicrokernelTester()
11413 .mr(1)
11414 .nr(8)
11415 .kr(4)
11416 .sr(1)
11417 .m(1)
11418 .n(8)
11419 .k(8)
11420 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011421 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011422 }
11423#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11424
11425
11426#if XNN_ARCH_ARM || XNN_ARCH_ARM64
11427 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_eq_8) {
11428 TEST_REQUIRES_ARM_NEON;
11429 GemmMicrokernelTester()
11430 .mr(1)
11431 .nr(16)
11432 .kr(4)
11433 .sr(1)
11434 .m(1)
11435 .n(16)
11436 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011437 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011438 }
11439
11440 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, strided_cn) {
11441 TEST_REQUIRES_ARM_NEON;
11442 GemmMicrokernelTester()
11443 .mr(1)
11444 .nr(16)
11445 .kr(4)
11446 .sr(1)
11447 .m(1)
11448 .n(16)
11449 .k(8)
11450 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011451 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011452 }
11453
11454 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_eq_8_strided_a) {
11455 TEST_REQUIRES_ARM_NEON;
11456 GemmMicrokernelTester()
11457 .mr(1)
11458 .nr(16)
11459 .kr(4)
11460 .sr(1)
11461 .m(1)
11462 .n(16)
11463 .k(8)
11464 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011465 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011466 }
11467
11468 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_eq_8_subtile) {
11469 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011470 for (uint32_t n = 1; n <= 16; n++) {
11471 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011472 GemmMicrokernelTester()
11473 .mr(1)
11474 .nr(16)
11475 .kr(4)
11476 .sr(1)
11477 .m(m)
11478 .n(n)
11479 .k(8)
11480 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011481 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011482 }
11483 }
11484 }
11485
11486 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
11487 TEST_REQUIRES_ARM_NEON;
11488 for (uint32_t m = 1; m <= 1; m++) {
11489 GemmMicrokernelTester()
11490 .mr(1)
11491 .nr(16)
11492 .kr(4)
11493 .sr(1)
11494 .m(m)
11495 .n(16)
11496 .k(8)
11497 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011498 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011499 }
11500 }
11501
11502 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
11503 TEST_REQUIRES_ARM_NEON;
11504 for (uint32_t n = 1; n <= 16; n++) {
11505 GemmMicrokernelTester()
11506 .mr(1)
11507 .nr(16)
11508 .kr(4)
11509 .sr(1)
11510 .m(1)
11511 .n(n)
11512 .k(8)
11513 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011514 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011515 }
11516 }
11517
11518 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_lt_8) {
11519 TEST_REQUIRES_ARM_NEON;
11520 for (size_t k = 1; k < 8; k++) {
11521 GemmMicrokernelTester()
11522 .mr(1)
11523 .nr(16)
11524 .kr(4)
11525 .sr(1)
11526 .m(1)
11527 .n(16)
11528 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011529 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011530 }
11531 }
11532
11533 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_lt_8_strided_a) {
11534 TEST_REQUIRES_ARM_NEON;
11535 for (size_t k = 1; k < 8; k++) {
11536 GemmMicrokernelTester()
11537 .mr(1)
11538 .nr(16)
11539 .kr(4)
11540 .sr(1)
11541 .m(1)
11542 .n(16)
11543 .k(k)
11544 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011545 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011546 }
11547 }
11548
11549 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_lt_8_subtile) {
11550 TEST_REQUIRES_ARM_NEON;
11551 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011552 for (uint32_t n = 1; n <= 16; n++) {
11553 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011554 GemmMicrokernelTester()
11555 .mr(1)
11556 .nr(16)
11557 .kr(4)
11558 .sr(1)
11559 .m(m)
11560 .n(n)
11561 .k(k)
11562 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011563 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011564 }
11565 }
11566 }
11567 }
11568
11569 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_gt_8) {
11570 TEST_REQUIRES_ARM_NEON;
11571 for (size_t k = 9; k < 16; k++) {
11572 GemmMicrokernelTester()
11573 .mr(1)
11574 .nr(16)
11575 .kr(4)
11576 .sr(1)
11577 .m(1)
11578 .n(16)
11579 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011580 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011581 }
11582 }
11583
11584 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_gt_8_strided_a) {
11585 TEST_REQUIRES_ARM_NEON;
11586 for (size_t k = 9; k < 16; k++) {
11587 GemmMicrokernelTester()
11588 .mr(1)
11589 .nr(16)
11590 .kr(4)
11591 .sr(1)
11592 .m(1)
11593 .n(16)
11594 .k(k)
11595 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011596 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011597 }
11598 }
11599
11600 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_gt_8_subtile) {
11601 TEST_REQUIRES_ARM_NEON;
11602 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011603 for (uint32_t n = 1; n <= 16; n++) {
11604 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011605 GemmMicrokernelTester()
11606 .mr(1)
11607 .nr(16)
11608 .kr(4)
11609 .sr(1)
11610 .m(m)
11611 .n(n)
11612 .k(k)
11613 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011614 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011615 }
11616 }
11617 }
11618 }
11619
11620 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_div_8) {
11621 TEST_REQUIRES_ARM_NEON;
11622 for (size_t k = 16; k <= 80; k += 8) {
11623 GemmMicrokernelTester()
11624 .mr(1)
11625 .nr(16)
11626 .kr(4)
11627 .sr(1)
11628 .m(1)
11629 .n(16)
11630 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011631 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011632 }
11633 }
11634
11635 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_div_8_strided_a) {
11636 TEST_REQUIRES_ARM_NEON;
11637 for (size_t k = 16; k <= 80; k += 8) {
11638 GemmMicrokernelTester()
11639 .mr(1)
11640 .nr(16)
11641 .kr(4)
11642 .sr(1)
11643 .m(1)
11644 .n(16)
11645 .k(k)
11646 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011647 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011648 }
11649 }
11650
11651 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, k_div_8_subtile) {
11652 TEST_REQUIRES_ARM_NEON;
11653 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011654 for (uint32_t n = 1; n <= 16; n++) {
11655 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011656 GemmMicrokernelTester()
11657 .mr(1)
11658 .nr(16)
11659 .kr(4)
11660 .sr(1)
11661 .m(m)
11662 .n(n)
11663 .k(k)
11664 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011665 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011666 }
11667 }
11668 }
11669 }
11670
11671 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, n_gt_16) {
11672 TEST_REQUIRES_ARM_NEON;
11673 for (uint32_t n = 17; n < 32; n++) {
11674 for (size_t k = 1; k <= 40; k += 9) {
11675 GemmMicrokernelTester()
11676 .mr(1)
11677 .nr(16)
11678 .kr(4)
11679 .sr(1)
11680 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011681 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011682 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011683 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011684 }
11685 }
11686 }
11687
11688 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, n_gt_16_strided_cn) {
11689 TEST_REQUIRES_ARM_NEON;
11690 for (uint32_t n = 17; n < 32; n++) {
11691 for (size_t k = 1; k <= 40; k += 9) {
11692 GemmMicrokernelTester()
11693 .mr(1)
11694 .nr(16)
11695 .kr(4)
11696 .sr(1)
11697 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011698 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011699 .k(k)
11700 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011701 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011702 }
11703 }
11704 }
11705
11706 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, n_gt_16_strided_a) {
11707 TEST_REQUIRES_ARM_NEON;
11708 for (uint32_t n = 17; n < 32; n++) {
11709 for (size_t k = 1; k <= 40; k += 9) {
11710 GemmMicrokernelTester()
11711 .mr(1)
11712 .nr(16)
11713 .kr(4)
11714 .sr(1)
11715 .m(1)
11716 .n(n)
11717 .k(k)
11718 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011719 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011720 }
11721 }
11722 }
11723
11724 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, n_gt_16_subtile) {
11725 TEST_REQUIRES_ARM_NEON;
11726 for (uint32_t n = 17; n < 32; n++) {
11727 for (size_t k = 1; k <= 40; k += 9) {
11728 for (uint32_t m = 1; m <= 1; m++) {
11729 GemmMicrokernelTester()
11730 .mr(1)
11731 .nr(16)
11732 .kr(4)
11733 .sr(1)
11734 .m(m)
11735 .n(n)
11736 .k(k)
11737 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011738 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011739 }
11740 }
11741 }
11742 }
11743
11744 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, n_div_16) {
11745 TEST_REQUIRES_ARM_NEON;
11746 for (uint32_t n = 32; n <= 48; n += 16) {
11747 for (size_t k = 1; k <= 40; k += 9) {
11748 GemmMicrokernelTester()
11749 .mr(1)
11750 .nr(16)
11751 .kr(4)
11752 .sr(1)
11753 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011754 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011755 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011756 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011757 }
11758 }
11759 }
11760
11761 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, n_div_16_strided_cn) {
11762 TEST_REQUIRES_ARM_NEON;
11763 for (uint32_t n = 32; n <= 48; n += 16) {
11764 for (size_t k = 1; k <= 40; k += 9) {
11765 GemmMicrokernelTester()
11766 .mr(1)
11767 .nr(16)
11768 .kr(4)
11769 .sr(1)
11770 .m(1)
11771 .n(n)
11772 .k(k)
11773 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011774 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011775 }
11776 }
11777 }
11778
11779 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, n_div_16_strided_a) {
11780 TEST_REQUIRES_ARM_NEON;
11781 for (uint32_t n = 32; n <= 48; n += 16) {
11782 for (size_t k = 1; k <= 40; k += 9) {
11783 GemmMicrokernelTester()
11784 .mr(1)
11785 .nr(16)
11786 .kr(4)
11787 .sr(1)
11788 .m(1)
11789 .n(n)
11790 .k(k)
11791 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011792 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011793 }
11794 }
11795 }
11796
11797 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, n_div_16_subtile) {
11798 TEST_REQUIRES_ARM_NEON;
11799 for (uint32_t n = 32; n <= 48; n += 16) {
11800 for (size_t k = 1; k <= 40; k += 9) {
11801 for (uint32_t m = 1; m <= 1; m++) {
11802 GemmMicrokernelTester()
11803 .mr(1)
11804 .nr(16)
11805 .kr(4)
11806 .sr(1)
11807 .m(m)
11808 .n(n)
11809 .k(k)
11810 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011811 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011812 }
11813 }
11814 }
11815 }
11816
11817 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, strided_cm_subtile) {
11818 TEST_REQUIRES_ARM_NEON;
11819 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011820 for (uint32_t n = 1; n <= 16; n++) {
11821 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011822 GemmMicrokernelTester()
11823 .mr(1)
11824 .nr(16)
11825 .kr(4)
11826 .sr(1)
11827 .m(m)
11828 .n(n)
11829 .k(k)
11830 .cm_stride(19)
11831 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011832 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011833 }
11834 }
11835 }
11836 }
11837
11838 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, qmin) {
11839 TEST_REQUIRES_ARM_NEON;
11840 GemmMicrokernelTester()
11841 .mr(1)
11842 .nr(16)
11843 .kr(4)
11844 .sr(1)
11845 .m(1)
11846 .n(16)
11847 .k(8)
11848 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011849 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011850 }
11851
11852 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, qmax) {
11853 TEST_REQUIRES_ARM_NEON;
11854 GemmMicrokernelTester()
11855 .mr(1)
11856 .nr(16)
11857 .kr(4)
11858 .sr(1)
11859 .m(1)
11860 .n(16)
11861 .k(8)
11862 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011863 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011864 }
11865
11866 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD1R, strided_cm) {
11867 TEST_REQUIRES_ARM_NEON;
11868 GemmMicrokernelTester()
11869 .mr(1)
11870 .nr(16)
11871 .kr(4)
11872 .sr(1)
11873 .m(1)
11874 .n(16)
11875 .k(8)
11876 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011877 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011878 }
11879#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11880
11881
11882#if XNN_ARCH_ARM || XNN_ARCH_ARM64
11883 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16) {
11884 TEST_REQUIRES_ARM_NEON;
11885 GemmMicrokernelTester()
11886 .mr(1)
11887 .nr(8)
11888 .kr(4)
11889 .sr(1)
11890 .m(1)
11891 .n(8)
11892 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080011893 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011894 }
11895
11896 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, strided_cn) {
11897 TEST_REQUIRES_ARM_NEON;
11898 GemmMicrokernelTester()
11899 .mr(1)
11900 .nr(8)
11901 .kr(4)
11902 .sr(1)
11903 .m(1)
11904 .n(8)
11905 .k(16)
11906 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011907 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011908 }
11909
11910 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16_strided_a) {
11911 TEST_REQUIRES_ARM_NEON;
11912 GemmMicrokernelTester()
11913 .mr(1)
11914 .nr(8)
11915 .kr(4)
11916 .sr(1)
11917 .m(1)
11918 .n(8)
11919 .k(16)
11920 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011921 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011922 }
11923
11924 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
11925 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011926 for (uint32_t n = 1; n <= 8; n++) {
11927 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011928 GemmMicrokernelTester()
11929 .mr(1)
11930 .nr(8)
11931 .kr(4)
11932 .sr(1)
11933 .m(m)
11934 .n(n)
11935 .k(16)
11936 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011937 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011938 }
11939 }
11940 }
11941
11942 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
11943 TEST_REQUIRES_ARM_NEON;
11944 for (uint32_t m = 1; m <= 1; m++) {
11945 GemmMicrokernelTester()
11946 .mr(1)
11947 .nr(8)
11948 .kr(4)
11949 .sr(1)
11950 .m(m)
11951 .n(8)
11952 .k(16)
11953 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011954 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011955 }
11956 }
11957
11958 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
11959 TEST_REQUIRES_ARM_NEON;
11960 for (uint32_t n = 1; n <= 8; n++) {
11961 GemmMicrokernelTester()
11962 .mr(1)
11963 .nr(8)
11964 .kr(4)
11965 .sr(1)
11966 .m(1)
11967 .n(n)
11968 .k(16)
11969 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011970 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011971 }
11972 }
11973
11974 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_lt_16) {
11975 TEST_REQUIRES_ARM_NEON;
11976 for (size_t k = 1; k < 16; k++) {
11977 GemmMicrokernelTester()
11978 .mr(1)
11979 .nr(8)
11980 .kr(4)
11981 .sr(1)
11982 .m(1)
11983 .n(8)
11984 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011985 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011986 }
11987 }
11988
11989 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_lt_16_strided_a) {
11990 TEST_REQUIRES_ARM_NEON;
11991 for (size_t k = 1; k < 16; k++) {
11992 GemmMicrokernelTester()
11993 .mr(1)
11994 .nr(8)
11995 .kr(4)
11996 .sr(1)
11997 .m(1)
11998 .n(8)
11999 .k(k)
12000 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012001 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012002 }
12003 }
12004
12005 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
12006 TEST_REQUIRES_ARM_NEON;
12007 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012008 for (uint32_t n = 1; n <= 8; n++) {
12009 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012010 GemmMicrokernelTester()
12011 .mr(1)
12012 .nr(8)
12013 .kr(4)
12014 .sr(1)
12015 .m(m)
12016 .n(n)
12017 .k(k)
12018 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012019 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012020 }
12021 }
12022 }
12023 }
12024
12025 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_gt_16) {
12026 TEST_REQUIRES_ARM_NEON;
12027 for (size_t k = 17; k < 32; k++) {
12028 GemmMicrokernelTester()
12029 .mr(1)
12030 .nr(8)
12031 .kr(4)
12032 .sr(1)
12033 .m(1)
12034 .n(8)
12035 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012036 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012037 }
12038 }
12039
12040 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_gt_16_strided_a) {
12041 TEST_REQUIRES_ARM_NEON;
12042 for (size_t k = 17; k < 32; k++) {
12043 GemmMicrokernelTester()
12044 .mr(1)
12045 .nr(8)
12046 .kr(4)
12047 .sr(1)
12048 .m(1)
12049 .n(8)
12050 .k(k)
12051 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080012052 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012053 }
12054 }
12055
12056 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
12057 TEST_REQUIRES_ARM_NEON;
12058 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012059 for (uint32_t n = 1; n <= 8; n++) {
12060 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012061 GemmMicrokernelTester()
12062 .mr(1)
12063 .nr(8)
12064 .kr(4)
12065 .sr(1)
12066 .m(m)
12067 .n(n)
12068 .k(k)
12069 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012070 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012071 }
12072 }
12073 }
12074 }
12075
12076 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_div_16) {
12077 TEST_REQUIRES_ARM_NEON;
12078 for (size_t k = 32; k <= 160; k += 16) {
12079 GemmMicrokernelTester()
12080 .mr(1)
12081 .nr(8)
12082 .kr(4)
12083 .sr(1)
12084 .m(1)
12085 .n(8)
12086 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012087 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012088 }
12089 }
12090
12091 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_div_16_strided_a) {
12092 TEST_REQUIRES_ARM_NEON;
12093 for (size_t k = 32; k <= 160; k += 16) {
12094 GemmMicrokernelTester()
12095 .mr(1)
12096 .nr(8)
12097 .kr(4)
12098 .sr(1)
12099 .m(1)
12100 .n(8)
12101 .k(k)
12102 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080012103 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012104 }
12105 }
12106
12107 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
12108 TEST_REQUIRES_ARM_NEON;
12109 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012110 for (uint32_t n = 1; n <= 8; n++) {
12111 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012112 GemmMicrokernelTester()
12113 .mr(1)
12114 .nr(8)
12115 .kr(4)
12116 .sr(1)
12117 .m(m)
12118 .n(n)
12119 .k(k)
12120 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012121 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012122 }
12123 }
12124 }
12125 }
12126
12127 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8) {
12128 TEST_REQUIRES_ARM_NEON;
12129 for (uint32_t n = 9; n < 16; n++) {
12130 for (size_t k = 1; k <= 80; k += 17) {
12131 GemmMicrokernelTester()
12132 .mr(1)
12133 .nr(8)
12134 .kr(4)
12135 .sr(1)
12136 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012137 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012138 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012139 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012140 }
12141 }
12142 }
12143
12144 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
12145 TEST_REQUIRES_ARM_NEON;
12146 for (uint32_t n = 9; n < 16; n++) {
12147 for (size_t k = 1; k <= 80; k += 17) {
12148 GemmMicrokernelTester()
12149 .mr(1)
12150 .nr(8)
12151 .kr(4)
12152 .sr(1)
12153 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012154 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012155 .k(k)
12156 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012157 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012158 }
12159 }
12160 }
12161
12162 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8_strided_a) {
12163 TEST_REQUIRES_ARM_NEON;
12164 for (uint32_t n = 9; n < 16; n++) {
12165 for (size_t k = 1; k <= 80; k += 17) {
12166 GemmMicrokernelTester()
12167 .mr(1)
12168 .nr(8)
12169 .kr(4)
12170 .sr(1)
12171 .m(1)
12172 .n(n)
12173 .k(k)
12174 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012175 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012176 }
12177 }
12178 }
12179
12180 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
12181 TEST_REQUIRES_ARM_NEON;
12182 for (uint32_t n = 9; n < 16; n++) {
12183 for (size_t k = 1; k <= 80; k += 17) {
12184 for (uint32_t m = 1; m <= 1; m++) {
12185 GemmMicrokernelTester()
12186 .mr(1)
12187 .nr(8)
12188 .kr(4)
12189 .sr(1)
12190 .m(m)
12191 .n(n)
12192 .k(k)
12193 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012194 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012195 }
12196 }
12197 }
12198 }
12199
12200 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8) {
12201 TEST_REQUIRES_ARM_NEON;
12202 for (uint32_t n = 16; n <= 24; n += 8) {
12203 for (size_t k = 1; k <= 80; k += 17) {
12204 GemmMicrokernelTester()
12205 .mr(1)
12206 .nr(8)
12207 .kr(4)
12208 .sr(1)
12209 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012210 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012211 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012212 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012213 }
12214 }
12215 }
12216
12217 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
12218 TEST_REQUIRES_ARM_NEON;
12219 for (uint32_t n = 16; n <= 24; n += 8) {
12220 for (size_t k = 1; k <= 80; k += 17) {
12221 GemmMicrokernelTester()
12222 .mr(1)
12223 .nr(8)
12224 .kr(4)
12225 .sr(1)
12226 .m(1)
12227 .n(n)
12228 .k(k)
12229 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012230 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012231 }
12232 }
12233 }
12234
12235 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8_strided_a) {
12236 TEST_REQUIRES_ARM_NEON;
12237 for (uint32_t n = 16; n <= 24; n += 8) {
12238 for (size_t k = 1; k <= 80; k += 17) {
12239 GemmMicrokernelTester()
12240 .mr(1)
12241 .nr(8)
12242 .kr(4)
12243 .sr(1)
12244 .m(1)
12245 .n(n)
12246 .k(k)
12247 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012248 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012249 }
12250 }
12251 }
12252
12253 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
12254 TEST_REQUIRES_ARM_NEON;
12255 for (uint32_t n = 16; n <= 24; n += 8) {
12256 for (size_t k = 1; k <= 80; k += 17) {
12257 for (uint32_t m = 1; m <= 1; m++) {
12258 GemmMicrokernelTester()
12259 .mr(1)
12260 .nr(8)
12261 .kr(4)
12262 .sr(1)
12263 .m(m)
12264 .n(n)
12265 .k(k)
12266 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012267 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012268 }
12269 }
12270 }
12271 }
12272
12273 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
12274 TEST_REQUIRES_ARM_NEON;
12275 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012276 for (uint32_t n = 1; n <= 8; n++) {
12277 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012278 GemmMicrokernelTester()
12279 .mr(1)
12280 .nr(8)
12281 .kr(4)
12282 .sr(1)
12283 .m(m)
12284 .n(n)
12285 .k(k)
12286 .cm_stride(11)
12287 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012288 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012289 }
12290 }
12291 }
12292 }
12293
12294 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, qmin) {
12295 TEST_REQUIRES_ARM_NEON;
12296 GemmMicrokernelTester()
12297 .mr(1)
12298 .nr(8)
12299 .kr(4)
12300 .sr(1)
12301 .m(1)
12302 .n(8)
12303 .k(16)
12304 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012305 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012306 }
12307
12308 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, qmax) {
12309 TEST_REQUIRES_ARM_NEON;
12310 GemmMicrokernelTester()
12311 .mr(1)
12312 .nr(8)
12313 .kr(4)
12314 .sr(1)
12315 .m(1)
12316 .n(8)
12317 .k(16)
12318 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012319 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012320 }
12321
12322 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, strided_cm) {
12323 TEST_REQUIRES_ARM_NEON;
12324 GemmMicrokernelTester()
12325 .mr(1)
12326 .nr(8)
12327 .kr(4)
12328 .sr(1)
12329 .m(1)
12330 .n(8)
12331 .k(16)
12332 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012333 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012334 }
12335#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12336
12337
12338#if XNN_ARCH_ARM || XNN_ARCH_ARM64
12339 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_eq_16) {
Frank Barcharde4d3f762021-12-23 15:31:43 -080012340 TEST_REQUIRES_ARM_NEON;
12341 GemmMicrokernelTester()
12342 .mr(4)
12343 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012344 .kr(4)
Frank Barcharde4d3f762021-12-23 15:31:43 -080012345 .sr(1)
12346 .m(4)
12347 .n(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012348 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080012349 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012350 }
12351
12352 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, strided_cn) {
12353 TEST_REQUIRES_ARM_NEON;
12354 GemmMicrokernelTester()
12355 .mr(4)
12356 .nr(8)
12357 .kr(4)
12358 .sr(1)
12359 .m(4)
12360 .n(8)
12361 .k(16)
12362 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012363 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012364 }
12365
12366 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_eq_16_strided_a) {
12367 TEST_REQUIRES_ARM_NEON;
12368 GemmMicrokernelTester()
12369 .mr(4)
12370 .nr(8)
12371 .kr(4)
12372 .sr(1)
12373 .m(4)
12374 .n(8)
12375 .k(16)
12376 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012377 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012378 }
12379
12380 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
12381 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012382 for (uint32_t n = 1; n <= 8; n++) {
12383 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012384 GemmMicrokernelTester()
12385 .mr(4)
12386 .nr(8)
12387 .kr(4)
12388 .sr(1)
12389 .m(m)
12390 .n(n)
12391 .k(16)
12392 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012393 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012394 }
12395 }
12396 }
12397
12398 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
12399 TEST_REQUIRES_ARM_NEON;
12400 for (uint32_t m = 1; m <= 4; m++) {
12401 GemmMicrokernelTester()
12402 .mr(4)
12403 .nr(8)
12404 .kr(4)
12405 .sr(1)
12406 .m(m)
12407 .n(8)
12408 .k(16)
12409 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012410 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012411 }
12412 }
12413
12414 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
12415 TEST_REQUIRES_ARM_NEON;
12416 for (uint32_t n = 1; n <= 8; n++) {
12417 GemmMicrokernelTester()
12418 .mr(4)
12419 .nr(8)
12420 .kr(4)
12421 .sr(1)
12422 .m(4)
12423 .n(n)
12424 .k(16)
12425 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012426 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012427 }
12428 }
12429
12430 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_lt_16) {
12431 TEST_REQUIRES_ARM_NEON;
12432 for (size_t k = 1; k < 16; k++) {
12433 GemmMicrokernelTester()
12434 .mr(4)
12435 .nr(8)
12436 .kr(4)
12437 .sr(1)
12438 .m(4)
12439 .n(8)
12440 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012441 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012442 }
12443 }
12444
12445 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_lt_16_strided_a) {
12446 TEST_REQUIRES_ARM_NEON;
12447 for (size_t k = 1; k < 16; k++) {
12448 GemmMicrokernelTester()
12449 .mr(4)
12450 .nr(8)
12451 .kr(4)
12452 .sr(1)
12453 .m(4)
12454 .n(8)
12455 .k(k)
12456 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012457 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012458 }
12459 }
12460
12461 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
12462 TEST_REQUIRES_ARM_NEON;
12463 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012464 for (uint32_t n = 1; n <= 8; n++) {
12465 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012466 GemmMicrokernelTester()
12467 .mr(4)
12468 .nr(8)
12469 .kr(4)
12470 .sr(1)
12471 .m(m)
12472 .n(n)
12473 .k(k)
12474 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012475 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012476 }
12477 }
12478 }
12479 }
12480
12481 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_gt_16) {
12482 TEST_REQUIRES_ARM_NEON;
12483 for (size_t k = 17; k < 32; k++) {
12484 GemmMicrokernelTester()
12485 .mr(4)
12486 .nr(8)
12487 .kr(4)
12488 .sr(1)
12489 .m(4)
12490 .n(8)
12491 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012492 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012493 }
12494 }
12495
12496 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_gt_16_strided_a) {
12497 TEST_REQUIRES_ARM_NEON;
12498 for (size_t k = 17; k < 32; k++) {
12499 GemmMicrokernelTester()
12500 .mr(4)
12501 .nr(8)
12502 .kr(4)
12503 .sr(1)
12504 .m(4)
12505 .n(8)
12506 .k(k)
12507 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080012508 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012509 }
12510 }
12511
12512 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
12513 TEST_REQUIRES_ARM_NEON;
12514 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012515 for (uint32_t n = 1; n <= 8; n++) {
12516 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012517 GemmMicrokernelTester()
12518 .mr(4)
12519 .nr(8)
12520 .kr(4)
12521 .sr(1)
12522 .m(m)
12523 .n(n)
12524 .k(k)
12525 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012526 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012527 }
12528 }
12529 }
12530 }
12531
12532 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_div_16) {
12533 TEST_REQUIRES_ARM_NEON;
12534 for (size_t k = 32; k <= 160; k += 16) {
12535 GemmMicrokernelTester()
12536 .mr(4)
12537 .nr(8)
12538 .kr(4)
12539 .sr(1)
12540 .m(4)
12541 .n(8)
12542 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012543 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012544 }
12545 }
12546
12547 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_div_16_strided_a) {
12548 TEST_REQUIRES_ARM_NEON;
12549 for (size_t k = 32; k <= 160; k += 16) {
12550 GemmMicrokernelTester()
12551 .mr(4)
12552 .nr(8)
12553 .kr(4)
12554 .sr(1)
12555 .m(4)
12556 .n(8)
12557 .k(k)
12558 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080012559 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012560 }
12561 }
12562
12563 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
12564 TEST_REQUIRES_ARM_NEON;
12565 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012566 for (uint32_t n = 1; n <= 8; n++) {
12567 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012568 GemmMicrokernelTester()
12569 .mr(4)
12570 .nr(8)
12571 .kr(4)
12572 .sr(1)
12573 .m(m)
12574 .n(n)
12575 .k(k)
12576 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012577 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012578 }
12579 }
12580 }
12581 }
12582
12583 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_gt_8) {
12584 TEST_REQUIRES_ARM_NEON;
12585 for (uint32_t n = 9; n < 16; n++) {
12586 for (size_t k = 1; k <= 80; k += 17) {
12587 GemmMicrokernelTester()
12588 .mr(4)
12589 .nr(8)
12590 .kr(4)
12591 .sr(1)
12592 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012593 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012594 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012595 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012596 }
12597 }
12598 }
12599
12600 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
12601 TEST_REQUIRES_ARM_NEON;
12602 for (uint32_t n = 9; n < 16; n++) {
12603 for (size_t k = 1; k <= 80; k += 17) {
12604 GemmMicrokernelTester()
12605 .mr(4)
12606 .nr(8)
12607 .kr(4)
12608 .sr(1)
12609 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012610 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012611 .k(k)
12612 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012613 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012614 }
12615 }
12616 }
12617
12618 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_gt_8_strided_a) {
12619 TEST_REQUIRES_ARM_NEON;
12620 for (uint32_t n = 9; n < 16; n++) {
12621 for (size_t k = 1; k <= 80; k += 17) {
12622 GemmMicrokernelTester()
12623 .mr(4)
12624 .nr(8)
12625 .kr(4)
12626 .sr(1)
12627 .m(4)
12628 .n(n)
12629 .k(k)
12630 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012631 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012632 }
12633 }
12634 }
12635
12636 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
12637 TEST_REQUIRES_ARM_NEON;
12638 for (uint32_t n = 9; n < 16; n++) {
12639 for (size_t k = 1; k <= 80; k += 17) {
12640 for (uint32_t m = 1; m <= 4; m++) {
12641 GemmMicrokernelTester()
12642 .mr(4)
12643 .nr(8)
12644 .kr(4)
12645 .sr(1)
12646 .m(m)
12647 .n(n)
12648 .k(k)
12649 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012650 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012651 }
12652 }
12653 }
12654 }
12655
12656 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_div_8) {
12657 TEST_REQUIRES_ARM_NEON;
12658 for (uint32_t n = 16; n <= 24; n += 8) {
12659 for (size_t k = 1; k <= 80; k += 17) {
12660 GemmMicrokernelTester()
12661 .mr(4)
12662 .nr(8)
12663 .kr(4)
12664 .sr(1)
12665 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012666 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012667 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012668 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012669 }
12670 }
12671 }
12672
12673 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
12674 TEST_REQUIRES_ARM_NEON;
12675 for (uint32_t n = 16; n <= 24; n += 8) {
12676 for (size_t k = 1; k <= 80; k += 17) {
12677 GemmMicrokernelTester()
12678 .mr(4)
12679 .nr(8)
12680 .kr(4)
12681 .sr(1)
12682 .m(4)
12683 .n(n)
12684 .k(k)
12685 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012686 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012687 }
12688 }
12689 }
12690
12691 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_div_8_strided_a) {
12692 TEST_REQUIRES_ARM_NEON;
12693 for (uint32_t n = 16; n <= 24; n += 8) {
12694 for (size_t k = 1; k <= 80; k += 17) {
12695 GemmMicrokernelTester()
12696 .mr(4)
12697 .nr(8)
12698 .kr(4)
12699 .sr(1)
12700 .m(4)
12701 .n(n)
12702 .k(k)
12703 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012704 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012705 }
12706 }
12707 }
12708
12709 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
12710 TEST_REQUIRES_ARM_NEON;
12711 for (uint32_t n = 16; n <= 24; n += 8) {
12712 for (size_t k = 1; k <= 80; k += 17) {
12713 for (uint32_t m = 1; m <= 4; m++) {
12714 GemmMicrokernelTester()
12715 .mr(4)
12716 .nr(8)
12717 .kr(4)
12718 .sr(1)
12719 .m(m)
12720 .n(n)
12721 .k(k)
12722 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012723 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012724 }
12725 }
12726 }
12727 }
12728
12729 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
12730 TEST_REQUIRES_ARM_NEON;
12731 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012732 for (uint32_t n = 1; n <= 8; n++) {
12733 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012734 GemmMicrokernelTester()
12735 .mr(4)
12736 .nr(8)
12737 .kr(4)
12738 .sr(1)
12739 .m(m)
12740 .n(n)
12741 .k(k)
12742 .cm_stride(11)
12743 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012744 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012745 }
12746 }
12747 }
12748 }
12749
12750 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, qmin) {
12751 TEST_REQUIRES_ARM_NEON;
12752 GemmMicrokernelTester()
12753 .mr(4)
12754 .nr(8)
12755 .kr(4)
12756 .sr(1)
12757 .m(4)
12758 .n(8)
12759 .k(16)
12760 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012761 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012762 }
12763
12764 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, qmax) {
12765 TEST_REQUIRES_ARM_NEON;
12766 GemmMicrokernelTester()
12767 .mr(4)
12768 .nr(8)
12769 .kr(4)
12770 .sr(1)
12771 .m(4)
12772 .n(8)
12773 .k(16)
12774 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012775 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012776 }
12777
12778 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, strided_cm) {
12779 TEST_REQUIRES_ARM_NEON;
12780 GemmMicrokernelTester()
12781 .mr(4)
12782 .nr(8)
12783 .kr(4)
12784 .sr(1)
12785 .m(4)
12786 .n(8)
12787 .k(16)
12788 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012789 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012790 }
12791#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12792
12793
12794#if XNN_ARCH_ARM || XNN_ARCH_ARM64
12795 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_eq_8) {
12796 TEST_REQUIRES_ARM_NEON;
12797 GemmMicrokernelTester()
12798 .mr(3)
12799 .nr(16)
12800 .kr(4)
12801 .sr(1)
12802 .m(3)
12803 .n(16)
12804 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080012805 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012806 }
12807
12808 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, strided_cn) {
12809 TEST_REQUIRES_ARM_NEON;
12810 GemmMicrokernelTester()
12811 .mr(3)
12812 .nr(16)
12813 .kr(4)
12814 .sr(1)
12815 .m(3)
12816 .n(16)
12817 .k(8)
12818 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012819 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012820 }
12821
12822 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_eq_8_strided_a) {
12823 TEST_REQUIRES_ARM_NEON;
12824 GemmMicrokernelTester()
12825 .mr(3)
12826 .nr(16)
12827 .kr(4)
12828 .sr(1)
12829 .m(3)
12830 .n(16)
12831 .k(8)
12832 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012833 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012834 }
12835
12836 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_eq_8_subtile) {
12837 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012838 for (uint32_t n = 1; n <= 16; n++) {
12839 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012840 GemmMicrokernelTester()
12841 .mr(3)
12842 .nr(16)
12843 .kr(4)
12844 .sr(1)
12845 .m(m)
12846 .n(n)
12847 .k(8)
12848 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012849 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012850 }
12851 }
12852 }
12853
12854 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_eq_8_subtile_m) {
12855 TEST_REQUIRES_ARM_NEON;
12856 for (uint32_t m = 1; m <= 3; m++) {
12857 GemmMicrokernelTester()
12858 .mr(3)
12859 .nr(16)
12860 .kr(4)
12861 .sr(1)
12862 .m(m)
12863 .n(16)
12864 .k(8)
12865 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012866 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012867 }
12868 }
12869
12870 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_eq_8_subtile_n) {
12871 TEST_REQUIRES_ARM_NEON;
12872 for (uint32_t n = 1; n <= 16; n++) {
12873 GemmMicrokernelTester()
12874 .mr(3)
12875 .nr(16)
12876 .kr(4)
12877 .sr(1)
12878 .m(3)
12879 .n(n)
12880 .k(8)
12881 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012882 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012883 }
12884 }
12885
12886 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_lt_8) {
12887 TEST_REQUIRES_ARM_NEON;
12888 for (size_t k = 1; k < 8; k++) {
12889 GemmMicrokernelTester()
12890 .mr(3)
12891 .nr(16)
12892 .kr(4)
12893 .sr(1)
12894 .m(3)
12895 .n(16)
12896 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012897 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012898 }
12899 }
12900
12901 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_lt_8_strided_a) {
12902 TEST_REQUIRES_ARM_NEON;
12903 for (size_t k = 1; k < 8; k++) {
12904 GemmMicrokernelTester()
12905 .mr(3)
12906 .nr(16)
12907 .kr(4)
12908 .sr(1)
12909 .m(3)
12910 .n(16)
12911 .k(k)
12912 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012913 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012914 }
12915 }
12916
12917 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_lt_8_subtile) {
12918 TEST_REQUIRES_ARM_NEON;
12919 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012920 for (uint32_t n = 1; n <= 16; n++) {
12921 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012922 GemmMicrokernelTester()
12923 .mr(3)
12924 .nr(16)
12925 .kr(4)
12926 .sr(1)
12927 .m(m)
12928 .n(n)
12929 .k(k)
12930 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012931 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012932 }
12933 }
12934 }
12935 }
12936
12937 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_gt_8) {
12938 TEST_REQUIRES_ARM_NEON;
12939 for (size_t k = 9; k < 16; k++) {
12940 GemmMicrokernelTester()
12941 .mr(3)
12942 .nr(16)
12943 .kr(4)
12944 .sr(1)
12945 .m(3)
12946 .n(16)
12947 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012948 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012949 }
12950 }
12951
12952 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_gt_8_strided_a) {
12953 TEST_REQUIRES_ARM_NEON;
12954 for (size_t k = 9; k < 16; k++) {
12955 GemmMicrokernelTester()
12956 .mr(3)
12957 .nr(16)
12958 .kr(4)
12959 .sr(1)
12960 .m(3)
12961 .n(16)
12962 .k(k)
12963 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012964 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012965 }
12966 }
12967
12968 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_gt_8_subtile) {
12969 TEST_REQUIRES_ARM_NEON;
12970 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012971 for (uint32_t n = 1; n <= 16; n++) {
12972 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012973 GemmMicrokernelTester()
12974 .mr(3)
12975 .nr(16)
12976 .kr(4)
12977 .sr(1)
12978 .m(m)
12979 .n(n)
12980 .k(k)
12981 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012982 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012983 }
12984 }
12985 }
12986 }
12987
12988 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_div_8) {
12989 TEST_REQUIRES_ARM_NEON;
12990 for (size_t k = 16; k <= 80; k += 8) {
12991 GemmMicrokernelTester()
12992 .mr(3)
12993 .nr(16)
12994 .kr(4)
12995 .sr(1)
12996 .m(3)
12997 .n(16)
12998 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012999 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013000 }
13001 }
13002
13003 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_div_8_strided_a) {
13004 TEST_REQUIRES_ARM_NEON;
13005 for (size_t k = 16; k <= 80; k += 8) {
13006 GemmMicrokernelTester()
13007 .mr(3)
13008 .nr(16)
13009 .kr(4)
13010 .sr(1)
13011 .m(3)
13012 .n(16)
13013 .k(k)
13014 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013015 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013016 }
13017 }
13018
13019 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_div_8_subtile) {
13020 TEST_REQUIRES_ARM_NEON;
13021 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013022 for (uint32_t n = 1; n <= 16; n++) {
13023 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013024 GemmMicrokernelTester()
13025 .mr(3)
13026 .nr(16)
13027 .kr(4)
13028 .sr(1)
13029 .m(m)
13030 .n(n)
13031 .k(k)
13032 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013033 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013034 }
13035 }
13036 }
13037 }
13038
13039 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_gt_16) {
13040 TEST_REQUIRES_ARM_NEON;
13041 for (uint32_t n = 17; n < 32; n++) {
13042 for (size_t k = 1; k <= 40; k += 9) {
13043 GemmMicrokernelTester()
13044 .mr(3)
13045 .nr(16)
13046 .kr(4)
13047 .sr(1)
13048 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013049 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013050 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013051 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013052 }
13053 }
13054 }
13055
13056 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_gt_16_strided_cn) {
13057 TEST_REQUIRES_ARM_NEON;
13058 for (uint32_t n = 17; n < 32; n++) {
13059 for (size_t k = 1; k <= 40; k += 9) {
13060 GemmMicrokernelTester()
13061 .mr(3)
13062 .nr(16)
13063 .kr(4)
13064 .sr(1)
13065 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013066 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013067 .k(k)
13068 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013069 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013070 }
13071 }
13072 }
13073
13074 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_gt_16_strided_a) {
13075 TEST_REQUIRES_ARM_NEON;
13076 for (uint32_t n = 17; n < 32; n++) {
13077 for (size_t k = 1; k <= 40; k += 9) {
13078 GemmMicrokernelTester()
13079 .mr(3)
13080 .nr(16)
13081 .kr(4)
13082 .sr(1)
13083 .m(3)
13084 .n(n)
13085 .k(k)
13086 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013087 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013088 }
13089 }
13090 }
13091
13092 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_gt_16_subtile) {
13093 TEST_REQUIRES_ARM_NEON;
13094 for (uint32_t n = 17; n < 32; n++) {
13095 for (size_t k = 1; k <= 40; k += 9) {
13096 for (uint32_t m = 1; m <= 3; m++) {
13097 GemmMicrokernelTester()
13098 .mr(3)
13099 .nr(16)
13100 .kr(4)
13101 .sr(1)
13102 .m(m)
13103 .n(n)
13104 .k(k)
13105 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013106 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013107 }
13108 }
13109 }
13110 }
13111
13112 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_div_16) {
13113 TEST_REQUIRES_ARM_NEON;
13114 for (uint32_t n = 32; n <= 48; n += 16) {
13115 for (size_t k = 1; k <= 40; k += 9) {
13116 GemmMicrokernelTester()
13117 .mr(3)
13118 .nr(16)
13119 .kr(4)
13120 .sr(1)
13121 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013122 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013123 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013124 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013125 }
13126 }
13127 }
13128
13129 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_div_16_strided_cn) {
13130 TEST_REQUIRES_ARM_NEON;
13131 for (uint32_t n = 32; n <= 48; n += 16) {
13132 for (size_t k = 1; k <= 40; k += 9) {
13133 GemmMicrokernelTester()
13134 .mr(3)
13135 .nr(16)
13136 .kr(4)
13137 .sr(1)
13138 .m(3)
13139 .n(n)
13140 .k(k)
13141 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013142 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013143 }
13144 }
13145 }
13146
13147 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_div_16_strided_a) {
13148 TEST_REQUIRES_ARM_NEON;
13149 for (uint32_t n = 32; n <= 48; n += 16) {
13150 for (size_t k = 1; k <= 40; k += 9) {
13151 GemmMicrokernelTester()
13152 .mr(3)
13153 .nr(16)
13154 .kr(4)
13155 .sr(1)
13156 .m(3)
13157 .n(n)
13158 .k(k)
13159 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013160 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013161 }
13162 }
13163 }
13164
13165 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_div_16_subtile) {
13166 TEST_REQUIRES_ARM_NEON;
13167 for (uint32_t n = 32; n <= 48; n += 16) {
13168 for (size_t k = 1; k <= 40; k += 9) {
13169 for (uint32_t m = 1; m <= 3; m++) {
13170 GemmMicrokernelTester()
13171 .mr(3)
13172 .nr(16)
13173 .kr(4)
13174 .sr(1)
13175 .m(m)
13176 .n(n)
13177 .k(k)
13178 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013179 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013180 }
13181 }
13182 }
13183 }
13184
13185 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, strided_cm_subtile) {
13186 TEST_REQUIRES_ARM_NEON;
13187 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013188 for (uint32_t n = 1; n <= 16; n++) {
13189 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013190 GemmMicrokernelTester()
13191 .mr(3)
13192 .nr(16)
13193 .kr(4)
13194 .sr(1)
13195 .m(m)
13196 .n(n)
13197 .k(k)
13198 .cm_stride(19)
13199 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013200 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013201 }
13202 }
13203 }
13204 }
13205
13206 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, qmin) {
13207 TEST_REQUIRES_ARM_NEON;
13208 GemmMicrokernelTester()
13209 .mr(3)
13210 .nr(16)
13211 .kr(4)
13212 .sr(1)
13213 .m(3)
13214 .n(16)
13215 .k(8)
13216 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013217 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013218 }
13219
13220 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, qmax) {
13221 TEST_REQUIRES_ARM_NEON;
13222 GemmMicrokernelTester()
13223 .mr(3)
13224 .nr(16)
13225 .kr(4)
13226 .sr(1)
13227 .m(3)
13228 .n(16)
13229 .k(8)
13230 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013231 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013232 }
13233
13234 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, strided_cm) {
13235 TEST_REQUIRES_ARM_NEON;
13236 GemmMicrokernelTester()
13237 .mr(3)
13238 .nr(16)
13239 .kr(4)
13240 .sr(1)
13241 .m(3)
13242 .n(16)
13243 .k(8)
13244 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013245 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013246 }
13247#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13248
13249
13250#if XNN_ARCH_ARM || XNN_ARCH_ARM64
13251 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_eq_16) {
13252 TEST_REQUIRES_ARM_NEON;
13253 GemmMicrokernelTester()
13254 .mr(4)
13255 .nr(8)
13256 .kr(4)
13257 .sr(1)
13258 .m(4)
13259 .n(8)
13260 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080013261 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013262 }
13263
13264 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, strided_cn) {
13265 TEST_REQUIRES_ARM_NEON;
13266 GemmMicrokernelTester()
13267 .mr(4)
13268 .nr(8)
13269 .kr(4)
13270 .sr(1)
13271 .m(4)
13272 .n(8)
13273 .k(16)
13274 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013275 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013276 }
13277
13278 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_eq_16_strided_a) {
13279 TEST_REQUIRES_ARM_NEON;
13280 GemmMicrokernelTester()
13281 .mr(4)
13282 .nr(8)
13283 .kr(4)
13284 .sr(1)
13285 .m(4)
13286 .n(8)
13287 .k(16)
13288 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013289 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013290 }
13291
13292 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
13293 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013294 for (uint32_t n = 1; n <= 8; n++) {
13295 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013296 GemmMicrokernelTester()
13297 .mr(4)
13298 .nr(8)
13299 .kr(4)
13300 .sr(1)
13301 .m(m)
13302 .n(n)
13303 .k(16)
13304 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013305 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013306 }
13307 }
13308 }
13309
13310 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
13311 TEST_REQUIRES_ARM_NEON;
13312 for (uint32_t m = 1; m <= 4; m++) {
13313 GemmMicrokernelTester()
13314 .mr(4)
13315 .nr(8)
13316 .kr(4)
13317 .sr(1)
13318 .m(m)
13319 .n(8)
13320 .k(16)
13321 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013322 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013323 }
13324 }
13325
13326 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
13327 TEST_REQUIRES_ARM_NEON;
13328 for (uint32_t n = 1; n <= 8; n++) {
13329 GemmMicrokernelTester()
13330 .mr(4)
13331 .nr(8)
13332 .kr(4)
13333 .sr(1)
13334 .m(4)
13335 .n(n)
13336 .k(16)
13337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013338 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013339 }
13340 }
13341
13342 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_lt_16) {
13343 TEST_REQUIRES_ARM_NEON;
13344 for (size_t k = 1; k < 16; k++) {
13345 GemmMicrokernelTester()
13346 .mr(4)
13347 .nr(8)
13348 .kr(4)
13349 .sr(1)
13350 .m(4)
13351 .n(8)
13352 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013353 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013354 }
13355 }
13356
13357 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_lt_16_strided_a) {
13358 TEST_REQUIRES_ARM_NEON;
13359 for (size_t k = 1; k < 16; k++) {
13360 GemmMicrokernelTester()
13361 .mr(4)
13362 .nr(8)
13363 .kr(4)
13364 .sr(1)
13365 .m(4)
13366 .n(8)
13367 .k(k)
13368 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013369 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013370 }
13371 }
13372
13373 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
13374 TEST_REQUIRES_ARM_NEON;
13375 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013376 for (uint32_t n = 1; n <= 8; n++) {
13377 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013378 GemmMicrokernelTester()
13379 .mr(4)
13380 .nr(8)
13381 .kr(4)
13382 .sr(1)
13383 .m(m)
13384 .n(n)
13385 .k(k)
13386 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013387 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013388 }
13389 }
13390 }
13391 }
13392
13393 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_gt_16) {
13394 TEST_REQUIRES_ARM_NEON;
13395 for (size_t k = 17; k < 32; k++) {
13396 GemmMicrokernelTester()
13397 .mr(4)
13398 .nr(8)
13399 .kr(4)
13400 .sr(1)
13401 .m(4)
13402 .n(8)
13403 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013404 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013405 }
13406 }
13407
13408 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_gt_16_strided_a) {
13409 TEST_REQUIRES_ARM_NEON;
13410 for (size_t k = 17; k < 32; k++) {
13411 GemmMicrokernelTester()
13412 .mr(4)
13413 .nr(8)
13414 .kr(4)
13415 .sr(1)
13416 .m(4)
13417 .n(8)
13418 .k(k)
13419 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080013420 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013421 }
13422 }
13423
13424 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
13425 TEST_REQUIRES_ARM_NEON;
13426 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013427 for (uint32_t n = 1; n <= 8; n++) {
13428 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013429 GemmMicrokernelTester()
13430 .mr(4)
13431 .nr(8)
13432 .kr(4)
13433 .sr(1)
13434 .m(m)
13435 .n(n)
13436 .k(k)
13437 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013438 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013439 }
13440 }
13441 }
13442 }
13443
13444 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_div_16) {
13445 TEST_REQUIRES_ARM_NEON;
13446 for (size_t k = 32; k <= 160; k += 16) {
13447 GemmMicrokernelTester()
13448 .mr(4)
13449 .nr(8)
13450 .kr(4)
13451 .sr(1)
13452 .m(4)
13453 .n(8)
13454 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013455 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013456 }
13457 }
13458
13459 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_div_16_strided_a) {
13460 TEST_REQUIRES_ARM_NEON;
13461 for (size_t k = 32; k <= 160; k += 16) {
13462 GemmMicrokernelTester()
13463 .mr(4)
13464 .nr(8)
13465 .kr(4)
13466 .sr(1)
13467 .m(4)
13468 .n(8)
13469 .k(k)
13470 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080013471 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013472 }
13473 }
13474
13475 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
13476 TEST_REQUIRES_ARM_NEON;
13477 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013478 for (uint32_t n = 1; n <= 8; n++) {
13479 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013480 GemmMicrokernelTester()
13481 .mr(4)
13482 .nr(8)
13483 .kr(4)
13484 .sr(1)
13485 .m(m)
13486 .n(n)
13487 .k(k)
13488 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013489 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013490 }
13491 }
13492 }
13493 }
13494
13495 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, n_gt_8) {
13496 TEST_REQUIRES_ARM_NEON;
13497 for (uint32_t n = 9; n < 16; n++) {
13498 for (size_t k = 1; k <= 80; k += 17) {
13499 GemmMicrokernelTester()
13500 .mr(4)
13501 .nr(8)
13502 .kr(4)
13503 .sr(1)
13504 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013505 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013506 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013507 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013508 }
13509 }
13510 }
13511
13512 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
13513 TEST_REQUIRES_ARM_NEON;
13514 for (uint32_t n = 9; n < 16; n++) {
13515 for (size_t k = 1; k <= 80; k += 17) {
13516 GemmMicrokernelTester()
13517 .mr(4)
13518 .nr(8)
13519 .kr(4)
13520 .sr(1)
13521 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013522 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013523 .k(k)
13524 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013525 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013526 }
13527 }
13528 }
13529
13530 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, n_gt_8_strided_a) {
13531 TEST_REQUIRES_ARM_NEON;
13532 for (uint32_t n = 9; n < 16; n++) {
13533 for (size_t k = 1; k <= 80; k += 17) {
13534 GemmMicrokernelTester()
13535 .mr(4)
13536 .nr(8)
13537 .kr(4)
13538 .sr(1)
13539 .m(4)
13540 .n(n)
13541 .k(k)
13542 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013543 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013544 }
13545 }
13546 }
13547
13548 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
13549 TEST_REQUIRES_ARM_NEON;
13550 for (uint32_t n = 9; n < 16; n++) {
13551 for (size_t k = 1; k <= 80; k += 17) {
13552 for (uint32_t m = 1; m <= 4; m++) {
13553 GemmMicrokernelTester()
13554 .mr(4)
13555 .nr(8)
13556 .kr(4)
13557 .sr(1)
13558 .m(m)
13559 .n(n)
13560 .k(k)
13561 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013562 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013563 }
13564 }
13565 }
13566 }
13567
13568 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, n_div_8) {
13569 TEST_REQUIRES_ARM_NEON;
13570 for (uint32_t n = 16; n <= 24; n += 8) {
13571 for (size_t k = 1; k <= 80; k += 17) {
13572 GemmMicrokernelTester()
13573 .mr(4)
13574 .nr(8)
13575 .kr(4)
13576 .sr(1)
13577 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013578 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013579 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013580 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013581 }
13582 }
13583 }
13584
13585 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
13586 TEST_REQUIRES_ARM_NEON;
13587 for (uint32_t n = 16; n <= 24; n += 8) {
13588 for (size_t k = 1; k <= 80; k += 17) {
13589 GemmMicrokernelTester()
13590 .mr(4)
13591 .nr(8)
13592 .kr(4)
13593 .sr(1)
13594 .m(4)
13595 .n(n)
13596 .k(k)
13597 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013598 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013599 }
13600 }
13601 }
13602
13603 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, n_div_8_strided_a) {
13604 TEST_REQUIRES_ARM_NEON;
13605 for (uint32_t n = 16; n <= 24; n += 8) {
13606 for (size_t k = 1; k <= 80; k += 17) {
13607 GemmMicrokernelTester()
13608 .mr(4)
13609 .nr(8)
13610 .kr(4)
13611 .sr(1)
13612 .m(4)
13613 .n(n)
13614 .k(k)
13615 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013616 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013617 }
13618 }
13619 }
13620
13621 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
13622 TEST_REQUIRES_ARM_NEON;
13623 for (uint32_t n = 16; n <= 24; n += 8) {
13624 for (size_t k = 1; k <= 80; k += 17) {
13625 for (uint32_t m = 1; m <= 4; m++) {
13626 GemmMicrokernelTester()
13627 .mr(4)
13628 .nr(8)
13629 .kr(4)
13630 .sr(1)
13631 .m(m)
13632 .n(n)
13633 .k(k)
13634 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013635 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013636 }
13637 }
13638 }
13639 }
13640
13641 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
13642 TEST_REQUIRES_ARM_NEON;
13643 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013644 for (uint32_t n = 1; n <= 8; n++) {
13645 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013646 GemmMicrokernelTester()
13647 .mr(4)
13648 .nr(8)
13649 .kr(4)
13650 .sr(1)
13651 .m(m)
13652 .n(n)
13653 .k(k)
13654 .cm_stride(11)
13655 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013656 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013657 }
13658 }
13659 }
13660 }
13661
13662 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, qmin) {
13663 TEST_REQUIRES_ARM_NEON;
13664 GemmMicrokernelTester()
13665 .mr(4)
13666 .nr(8)
13667 .kr(4)
13668 .sr(1)
13669 .m(4)
13670 .n(8)
13671 .k(16)
13672 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013673 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013674 }
13675
13676 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, qmax) {
13677 TEST_REQUIRES_ARM_NEON;
13678 GemmMicrokernelTester()
13679 .mr(4)
13680 .nr(8)
13681 .kr(4)
13682 .sr(1)
13683 .m(4)
13684 .n(8)
13685 .k(16)
13686 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013687 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013688 }
13689
13690 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD2R, strided_cm) {
13691 TEST_REQUIRES_ARM_NEON;
13692 GemmMicrokernelTester()
13693 .mr(4)
13694 .nr(8)
13695 .kr(4)
13696 .sr(1)
13697 .m(4)
13698 .n(8)
13699 .k(16)
13700 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013701 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013702 }
13703#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13704
13705
13706#if XNN_ARCH_ARM || XNN_ARCH_ARM64
13707 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_eq_8) {
13708 TEST_REQUIRES_ARM_NEON;
13709 GemmMicrokernelTester()
13710 .mr(2)
13711 .nr(8)
13712 .kr(2)
13713 .sr(1)
13714 .m(2)
13715 .n(8)
13716 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080013717 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013718 }
13719
13720 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, strided_cn) {
13721 TEST_REQUIRES_ARM_NEON;
13722 GemmMicrokernelTester()
13723 .mr(2)
13724 .nr(8)
13725 .kr(2)
13726 .sr(1)
13727 .m(2)
13728 .n(8)
13729 .k(8)
13730 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013731 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013732 }
13733
13734 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_eq_8_strided_a) {
13735 TEST_REQUIRES_ARM_NEON;
13736 GemmMicrokernelTester()
13737 .mr(2)
13738 .nr(8)
13739 .kr(2)
13740 .sr(1)
13741 .m(2)
13742 .n(8)
13743 .k(8)
13744 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013745 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013746 }
13747
13748 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
13749 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013750 for (uint32_t n = 1; n <= 8; n++) {
13751 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013752 GemmMicrokernelTester()
13753 .mr(2)
13754 .nr(8)
13755 .kr(2)
13756 .sr(1)
13757 .m(m)
13758 .n(n)
13759 .k(8)
13760 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013761 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013762 }
13763 }
13764 }
13765
13766 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
13767 TEST_REQUIRES_ARM_NEON;
13768 for (uint32_t m = 1; m <= 2; m++) {
13769 GemmMicrokernelTester()
13770 .mr(2)
13771 .nr(8)
13772 .kr(2)
13773 .sr(1)
13774 .m(m)
13775 .n(8)
13776 .k(8)
13777 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013778 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013779 }
13780 }
13781
13782 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
13783 TEST_REQUIRES_ARM_NEON;
13784 for (uint32_t n = 1; n <= 8; n++) {
13785 GemmMicrokernelTester()
13786 .mr(2)
13787 .nr(8)
13788 .kr(2)
13789 .sr(1)
13790 .m(2)
13791 .n(n)
13792 .k(8)
13793 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013794 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013795 }
13796 }
13797
13798 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_lt_8) {
13799 TEST_REQUIRES_ARM_NEON;
13800 for (size_t k = 1; k < 8; k++) {
13801 GemmMicrokernelTester()
13802 .mr(2)
13803 .nr(8)
13804 .kr(2)
13805 .sr(1)
13806 .m(2)
13807 .n(8)
13808 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013809 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013810 }
13811 }
13812
13813 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_lt_8_strided_a) {
13814 TEST_REQUIRES_ARM_NEON;
13815 for (size_t k = 1; k < 8; k++) {
13816 GemmMicrokernelTester()
13817 .mr(2)
13818 .nr(8)
13819 .kr(2)
13820 .sr(1)
13821 .m(2)
13822 .n(8)
13823 .k(k)
13824 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013825 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013826 }
13827 }
13828
13829 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
13830 TEST_REQUIRES_ARM_NEON;
13831 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013832 for (uint32_t n = 1; n <= 8; n++) {
13833 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013834 GemmMicrokernelTester()
13835 .mr(2)
13836 .nr(8)
13837 .kr(2)
13838 .sr(1)
13839 .m(m)
13840 .n(n)
13841 .k(k)
13842 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013843 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013844 }
13845 }
13846 }
13847 }
13848
13849 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_gt_8) {
13850 TEST_REQUIRES_ARM_NEON;
13851 for (size_t k = 9; k < 16; k++) {
13852 GemmMicrokernelTester()
13853 .mr(2)
13854 .nr(8)
13855 .kr(2)
13856 .sr(1)
13857 .m(2)
13858 .n(8)
13859 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013860 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013861 }
13862 }
13863
13864 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_gt_8_strided_a) {
13865 TEST_REQUIRES_ARM_NEON;
13866 for (size_t k = 9; k < 16; k++) {
13867 GemmMicrokernelTester()
13868 .mr(2)
13869 .nr(8)
13870 .kr(2)
13871 .sr(1)
13872 .m(2)
13873 .n(8)
13874 .k(k)
13875 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013876 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013877 }
13878 }
13879
13880 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
13881 TEST_REQUIRES_ARM_NEON;
13882 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013883 for (uint32_t n = 1; n <= 8; n++) {
13884 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013885 GemmMicrokernelTester()
13886 .mr(2)
13887 .nr(8)
13888 .kr(2)
13889 .sr(1)
13890 .m(m)
13891 .n(n)
13892 .k(k)
13893 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013894 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013895 }
13896 }
13897 }
13898 }
13899
13900 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_div_8) {
13901 TEST_REQUIRES_ARM_NEON;
13902 for (size_t k = 16; k <= 80; k += 8) {
13903 GemmMicrokernelTester()
13904 .mr(2)
13905 .nr(8)
13906 .kr(2)
13907 .sr(1)
13908 .m(2)
13909 .n(8)
13910 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013911 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013912 }
13913 }
13914
13915 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_div_8_strided_a) {
13916 TEST_REQUIRES_ARM_NEON;
13917 for (size_t k = 16; k <= 80; k += 8) {
13918 GemmMicrokernelTester()
13919 .mr(2)
13920 .nr(8)
13921 .kr(2)
13922 .sr(1)
13923 .m(2)
13924 .n(8)
13925 .k(k)
13926 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013927 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013928 }
13929 }
13930
13931 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_div_8_subtile) {
13932 TEST_REQUIRES_ARM_NEON;
13933 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013934 for (uint32_t n = 1; n <= 8; n++) {
13935 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013936 GemmMicrokernelTester()
13937 .mr(2)
13938 .nr(8)
13939 .kr(2)
13940 .sr(1)
13941 .m(m)
13942 .n(n)
13943 .k(k)
13944 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013945 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013946 }
13947 }
13948 }
13949 }
13950
13951 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_gt_8) {
13952 TEST_REQUIRES_ARM_NEON;
13953 for (uint32_t n = 9; n < 16; n++) {
13954 for (size_t k = 1; k <= 40; k += 9) {
13955 GemmMicrokernelTester()
13956 .mr(2)
13957 .nr(8)
13958 .kr(2)
13959 .sr(1)
13960 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013961 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013962 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013963 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013964 }
13965 }
13966 }
13967
13968 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
13969 TEST_REQUIRES_ARM_NEON;
13970 for (uint32_t n = 9; n < 16; n++) {
13971 for (size_t k = 1; k <= 40; k += 9) {
13972 GemmMicrokernelTester()
13973 .mr(2)
13974 .nr(8)
13975 .kr(2)
13976 .sr(1)
13977 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013978 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013979 .k(k)
13980 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013981 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013982 }
13983 }
13984 }
13985
13986 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_gt_8_strided_a) {
13987 TEST_REQUIRES_ARM_NEON;
13988 for (uint32_t n = 9; n < 16; n++) {
13989 for (size_t k = 1; k <= 40; k += 9) {
13990 GemmMicrokernelTester()
13991 .mr(2)
13992 .nr(8)
13993 .kr(2)
13994 .sr(1)
13995 .m(2)
13996 .n(n)
13997 .k(k)
13998 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013999 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014000 }
14001 }
14002 }
14003
14004 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
14005 TEST_REQUIRES_ARM_NEON;
14006 for (uint32_t n = 9; n < 16; n++) {
14007 for (size_t k = 1; k <= 40; k += 9) {
14008 for (uint32_t m = 1; m <= 2; m++) {
14009 GemmMicrokernelTester()
14010 .mr(2)
14011 .nr(8)
14012 .kr(2)
14013 .sr(1)
14014 .m(m)
14015 .n(n)
14016 .k(k)
14017 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014018 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014019 }
14020 }
14021 }
14022 }
14023
14024 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_div_8) {
14025 TEST_REQUIRES_ARM_NEON;
14026 for (uint32_t n = 16; n <= 24; n += 8) {
14027 for (size_t k = 1; k <= 40; k += 9) {
14028 GemmMicrokernelTester()
14029 .mr(2)
14030 .nr(8)
14031 .kr(2)
14032 .sr(1)
14033 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014034 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014035 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014036 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014037 }
14038 }
14039 }
14040
14041 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
14042 TEST_REQUIRES_ARM_NEON;
14043 for (uint32_t n = 16; n <= 24; n += 8) {
14044 for (size_t k = 1; k <= 40; k += 9) {
14045 GemmMicrokernelTester()
14046 .mr(2)
14047 .nr(8)
14048 .kr(2)
14049 .sr(1)
14050 .m(2)
14051 .n(n)
14052 .k(k)
14053 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014054 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014055 }
14056 }
14057 }
14058
14059 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_div_8_strided_a) {
14060 TEST_REQUIRES_ARM_NEON;
14061 for (uint32_t n = 16; n <= 24; n += 8) {
14062 for (size_t k = 1; k <= 40; k += 9) {
14063 GemmMicrokernelTester()
14064 .mr(2)
14065 .nr(8)
14066 .kr(2)
14067 .sr(1)
14068 .m(2)
14069 .n(n)
14070 .k(k)
14071 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014072 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014073 }
14074 }
14075 }
14076
14077 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_div_8_subtile) {
14078 TEST_REQUIRES_ARM_NEON;
14079 for (uint32_t n = 16; n <= 24; n += 8) {
14080 for (size_t k = 1; k <= 40; k += 9) {
14081 for (uint32_t m = 1; m <= 2; m++) {
14082 GemmMicrokernelTester()
14083 .mr(2)
14084 .nr(8)
14085 .kr(2)
14086 .sr(1)
14087 .m(m)
14088 .n(n)
14089 .k(k)
14090 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014091 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014092 }
14093 }
14094 }
14095 }
14096
14097 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, strided_cm_subtile) {
14098 TEST_REQUIRES_ARM_NEON;
14099 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014100 for (uint32_t n = 1; n <= 8; n++) {
14101 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014102 GemmMicrokernelTester()
14103 .mr(2)
14104 .nr(8)
14105 .kr(2)
14106 .sr(1)
14107 .m(m)
14108 .n(n)
14109 .k(k)
14110 .cm_stride(11)
14111 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014112 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014113 }
14114 }
14115 }
14116 }
14117
14118 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, qmin) {
14119 TEST_REQUIRES_ARM_NEON;
14120 GemmMicrokernelTester()
14121 .mr(2)
14122 .nr(8)
14123 .kr(2)
14124 .sr(1)
14125 .m(2)
14126 .n(8)
14127 .k(8)
14128 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014129 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014130 }
14131
14132 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, qmax) {
14133 TEST_REQUIRES_ARM_NEON;
14134 GemmMicrokernelTester()
14135 .mr(2)
14136 .nr(8)
14137 .kr(2)
14138 .sr(1)
14139 .m(2)
14140 .n(8)
14141 .k(8)
14142 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014143 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014144 }
14145
14146 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, strided_cm) {
14147 TEST_REQUIRES_ARM_NEON;
14148 GemmMicrokernelTester()
14149 .mr(2)
14150 .nr(8)
14151 .kr(2)
14152 .sr(1)
14153 .m(2)
14154 .n(8)
Frank Barcharde4d3f762021-12-23 15:31:43 -080014155 .k(8)
14156 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014157 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barcharde4d3f762021-12-23 15:31:43 -080014158 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014159#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14160
14161
14162#if XNN_ARCH_ARM || XNN_ARCH_ARM64
14163 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8) {
14164 TEST_REQUIRES_ARM_NEON;
14165 GemmMicrokernelTester()
14166 .mr(3)
14167 .nr(8)
14168 .kr(2)
14169 .sr(1)
14170 .m(3)
14171 .n(8)
14172 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080014173 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014174 }
14175
14176 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cn) {
14177 TEST_REQUIRES_ARM_NEON;
14178 GemmMicrokernelTester()
14179 .mr(3)
14180 .nr(8)
14181 .kr(2)
14182 .sr(1)
14183 .m(3)
14184 .n(8)
14185 .k(8)
14186 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014187 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014188 }
14189
14190 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_strided_a) {
14191 TEST_REQUIRES_ARM_NEON;
14192 GemmMicrokernelTester()
14193 .mr(3)
14194 .nr(8)
14195 .kr(2)
14196 .sr(1)
14197 .m(3)
14198 .n(8)
14199 .k(8)
14200 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014201 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014202 }
14203
14204 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
14205 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014206 for (uint32_t n = 1; n <= 8; n++) {
14207 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014208 GemmMicrokernelTester()
14209 .mr(3)
14210 .nr(8)
14211 .kr(2)
14212 .sr(1)
14213 .m(m)
14214 .n(n)
14215 .k(8)
14216 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014217 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014218 }
14219 }
14220 }
14221
14222 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
14223 TEST_REQUIRES_ARM_NEON;
14224 for (uint32_t m = 1; m <= 3; m++) {
14225 GemmMicrokernelTester()
14226 .mr(3)
14227 .nr(8)
14228 .kr(2)
14229 .sr(1)
14230 .m(m)
14231 .n(8)
14232 .k(8)
14233 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014234 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014235 }
14236 }
14237
14238 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
14239 TEST_REQUIRES_ARM_NEON;
14240 for (uint32_t n = 1; n <= 8; n++) {
14241 GemmMicrokernelTester()
14242 .mr(3)
14243 .nr(8)
14244 .kr(2)
14245 .sr(1)
14246 .m(3)
14247 .n(n)
14248 .k(8)
14249 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014250 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014251 }
14252 }
14253
14254 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_lt_8) {
14255 TEST_REQUIRES_ARM_NEON;
14256 for (size_t k = 1; k < 8; k++) {
14257 GemmMicrokernelTester()
14258 .mr(3)
14259 .nr(8)
14260 .kr(2)
14261 .sr(1)
14262 .m(3)
14263 .n(8)
14264 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014265 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014266 }
14267 }
14268
14269 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_lt_8_strided_a) {
14270 TEST_REQUIRES_ARM_NEON;
14271 for (size_t k = 1; k < 8; k++) {
14272 GemmMicrokernelTester()
14273 .mr(3)
14274 .nr(8)
14275 .kr(2)
14276 .sr(1)
14277 .m(3)
14278 .n(8)
14279 .k(k)
14280 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014281 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014282 }
14283 }
14284
14285 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
14286 TEST_REQUIRES_ARM_NEON;
14287 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014288 for (uint32_t n = 1; n <= 8; n++) {
14289 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014290 GemmMicrokernelTester()
14291 .mr(3)
14292 .nr(8)
14293 .kr(2)
14294 .sr(1)
14295 .m(m)
14296 .n(n)
14297 .k(k)
14298 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014299 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014300 }
14301 }
14302 }
14303 }
14304
14305 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_gt_8) {
14306 TEST_REQUIRES_ARM_NEON;
14307 for (size_t k = 9; k < 16; k++) {
14308 GemmMicrokernelTester()
14309 .mr(3)
14310 .nr(8)
14311 .kr(2)
14312 .sr(1)
14313 .m(3)
14314 .n(8)
14315 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014316 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014317 }
14318 }
14319
14320 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_gt_8_strided_a) {
14321 TEST_REQUIRES_ARM_NEON;
14322 for (size_t k = 9; k < 16; k++) {
14323 GemmMicrokernelTester()
14324 .mr(3)
14325 .nr(8)
14326 .kr(2)
14327 .sr(1)
14328 .m(3)
14329 .n(8)
14330 .k(k)
14331 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014332 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014333 }
14334 }
14335
14336 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
14337 TEST_REQUIRES_ARM_NEON;
14338 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014339 for (uint32_t n = 1; n <= 8; n++) {
14340 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014341 GemmMicrokernelTester()
14342 .mr(3)
14343 .nr(8)
14344 .kr(2)
14345 .sr(1)
14346 .m(m)
14347 .n(n)
14348 .k(k)
14349 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014350 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014351 }
14352 }
14353 }
14354 }
14355
14356 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_div_8) {
14357 TEST_REQUIRES_ARM_NEON;
14358 for (size_t k = 16; k <= 80; k += 8) {
14359 GemmMicrokernelTester()
14360 .mr(3)
14361 .nr(8)
14362 .kr(2)
14363 .sr(1)
14364 .m(3)
14365 .n(8)
14366 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014367 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014368 }
14369 }
14370
14371 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_div_8_strided_a) {
14372 TEST_REQUIRES_ARM_NEON;
14373 for (size_t k = 16; k <= 80; k += 8) {
14374 GemmMicrokernelTester()
14375 .mr(3)
14376 .nr(8)
14377 .kr(2)
14378 .sr(1)
14379 .m(3)
14380 .n(8)
14381 .k(k)
14382 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014383 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014384 }
14385 }
14386
14387 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_div_8_subtile) {
14388 TEST_REQUIRES_ARM_NEON;
14389 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014390 for (uint32_t n = 1; n <= 8; n++) {
14391 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014392 GemmMicrokernelTester()
14393 .mr(3)
14394 .nr(8)
14395 .kr(2)
14396 .sr(1)
14397 .m(m)
14398 .n(n)
14399 .k(k)
14400 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014401 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014402 }
14403 }
14404 }
14405 }
14406
14407 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8) {
14408 TEST_REQUIRES_ARM_NEON;
14409 for (uint32_t n = 9; n < 16; n++) {
14410 for (size_t k = 1; k <= 40; k += 9) {
14411 GemmMicrokernelTester()
14412 .mr(3)
14413 .nr(8)
14414 .kr(2)
14415 .sr(1)
14416 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014417 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014418 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014419 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014420 }
14421 }
14422 }
14423
14424 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
14425 TEST_REQUIRES_ARM_NEON;
14426 for (uint32_t n = 9; n < 16; n++) {
14427 for (size_t k = 1; k <= 40; k += 9) {
14428 GemmMicrokernelTester()
14429 .mr(3)
14430 .nr(8)
14431 .kr(2)
14432 .sr(1)
14433 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014434 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014435 .k(k)
14436 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014437 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014438 }
14439 }
14440 }
14441
14442 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_strided_a) {
14443 TEST_REQUIRES_ARM_NEON;
14444 for (uint32_t n = 9; n < 16; n++) {
14445 for (size_t k = 1; k <= 40; k += 9) {
14446 GemmMicrokernelTester()
14447 .mr(3)
14448 .nr(8)
14449 .kr(2)
14450 .sr(1)
14451 .m(3)
14452 .n(n)
14453 .k(k)
14454 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014455 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014456 }
14457 }
14458 }
14459
14460 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
14461 TEST_REQUIRES_ARM_NEON;
14462 for (uint32_t n = 9; n < 16; n++) {
14463 for (size_t k = 1; k <= 40; k += 9) {
14464 for (uint32_t m = 1; m <= 3; m++) {
14465 GemmMicrokernelTester()
14466 .mr(3)
14467 .nr(8)
14468 .kr(2)
14469 .sr(1)
14470 .m(m)
14471 .n(n)
14472 .k(k)
14473 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014474 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014475 }
14476 }
14477 }
14478 }
14479
14480 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8) {
14481 TEST_REQUIRES_ARM_NEON;
14482 for (uint32_t n = 16; n <= 24; n += 8) {
14483 for (size_t k = 1; k <= 40; k += 9) {
14484 GemmMicrokernelTester()
14485 .mr(3)
14486 .nr(8)
14487 .kr(2)
14488 .sr(1)
14489 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014490 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014491 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014492 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014493 }
14494 }
14495 }
14496
14497 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
14498 TEST_REQUIRES_ARM_NEON;
14499 for (uint32_t n = 16; n <= 24; n += 8) {
14500 for (size_t k = 1; k <= 40; k += 9) {
14501 GemmMicrokernelTester()
14502 .mr(3)
14503 .nr(8)
14504 .kr(2)
14505 .sr(1)
14506 .m(3)
14507 .n(n)
14508 .k(k)
14509 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014510 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014511 }
14512 }
14513 }
14514
14515 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_strided_a) {
14516 TEST_REQUIRES_ARM_NEON;
14517 for (uint32_t n = 16; n <= 24; n += 8) {
14518 for (size_t k = 1; k <= 40; k += 9) {
14519 GemmMicrokernelTester()
14520 .mr(3)
14521 .nr(8)
14522 .kr(2)
14523 .sr(1)
14524 .m(3)
14525 .n(n)
14526 .k(k)
14527 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014528 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014529 }
14530 }
14531 }
14532
14533 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_subtile) {
14534 TEST_REQUIRES_ARM_NEON;
14535 for (uint32_t n = 16; n <= 24; n += 8) {
14536 for (size_t k = 1; k <= 40; k += 9) {
14537 for (uint32_t m = 1; m <= 3; m++) {
14538 GemmMicrokernelTester()
14539 .mr(3)
14540 .nr(8)
14541 .kr(2)
14542 .sr(1)
14543 .m(m)
14544 .n(n)
14545 .k(k)
14546 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014547 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014548 }
14549 }
14550 }
14551 }
14552
14553 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cm_subtile) {
14554 TEST_REQUIRES_ARM_NEON;
14555 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014556 for (uint32_t n = 1; n <= 8; n++) {
14557 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014558 GemmMicrokernelTester()
14559 .mr(3)
14560 .nr(8)
14561 .kr(2)
14562 .sr(1)
14563 .m(m)
14564 .n(n)
14565 .k(k)
14566 .cm_stride(11)
14567 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014568 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014569 }
14570 }
14571 }
14572 }
14573
14574 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, qmin) {
14575 TEST_REQUIRES_ARM_NEON;
14576 GemmMicrokernelTester()
14577 .mr(3)
14578 .nr(8)
14579 .kr(2)
14580 .sr(1)
14581 .m(3)
14582 .n(8)
14583 .k(8)
14584 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014585 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014586 }
14587
14588 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, qmax) {
14589 TEST_REQUIRES_ARM_NEON;
14590 GemmMicrokernelTester()
14591 .mr(3)
14592 .nr(8)
14593 .kr(2)
14594 .sr(1)
14595 .m(3)
14596 .n(8)
14597 .k(8)
14598 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014599 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014600 }
14601
14602 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cm) {
14603 TEST_REQUIRES_ARM_NEON;
14604 GemmMicrokernelTester()
14605 .mr(3)
14606 .nr(8)
14607 .kr(2)
14608 .sr(1)
14609 .m(3)
14610 .n(8)
14611 .k(8)
14612 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014613 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014614 }
14615#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14616
14617
14618#if XNN_ARCH_ARM || XNN_ARCH_ARM64
14619 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16) {
14620 TEST_REQUIRES_ARM_NEON;
14621 GemmMicrokernelTester()
14622 .mr(2)
14623 .nr(16)
14624 .kr(2)
14625 .sr(1)
14626 .m(2)
14627 .n(16)
14628 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080014629 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014630 }
14631
14632 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cn) {
14633 TEST_REQUIRES_ARM_NEON;
14634 GemmMicrokernelTester()
14635 .mr(2)
14636 .nr(16)
14637 .kr(2)
14638 .sr(1)
14639 .m(2)
14640 .n(16)
14641 .k(16)
14642 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014643 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014644 }
14645
14646 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_strided_a) {
14647 TEST_REQUIRES_ARM_NEON;
14648 GemmMicrokernelTester()
14649 .mr(2)
14650 .nr(16)
14651 .kr(2)
14652 .sr(1)
14653 .m(2)
14654 .n(16)
14655 .k(16)
14656 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014657 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014658 }
14659
14660 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile) {
14661 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014662 for (uint32_t n = 1; n <= 16; n++) {
14663 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014664 GemmMicrokernelTester()
14665 .mr(2)
14666 .nr(16)
14667 .kr(2)
14668 .sr(1)
14669 .m(m)
14670 .n(n)
14671 .k(16)
14672 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014673 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014674 }
14675 }
14676 }
14677
14678 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
14679 TEST_REQUIRES_ARM_NEON;
14680 for (uint32_t m = 1; m <= 2; m++) {
14681 GemmMicrokernelTester()
14682 .mr(2)
14683 .nr(16)
14684 .kr(2)
14685 .sr(1)
14686 .m(m)
14687 .n(16)
14688 .k(16)
14689 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014690 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014691 }
14692 }
14693
14694 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
14695 TEST_REQUIRES_ARM_NEON;
14696 for (uint32_t n = 1; n <= 16; n++) {
14697 GemmMicrokernelTester()
14698 .mr(2)
14699 .nr(16)
14700 .kr(2)
14701 .sr(1)
14702 .m(2)
14703 .n(n)
14704 .k(16)
14705 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014706 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014707 }
14708 }
14709
14710 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_lt_16) {
14711 TEST_REQUIRES_ARM_NEON;
14712 for (size_t k = 1; k < 16; k++) {
14713 GemmMicrokernelTester()
14714 .mr(2)
14715 .nr(16)
14716 .kr(2)
14717 .sr(1)
14718 .m(2)
14719 .n(16)
14720 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014721 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014722 }
14723 }
14724
14725 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_lt_16_strided_a) {
14726 TEST_REQUIRES_ARM_NEON;
14727 for (size_t k = 1; k < 16; k++) {
14728 GemmMicrokernelTester()
14729 .mr(2)
14730 .nr(16)
14731 .kr(2)
14732 .sr(1)
14733 .m(2)
14734 .n(16)
14735 .k(k)
14736 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014737 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014738 }
14739 }
14740
14741 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_lt_16_subtile) {
14742 TEST_REQUIRES_ARM_NEON;
14743 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014744 for (uint32_t n = 1; n <= 16; n++) {
14745 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014746 GemmMicrokernelTester()
14747 .mr(2)
14748 .nr(16)
14749 .kr(2)
14750 .sr(1)
14751 .m(m)
14752 .n(n)
14753 .k(k)
14754 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014755 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014756 }
14757 }
14758 }
14759 }
14760
14761 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_gt_16) {
14762 TEST_REQUIRES_ARM_NEON;
14763 for (size_t k = 17; k < 32; k++) {
14764 GemmMicrokernelTester()
14765 .mr(2)
14766 .nr(16)
14767 .kr(2)
14768 .sr(1)
14769 .m(2)
14770 .n(16)
14771 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014772 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014773 }
14774 }
14775
14776 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_gt_16_strided_a) {
14777 TEST_REQUIRES_ARM_NEON;
14778 for (size_t k = 17; k < 32; k++) {
14779 GemmMicrokernelTester()
14780 .mr(2)
14781 .nr(16)
14782 .kr(2)
14783 .sr(1)
14784 .m(2)
14785 .n(16)
14786 .k(k)
14787 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080014788 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014789 }
14790 }
14791
14792 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_gt_16_subtile) {
14793 TEST_REQUIRES_ARM_NEON;
14794 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014795 for (uint32_t n = 1; n <= 16; n++) {
14796 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014797 GemmMicrokernelTester()
14798 .mr(2)
14799 .nr(16)
14800 .kr(2)
14801 .sr(1)
14802 .m(m)
14803 .n(n)
14804 .k(k)
14805 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014806 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014807 }
14808 }
14809 }
14810 }
14811
14812 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_div_16) {
14813 TEST_REQUIRES_ARM_NEON;
14814 for (size_t k = 32; k <= 160; k += 16) {
14815 GemmMicrokernelTester()
14816 .mr(2)
14817 .nr(16)
14818 .kr(2)
14819 .sr(1)
14820 .m(2)
14821 .n(16)
14822 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014823 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014824 }
14825 }
14826
14827 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_div_16_strided_a) {
14828 TEST_REQUIRES_ARM_NEON;
14829 for (size_t k = 32; k <= 160; k += 16) {
14830 GemmMicrokernelTester()
14831 .mr(2)
14832 .nr(16)
14833 .kr(2)
14834 .sr(1)
14835 .m(2)
14836 .n(16)
14837 .k(k)
14838 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080014839 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014840 }
14841 }
14842
14843 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_div_16_subtile) {
14844 TEST_REQUIRES_ARM_NEON;
14845 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014846 for (uint32_t n = 1; n <= 16; n++) {
14847 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014848 GemmMicrokernelTester()
14849 .mr(2)
14850 .nr(16)
14851 .kr(2)
14852 .sr(1)
14853 .m(m)
14854 .n(n)
14855 .k(k)
14856 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014857 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014858 }
14859 }
14860 }
14861 }
14862
14863 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16) {
14864 TEST_REQUIRES_ARM_NEON;
14865 for (uint32_t n = 17; n < 32; n++) {
14866 for (size_t k = 1; k <= 80; k += 17) {
14867 GemmMicrokernelTester()
14868 .mr(2)
14869 .nr(16)
14870 .kr(2)
14871 .sr(1)
14872 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014873 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014874 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014875 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014876 }
14877 }
14878 }
14879
14880 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_strided_cn) {
14881 TEST_REQUIRES_ARM_NEON;
14882 for (uint32_t n = 17; n < 32; n++) {
14883 for (size_t k = 1; k <= 80; k += 17) {
14884 GemmMicrokernelTester()
14885 .mr(2)
14886 .nr(16)
14887 .kr(2)
14888 .sr(1)
14889 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014890 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014891 .k(k)
14892 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014893 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014894 }
14895 }
14896 }
14897
14898 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_strided_a) {
14899 TEST_REQUIRES_ARM_NEON;
14900 for (uint32_t n = 17; n < 32; n++) {
14901 for (size_t k = 1; k <= 80; k += 17) {
14902 GemmMicrokernelTester()
14903 .mr(2)
14904 .nr(16)
14905 .kr(2)
14906 .sr(1)
14907 .m(2)
14908 .n(n)
14909 .k(k)
14910 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014911 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014912 }
14913 }
14914 }
14915
14916 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_subtile) {
14917 TEST_REQUIRES_ARM_NEON;
14918 for (uint32_t n = 17; n < 32; n++) {
14919 for (size_t k = 1; k <= 80; k += 17) {
14920 for (uint32_t m = 1; m <= 2; m++) {
14921 GemmMicrokernelTester()
14922 .mr(2)
14923 .nr(16)
14924 .kr(2)
14925 .sr(1)
14926 .m(m)
14927 .n(n)
14928 .k(k)
14929 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014930 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014931 }
14932 }
14933 }
14934 }
14935
14936 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16) {
14937 TEST_REQUIRES_ARM_NEON;
14938 for (uint32_t n = 32; n <= 48; n += 16) {
14939 for (size_t k = 1; k <= 80; k += 17) {
14940 GemmMicrokernelTester()
14941 .mr(2)
14942 .nr(16)
14943 .kr(2)
14944 .sr(1)
14945 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014946 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014947 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014948 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014949 }
14950 }
14951 }
14952
14953 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_strided_cn) {
14954 TEST_REQUIRES_ARM_NEON;
14955 for (uint32_t n = 32; n <= 48; n += 16) {
14956 for (size_t k = 1; k <= 80; k += 17) {
14957 GemmMicrokernelTester()
14958 .mr(2)
14959 .nr(16)
14960 .kr(2)
14961 .sr(1)
14962 .m(2)
14963 .n(n)
14964 .k(k)
14965 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014966 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014967 }
14968 }
14969 }
14970
14971 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_strided_a) {
14972 TEST_REQUIRES_ARM_NEON;
14973 for (uint32_t n = 32; n <= 48; n += 16) {
14974 for (size_t k = 1; k <= 80; k += 17) {
14975 GemmMicrokernelTester()
14976 .mr(2)
14977 .nr(16)
14978 .kr(2)
14979 .sr(1)
14980 .m(2)
14981 .n(n)
14982 .k(k)
14983 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014984 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014985 }
14986 }
14987 }
14988
14989 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_subtile) {
14990 TEST_REQUIRES_ARM_NEON;
14991 for (uint32_t n = 32; n <= 48; n += 16) {
14992 for (size_t k = 1; k <= 80; k += 17) {
14993 for (uint32_t m = 1; m <= 2; m++) {
14994 GemmMicrokernelTester()
14995 .mr(2)
14996 .nr(16)
14997 .kr(2)
14998 .sr(1)
14999 .m(m)
15000 .n(n)
15001 .k(k)
15002 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015003 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015004 }
15005 }
15006 }
15007 }
15008
15009 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cm_subtile) {
15010 TEST_REQUIRES_ARM_NEON;
15011 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015012 for (uint32_t n = 1; n <= 16; n++) {
15013 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015014 GemmMicrokernelTester()
15015 .mr(2)
15016 .nr(16)
15017 .kr(2)
15018 .sr(1)
15019 .m(m)
15020 .n(n)
15021 .k(k)
15022 .cm_stride(19)
15023 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015024 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015025 }
15026 }
15027 }
15028 }
15029
15030 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, qmin) {
15031 TEST_REQUIRES_ARM_NEON;
15032 GemmMicrokernelTester()
15033 .mr(2)
15034 .nr(16)
15035 .kr(2)
15036 .sr(1)
15037 .m(2)
15038 .n(16)
15039 .k(16)
15040 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015041 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015042 }
15043
15044 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, qmax) {
15045 TEST_REQUIRES_ARM_NEON;
15046 GemmMicrokernelTester()
15047 .mr(2)
15048 .nr(16)
15049 .kr(2)
15050 .sr(1)
15051 .m(2)
15052 .n(16)
15053 .k(16)
15054 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015055 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015056 }
15057
15058 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cm) {
15059 TEST_REQUIRES_ARM_NEON;
15060 GemmMicrokernelTester()
15061 .mr(2)
15062 .nr(16)
15063 .kr(2)
15064 .sr(1)
15065 .m(2)
15066 .n(16)
15067 .k(16)
15068 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015069 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015070 }
15071#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15072
15073
15074#if XNN_ARCH_ARM || XNN_ARCH_ARM64
15075 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_eq_16) {
15076 TEST_REQUIRES_ARM_NEON;
15077 GemmMicrokernelTester()
15078 .mr(4)
15079 .nr(16)
15080 .kr(2)
15081 .sr(1)
15082 .m(4)
15083 .n(16)
15084 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080015085 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015086 }
15087
15088 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, strided_cn) {
15089 TEST_REQUIRES_ARM_NEON;
15090 GemmMicrokernelTester()
15091 .mr(4)
15092 .nr(16)
15093 .kr(2)
15094 .sr(1)
15095 .m(4)
15096 .n(16)
15097 .k(16)
15098 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015099 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015100 }
15101
15102 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_eq_16_strided_a) {
15103 TEST_REQUIRES_ARM_NEON;
15104 GemmMicrokernelTester()
15105 .mr(4)
15106 .nr(16)
15107 .kr(2)
15108 .sr(1)
15109 .m(4)
15110 .n(16)
15111 .k(16)
15112 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015113 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015114 }
15115
15116 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_eq_16_subtile) {
15117 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015118 for (uint32_t n = 1; n <= 16; n++) {
15119 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015120 GemmMicrokernelTester()
15121 .mr(4)
15122 .nr(16)
15123 .kr(2)
15124 .sr(1)
15125 .m(m)
15126 .n(n)
15127 .k(16)
15128 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015129 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015130 }
15131 }
15132 }
15133
15134 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
15135 TEST_REQUIRES_ARM_NEON;
15136 for (uint32_t m = 1; m <= 4; m++) {
15137 GemmMicrokernelTester()
15138 .mr(4)
15139 .nr(16)
15140 .kr(2)
15141 .sr(1)
15142 .m(m)
15143 .n(16)
15144 .k(16)
15145 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015146 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015147 }
15148 }
15149
15150 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
15151 TEST_REQUIRES_ARM_NEON;
15152 for (uint32_t n = 1; n <= 16; n++) {
15153 GemmMicrokernelTester()
15154 .mr(4)
15155 .nr(16)
15156 .kr(2)
15157 .sr(1)
15158 .m(4)
15159 .n(n)
15160 .k(16)
15161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015162 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015163 }
15164 }
15165
15166 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_lt_16) {
15167 TEST_REQUIRES_ARM_NEON;
15168 for (size_t k = 1; k < 16; k++) {
15169 GemmMicrokernelTester()
15170 .mr(4)
15171 .nr(16)
15172 .kr(2)
15173 .sr(1)
15174 .m(4)
15175 .n(16)
15176 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015177 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015178 }
15179 }
15180
15181 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_lt_16_strided_a) {
15182 TEST_REQUIRES_ARM_NEON;
15183 for (size_t k = 1; k < 16; k++) {
15184 GemmMicrokernelTester()
15185 .mr(4)
15186 .nr(16)
15187 .kr(2)
15188 .sr(1)
15189 .m(4)
15190 .n(16)
15191 .k(k)
15192 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015193 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015194 }
15195 }
15196
15197 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_lt_16_subtile) {
15198 TEST_REQUIRES_ARM_NEON;
15199 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015200 for (uint32_t n = 1; n <= 16; n++) {
15201 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015202 GemmMicrokernelTester()
15203 .mr(4)
15204 .nr(16)
15205 .kr(2)
15206 .sr(1)
15207 .m(m)
15208 .n(n)
15209 .k(k)
15210 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015211 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015212 }
15213 }
15214 }
15215 }
15216
15217 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_gt_16) {
15218 TEST_REQUIRES_ARM_NEON;
15219 for (size_t k = 17; k < 32; k++) {
15220 GemmMicrokernelTester()
15221 .mr(4)
15222 .nr(16)
15223 .kr(2)
15224 .sr(1)
15225 .m(4)
15226 .n(16)
15227 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015228 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015229 }
15230 }
15231
15232 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_gt_16_strided_a) {
15233 TEST_REQUIRES_ARM_NEON;
15234 for (size_t k = 17; k < 32; k++) {
15235 GemmMicrokernelTester()
15236 .mr(4)
15237 .nr(16)
15238 .kr(2)
15239 .sr(1)
15240 .m(4)
15241 .n(16)
15242 .k(k)
15243 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080015244 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015245 }
15246 }
15247
15248 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_gt_16_subtile) {
15249 TEST_REQUIRES_ARM_NEON;
15250 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015251 for (uint32_t n = 1; n <= 16; n++) {
15252 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015253 GemmMicrokernelTester()
15254 .mr(4)
15255 .nr(16)
15256 .kr(2)
15257 .sr(1)
15258 .m(m)
15259 .n(n)
15260 .k(k)
15261 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015262 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015263 }
15264 }
15265 }
15266 }
15267
15268 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_div_16) {
15269 TEST_REQUIRES_ARM_NEON;
15270 for (size_t k = 32; k <= 160; k += 16) {
15271 GemmMicrokernelTester()
15272 .mr(4)
15273 .nr(16)
15274 .kr(2)
15275 .sr(1)
15276 .m(4)
15277 .n(16)
15278 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015279 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015280 }
15281 }
15282
15283 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_div_16_strided_a) {
15284 TEST_REQUIRES_ARM_NEON;
15285 for (size_t k = 32; k <= 160; k += 16) {
15286 GemmMicrokernelTester()
15287 .mr(4)
15288 .nr(16)
15289 .kr(2)
15290 .sr(1)
15291 .m(4)
15292 .n(16)
15293 .k(k)
15294 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080015295 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015296 }
15297 }
15298
15299 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_div_16_subtile) {
15300 TEST_REQUIRES_ARM_NEON;
15301 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015302 for (uint32_t n = 1; n <= 16; n++) {
15303 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015304 GemmMicrokernelTester()
15305 .mr(4)
15306 .nr(16)
15307 .kr(2)
15308 .sr(1)
15309 .m(m)
15310 .n(n)
15311 .k(k)
15312 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015313 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015314 }
15315 }
15316 }
15317 }
15318
15319 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_gt_16) {
15320 TEST_REQUIRES_ARM_NEON;
15321 for (uint32_t n = 17; n < 32; n++) {
15322 for (size_t k = 1; k <= 80; k += 17) {
15323 GemmMicrokernelTester()
15324 .mr(4)
15325 .nr(16)
15326 .kr(2)
15327 .sr(1)
15328 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015329 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015330 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015331 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015332 }
15333 }
15334 }
15335
15336 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_gt_16_strided_cn) {
15337 TEST_REQUIRES_ARM_NEON;
15338 for (uint32_t n = 17; n < 32; n++) {
15339 for (size_t k = 1; k <= 80; k += 17) {
15340 GemmMicrokernelTester()
15341 .mr(4)
15342 .nr(16)
15343 .kr(2)
15344 .sr(1)
15345 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015346 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015347 .k(k)
15348 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015349 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015350 }
15351 }
15352 }
15353
15354 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_gt_16_strided_a) {
15355 TEST_REQUIRES_ARM_NEON;
15356 for (uint32_t n = 17; n < 32; n++) {
15357 for (size_t k = 1; k <= 80; k += 17) {
15358 GemmMicrokernelTester()
15359 .mr(4)
15360 .nr(16)
15361 .kr(2)
15362 .sr(1)
15363 .m(4)
15364 .n(n)
15365 .k(k)
15366 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015367 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015368 }
15369 }
15370 }
15371
15372 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_gt_16_subtile) {
15373 TEST_REQUIRES_ARM_NEON;
15374 for (uint32_t n = 17; n < 32; n++) {
15375 for (size_t k = 1; k <= 80; k += 17) {
15376 for (uint32_t m = 1; m <= 4; m++) {
15377 GemmMicrokernelTester()
15378 .mr(4)
15379 .nr(16)
15380 .kr(2)
15381 .sr(1)
15382 .m(m)
15383 .n(n)
15384 .k(k)
15385 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015386 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015387 }
15388 }
15389 }
15390 }
15391
15392 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_div_16) {
15393 TEST_REQUIRES_ARM_NEON;
15394 for (uint32_t n = 32; n <= 48; n += 16) {
15395 for (size_t k = 1; k <= 80; k += 17) {
15396 GemmMicrokernelTester()
15397 .mr(4)
15398 .nr(16)
15399 .kr(2)
15400 .sr(1)
15401 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015402 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015403 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015404 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015405 }
15406 }
15407 }
15408
15409 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_div_16_strided_cn) {
15410 TEST_REQUIRES_ARM_NEON;
15411 for (uint32_t n = 32; n <= 48; n += 16) {
15412 for (size_t k = 1; k <= 80; k += 17) {
15413 GemmMicrokernelTester()
15414 .mr(4)
15415 .nr(16)
15416 .kr(2)
15417 .sr(1)
15418 .m(4)
15419 .n(n)
15420 .k(k)
15421 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015422 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015423 }
15424 }
15425 }
15426
15427 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_div_16_strided_a) {
15428 TEST_REQUIRES_ARM_NEON;
15429 for (uint32_t n = 32; n <= 48; n += 16) {
15430 for (size_t k = 1; k <= 80; k += 17) {
15431 GemmMicrokernelTester()
15432 .mr(4)
15433 .nr(16)
15434 .kr(2)
15435 .sr(1)
15436 .m(4)
15437 .n(n)
15438 .k(k)
15439 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015440 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015441 }
15442 }
15443 }
15444
15445 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_div_16_subtile) {
15446 TEST_REQUIRES_ARM_NEON;
15447 for (uint32_t n = 32; n <= 48; n += 16) {
15448 for (size_t k = 1; k <= 80; k += 17) {
15449 for (uint32_t m = 1; m <= 4; m++) {
15450 GemmMicrokernelTester()
15451 .mr(4)
15452 .nr(16)
15453 .kr(2)
15454 .sr(1)
15455 .m(m)
15456 .n(n)
15457 .k(k)
15458 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015459 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015460 }
15461 }
15462 }
15463 }
15464
15465 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, strided_cm_subtile) {
15466 TEST_REQUIRES_ARM_NEON;
15467 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015468 for (uint32_t n = 1; n <= 16; n++) {
15469 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015470 GemmMicrokernelTester()
15471 .mr(4)
15472 .nr(16)
15473 .kr(2)
15474 .sr(1)
15475 .m(m)
15476 .n(n)
15477 .k(k)
15478 .cm_stride(19)
15479 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015480 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015481 }
15482 }
15483 }
15484 }
15485
15486 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, qmin) {
15487 TEST_REQUIRES_ARM_NEON;
15488 GemmMicrokernelTester()
15489 .mr(4)
15490 .nr(16)
15491 .kr(2)
15492 .sr(1)
15493 .m(4)
15494 .n(16)
15495 .k(16)
15496 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015497 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015498 }
15499
15500 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, qmax) {
15501 TEST_REQUIRES_ARM_NEON;
15502 GemmMicrokernelTester()
15503 .mr(4)
15504 .nr(16)
15505 .kr(2)
15506 .sr(1)
15507 .m(4)
15508 .n(16)
15509 .k(16)
15510 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015511 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015512 }
15513
15514 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, strided_cm) {
15515 TEST_REQUIRES_ARM_NEON;
15516 GemmMicrokernelTester()
15517 .mr(4)
15518 .nr(16)
15519 .kr(2)
15520 .sr(1)
15521 .m(4)
15522 .n(16)
15523 .k(16)
15524 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015525 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015526 }
15527#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde4d3f762021-12-23 15:31:43 -080015528
15529
Frank Barcharde31f29e2021-12-21 15:57:10 -080015530#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barcharde22685a2021-11-12 11:36:58 -080015531 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_eq_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015532 TEST_REQUIRES_ARM_NEON;
15533 GemmMicrokernelTester()
15534 .mr(2)
15535 .nr(8)
15536 .kr(8)
15537 .sr(1)
15538 .m(2)
15539 .n(8)
15540 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080015541 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015542 }
15543
Frank Barcharde22685a2021-11-12 11:36:58 -080015544 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015545 TEST_REQUIRES_ARM_NEON;
15546 GemmMicrokernelTester()
15547 .mr(2)
15548 .nr(8)
15549 .kr(8)
15550 .sr(1)
15551 .m(2)
15552 .n(8)
15553 .k(16)
15554 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015555 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015556 }
15557
Frank Barcharde22685a2021-11-12 11:36:58 -080015558 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_eq_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015559 TEST_REQUIRES_ARM_NEON;
15560 GemmMicrokernelTester()
15561 .mr(2)
15562 .nr(8)
15563 .kr(8)
15564 .sr(1)
15565 .m(2)
15566 .n(8)
15567 .k(16)
15568 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015569 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015570 }
15571
Frank Barcharde22685a2021-11-12 11:36:58 -080015572 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015573 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015574 for (uint32_t n = 1; n <= 8; n++) {
15575 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015576 GemmMicrokernelTester()
15577 .mr(2)
15578 .nr(8)
15579 .kr(8)
15580 .sr(1)
15581 .m(m)
15582 .n(n)
15583 .k(16)
15584 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015585 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015586 }
15587 }
15588 }
15589
Frank Barcharde22685a2021-11-12 11:36:58 -080015590 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015591 TEST_REQUIRES_ARM_NEON;
15592 for (uint32_t m = 1; m <= 2; m++) {
15593 GemmMicrokernelTester()
15594 .mr(2)
15595 .nr(8)
15596 .kr(8)
15597 .sr(1)
15598 .m(m)
15599 .n(8)
15600 .k(16)
15601 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015602 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015603 }
15604 }
15605
Frank Barcharde22685a2021-11-12 11:36:58 -080015606 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015607 TEST_REQUIRES_ARM_NEON;
15608 for (uint32_t n = 1; n <= 8; n++) {
15609 GemmMicrokernelTester()
15610 .mr(2)
15611 .nr(8)
15612 .kr(8)
15613 .sr(1)
15614 .m(2)
15615 .n(n)
15616 .k(16)
15617 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015618 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015619 }
15620 }
15621
Frank Barcharde22685a2021-11-12 11:36:58 -080015622 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_lt_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015623 TEST_REQUIRES_ARM_NEON;
15624 for (size_t k = 1; k < 16; k++) {
15625 GemmMicrokernelTester()
15626 .mr(2)
15627 .nr(8)
15628 .kr(8)
15629 .sr(1)
15630 .m(2)
15631 .n(8)
15632 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015633 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015634 }
15635 }
15636
Frank Barcharde22685a2021-11-12 11:36:58 -080015637 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_lt_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015638 TEST_REQUIRES_ARM_NEON;
15639 for (size_t k = 1; k < 16; k++) {
15640 GemmMicrokernelTester()
15641 .mr(2)
15642 .nr(8)
15643 .kr(8)
15644 .sr(1)
15645 .m(2)
15646 .n(8)
15647 .k(k)
15648 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015649 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015650 }
15651 }
15652
Frank Barcharde22685a2021-11-12 11:36:58 -080015653 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_lt_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015654 TEST_REQUIRES_ARM_NEON;
15655 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015656 for (uint32_t n = 1; n <= 8; n++) {
15657 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015658 GemmMicrokernelTester()
15659 .mr(2)
15660 .nr(8)
15661 .kr(8)
15662 .sr(1)
15663 .m(m)
15664 .n(n)
15665 .k(k)
15666 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015667 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015668 }
15669 }
15670 }
15671 }
15672
Frank Barcharde22685a2021-11-12 11:36:58 -080015673 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_gt_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015674 TEST_REQUIRES_ARM_NEON;
15675 for (size_t k = 17; k < 32; k++) {
15676 GemmMicrokernelTester()
15677 .mr(2)
15678 .nr(8)
15679 .kr(8)
15680 .sr(1)
15681 .m(2)
15682 .n(8)
15683 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015684 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015685 }
15686 }
15687
Frank Barcharde22685a2021-11-12 11:36:58 -080015688 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_gt_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015689 TEST_REQUIRES_ARM_NEON;
15690 for (size_t k = 17; k < 32; k++) {
15691 GemmMicrokernelTester()
15692 .mr(2)
15693 .nr(8)
15694 .kr(8)
15695 .sr(1)
15696 .m(2)
15697 .n(8)
15698 .k(k)
15699 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080015700 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015701 }
15702 }
15703
Frank Barcharde22685a2021-11-12 11:36:58 -080015704 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_gt_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015705 TEST_REQUIRES_ARM_NEON;
15706 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015707 for (uint32_t n = 1; n <= 8; n++) {
15708 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015709 GemmMicrokernelTester()
15710 .mr(2)
15711 .nr(8)
15712 .kr(8)
15713 .sr(1)
15714 .m(m)
15715 .n(n)
15716 .k(k)
15717 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015718 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015719 }
15720 }
15721 }
15722 }
15723
Frank Barcharde22685a2021-11-12 11:36:58 -080015724 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_div_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015725 TEST_REQUIRES_ARM_NEON;
15726 for (size_t k = 32; k <= 160; k += 16) {
15727 GemmMicrokernelTester()
15728 .mr(2)
15729 .nr(8)
15730 .kr(8)
15731 .sr(1)
15732 .m(2)
15733 .n(8)
15734 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015735 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015736 }
15737 }
15738
Frank Barcharde22685a2021-11-12 11:36:58 -080015739 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_div_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015740 TEST_REQUIRES_ARM_NEON;
15741 for (size_t k = 32; k <= 160; k += 16) {
15742 GemmMicrokernelTester()
15743 .mr(2)
15744 .nr(8)
15745 .kr(8)
15746 .sr(1)
15747 .m(2)
15748 .n(8)
15749 .k(k)
15750 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080015751 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015752 }
15753 }
15754
Frank Barcharde22685a2021-11-12 11:36:58 -080015755 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_div_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015756 TEST_REQUIRES_ARM_NEON;
15757 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015758 for (uint32_t n = 1; n <= 8; n++) {
15759 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015760 GemmMicrokernelTester()
15761 .mr(2)
15762 .nr(8)
15763 .kr(8)
15764 .sr(1)
15765 .m(m)
15766 .n(n)
15767 .k(k)
15768 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015769 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015770 }
15771 }
15772 }
15773 }
15774
Frank Barcharde22685a2021-11-12 11:36:58 -080015775 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_gt_8) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015776 TEST_REQUIRES_ARM_NEON;
15777 for (uint32_t n = 9; n < 16; n++) {
15778 for (size_t k = 1; k <= 80; k += 17) {
15779 GemmMicrokernelTester()
15780 .mr(2)
15781 .nr(8)
15782 .kr(8)
15783 .sr(1)
15784 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015785 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070015786 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015787 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015788 }
15789 }
15790 }
15791
Frank Barcharde22685a2021-11-12 11:36:58 -080015792 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015793 TEST_REQUIRES_ARM_NEON;
15794 for (uint32_t n = 9; n < 16; n++) {
15795 for (size_t k = 1; k <= 80; k += 17) {
15796 GemmMicrokernelTester()
15797 .mr(2)
15798 .nr(8)
15799 .kr(8)
15800 .sr(1)
15801 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015802 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070015803 .k(k)
15804 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015805 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015806 }
15807 }
15808 }
15809
Frank Barcharde22685a2021-11-12 11:36:58 -080015810 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015811 TEST_REQUIRES_ARM_NEON;
15812 for (uint32_t n = 9; n < 16; n++) {
15813 for (size_t k = 1; k <= 80; k += 17) {
15814 GemmMicrokernelTester()
15815 .mr(2)
15816 .nr(8)
15817 .kr(8)
15818 .sr(1)
15819 .m(2)
15820 .n(n)
15821 .k(k)
15822 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015823 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015824 }
15825 }
15826 }
15827
Frank Barcharde22685a2021-11-12 11:36:58 -080015828 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_gt_8_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015829 TEST_REQUIRES_ARM_NEON;
15830 for (uint32_t n = 9; n < 16; n++) {
15831 for (size_t k = 1; k <= 80; k += 17) {
15832 for (uint32_t m = 1; m <= 2; m++) {
15833 GemmMicrokernelTester()
15834 .mr(2)
15835 .nr(8)
15836 .kr(8)
15837 .sr(1)
15838 .m(m)
15839 .n(n)
15840 .k(k)
15841 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015842 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015843 }
15844 }
15845 }
15846 }
15847
Frank Barcharde22685a2021-11-12 11:36:58 -080015848 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_div_8) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015849 TEST_REQUIRES_ARM_NEON;
15850 for (uint32_t n = 16; n <= 24; n += 8) {
15851 for (size_t k = 1; k <= 80; k += 17) {
15852 GemmMicrokernelTester()
15853 .mr(2)
15854 .nr(8)
15855 .kr(8)
15856 .sr(1)
15857 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015858 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070015859 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015860 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015861 }
15862 }
15863 }
15864
Frank Barcharde22685a2021-11-12 11:36:58 -080015865 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015866 TEST_REQUIRES_ARM_NEON;
15867 for (uint32_t n = 16; n <= 24; n += 8) {
15868 for (size_t k = 1; k <= 80; k += 17) {
15869 GemmMicrokernelTester()
15870 .mr(2)
15871 .nr(8)
15872 .kr(8)
15873 .sr(1)
15874 .m(2)
15875 .n(n)
15876 .k(k)
15877 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015878 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015879 }
15880 }
15881 }
15882
Frank Barcharde22685a2021-11-12 11:36:58 -080015883 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_div_8_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015884 TEST_REQUIRES_ARM_NEON;
15885 for (uint32_t n = 16; n <= 24; n += 8) {
15886 for (size_t k = 1; k <= 80; k += 17) {
15887 GemmMicrokernelTester()
15888 .mr(2)
15889 .nr(8)
15890 .kr(8)
15891 .sr(1)
15892 .m(2)
15893 .n(n)
15894 .k(k)
15895 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015896 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015897 }
15898 }
15899 }
15900
Frank Barcharde22685a2021-11-12 11:36:58 -080015901 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_div_8_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015902 TEST_REQUIRES_ARM_NEON;
15903 for (uint32_t n = 16; n <= 24; n += 8) {
15904 for (size_t k = 1; k <= 80; k += 17) {
15905 for (uint32_t m = 1; m <= 2; m++) {
15906 GemmMicrokernelTester()
15907 .mr(2)
15908 .nr(8)
15909 .kr(8)
15910 .sr(1)
15911 .m(m)
15912 .n(n)
15913 .k(k)
15914 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015915 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015916 }
15917 }
15918 }
15919 }
15920
Frank Barcharde22685a2021-11-12 11:36:58 -080015921 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, strided_cm_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015922 TEST_REQUIRES_ARM_NEON;
15923 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015924 for (uint32_t n = 1; n <= 8; n++) {
15925 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015926 GemmMicrokernelTester()
15927 .mr(2)
15928 .nr(8)
15929 .kr(8)
15930 .sr(1)
15931 .m(m)
15932 .n(n)
15933 .k(k)
15934 .cm_stride(11)
15935 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015936 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015937 }
15938 }
15939 }
15940 }
15941
Frank Barcharde22685a2021-11-12 11:36:58 -080015942 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, qmin) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015943 TEST_REQUIRES_ARM_NEON;
15944 GemmMicrokernelTester()
15945 .mr(2)
15946 .nr(8)
15947 .kr(8)
15948 .sr(1)
15949 .m(2)
15950 .n(8)
15951 .k(16)
15952 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015953 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015954 }
15955
Frank Barcharde22685a2021-11-12 11:36:58 -080015956 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, qmax) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015957 TEST_REQUIRES_ARM_NEON;
15958 GemmMicrokernelTester()
15959 .mr(2)
15960 .nr(8)
15961 .kr(8)
15962 .sr(1)
15963 .m(2)
15964 .n(8)
15965 .k(16)
15966 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015967 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015968 }
15969
Frank Barcharde22685a2021-11-12 11:36:58 -080015970 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, strided_cm) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015971 TEST_REQUIRES_ARM_NEON;
15972 GemmMicrokernelTester()
15973 .mr(2)
15974 .nr(8)
15975 .kr(8)
15976 .sr(1)
15977 .m(2)
15978 .n(8)
15979 .k(16)
15980 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015981 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015982 }
Frank Barcharde31f29e2021-12-21 15:57:10 -080015983#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard13db60f2021-07-20 14:34:35 -070015984
15985
Frank Barcharde31f29e2021-12-21 15:57:10 -080015986#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barcharde22685a2021-11-12 11:36:58 -080015987 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070015988 TEST_REQUIRES_ARM_NEON;
15989 GemmMicrokernelTester()
15990 .mr(2)
15991 .nr(8)
15992 .kr(8)
15993 .sr(1)
15994 .m(2)
15995 .n(8)
15996 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080015997 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070015998 }
15999
Frank Barcharde22685a2021-11-12 11:36:58 -080016000 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016001 TEST_REQUIRES_ARM_NEON;
16002 GemmMicrokernelTester()
16003 .mr(2)
16004 .nr(8)
16005 .kr(8)
16006 .sr(1)
16007 .m(2)
16008 .n(8)
16009 .k(16)
16010 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016011 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016012 }
16013
Frank Barcharde22685a2021-11-12 11:36:58 -080016014 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016015 TEST_REQUIRES_ARM_NEON;
16016 GemmMicrokernelTester()
16017 .mr(2)
16018 .nr(8)
16019 .kr(8)
16020 .sr(1)
16021 .m(2)
16022 .n(8)
16023 .k(16)
16024 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080016025 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016026 }
16027
Frank Barcharde22685a2021-11-12 11:36:58 -080016028 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016029 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016030 for (uint32_t n = 1; n <= 8; n++) {
16031 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016032 GemmMicrokernelTester()
16033 .mr(2)
16034 .nr(8)
16035 .kr(8)
16036 .sr(1)
16037 .m(m)
16038 .n(n)
16039 .k(16)
16040 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016041 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016042 }
16043 }
16044 }
16045
Frank Barcharde22685a2021-11-12 11:36:58 -080016046 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016047 TEST_REQUIRES_ARM_NEON;
16048 for (uint32_t m = 1; m <= 2; m++) {
16049 GemmMicrokernelTester()
16050 .mr(2)
16051 .nr(8)
16052 .kr(8)
16053 .sr(1)
16054 .m(m)
16055 .n(8)
16056 .k(16)
16057 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016058 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016059 }
16060 }
16061
Frank Barcharde22685a2021-11-12 11:36:58 -080016062 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016063 TEST_REQUIRES_ARM_NEON;
16064 for (uint32_t n = 1; n <= 8; n++) {
16065 GemmMicrokernelTester()
16066 .mr(2)
16067 .nr(8)
16068 .kr(8)
16069 .sr(1)
16070 .m(2)
16071 .n(n)
16072 .k(16)
16073 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016074 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016075 }
16076 }
16077
Frank Barcharde22685a2021-11-12 11:36:58 -080016078 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016079 TEST_REQUIRES_ARM_NEON;
16080 for (size_t k = 1; k < 16; k++) {
16081 GemmMicrokernelTester()
16082 .mr(2)
16083 .nr(8)
16084 .kr(8)
16085 .sr(1)
16086 .m(2)
16087 .n(8)
16088 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016089 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016090 }
16091 }
16092
Frank Barcharde22685a2021-11-12 11:36:58 -080016093 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016094 TEST_REQUIRES_ARM_NEON;
16095 for (size_t k = 1; k < 16; k++) {
16096 GemmMicrokernelTester()
16097 .mr(2)
16098 .nr(8)
16099 .kr(8)
16100 .sr(1)
16101 .m(2)
16102 .n(8)
16103 .k(k)
16104 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080016105 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016106 }
16107 }
16108
Frank Barcharde22685a2021-11-12 11:36:58 -080016109 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016110 TEST_REQUIRES_ARM_NEON;
16111 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016112 for (uint32_t n = 1; n <= 8; n++) {
16113 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016114 GemmMicrokernelTester()
16115 .mr(2)
16116 .nr(8)
16117 .kr(8)
16118 .sr(1)
16119 .m(m)
16120 .n(n)
16121 .k(k)
16122 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016123 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016124 }
16125 }
16126 }
16127 }
16128
Frank Barcharde22685a2021-11-12 11:36:58 -080016129 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016130 TEST_REQUIRES_ARM_NEON;
16131 for (size_t k = 17; k < 32; k++) {
16132 GemmMicrokernelTester()
16133 .mr(2)
16134 .nr(8)
16135 .kr(8)
16136 .sr(1)
16137 .m(2)
16138 .n(8)
16139 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016140 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016141 }
16142 }
16143
Frank Barcharde22685a2021-11-12 11:36:58 -080016144 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016145 TEST_REQUIRES_ARM_NEON;
16146 for (size_t k = 17; k < 32; k++) {
16147 GemmMicrokernelTester()
16148 .mr(2)
16149 .nr(8)
16150 .kr(8)
16151 .sr(1)
16152 .m(2)
16153 .n(8)
16154 .k(k)
16155 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080016156 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016157 }
16158 }
16159
Frank Barcharde22685a2021-11-12 11:36:58 -080016160 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016161 TEST_REQUIRES_ARM_NEON;
16162 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016163 for (uint32_t n = 1; n <= 8; n++) {
16164 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016165 GemmMicrokernelTester()
16166 .mr(2)
16167 .nr(8)
16168 .kr(8)
16169 .sr(1)
16170 .m(m)
16171 .n(n)
16172 .k(k)
16173 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016174 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016175 }
16176 }
16177 }
16178 }
16179
Frank Barcharde22685a2021-11-12 11:36:58 -080016180 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016181 TEST_REQUIRES_ARM_NEON;
16182 for (size_t k = 32; k <= 160; k += 16) {
16183 GemmMicrokernelTester()
16184 .mr(2)
16185 .nr(8)
16186 .kr(8)
16187 .sr(1)
16188 .m(2)
16189 .n(8)
16190 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016191 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016192 }
16193 }
16194
Frank Barcharde22685a2021-11-12 11:36:58 -080016195 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016196 TEST_REQUIRES_ARM_NEON;
16197 for (size_t k = 32; k <= 160; k += 16) {
16198 GemmMicrokernelTester()
16199 .mr(2)
16200 .nr(8)
16201 .kr(8)
16202 .sr(1)
16203 .m(2)
16204 .n(8)
16205 .k(k)
16206 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080016207 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016208 }
16209 }
16210
Frank Barcharde22685a2021-11-12 11:36:58 -080016211 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016212 TEST_REQUIRES_ARM_NEON;
16213 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016214 for (uint32_t n = 1; n <= 8; n++) {
16215 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016216 GemmMicrokernelTester()
16217 .mr(2)
16218 .nr(8)
16219 .kr(8)
16220 .sr(1)
16221 .m(m)
16222 .n(n)
16223 .k(k)
16224 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016225 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016226 }
16227 }
16228 }
16229 }
16230
Frank Barcharde22685a2021-11-12 11:36:58 -080016231 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016232 TEST_REQUIRES_ARM_NEON;
16233 for (uint32_t n = 9; n < 16; n++) {
16234 for (size_t k = 1; k <= 80; k += 17) {
16235 GemmMicrokernelTester()
16236 .mr(2)
16237 .nr(8)
16238 .kr(8)
16239 .sr(1)
16240 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016241 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070016242 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016243 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016244 }
16245 }
16246 }
16247
Frank Barcharde22685a2021-11-12 11:36:58 -080016248 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016249 TEST_REQUIRES_ARM_NEON;
16250 for (uint32_t n = 9; n < 16; n++) {
16251 for (size_t k = 1; k <= 80; k += 17) {
16252 GemmMicrokernelTester()
16253 .mr(2)
16254 .nr(8)
16255 .kr(8)
16256 .sr(1)
16257 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016258 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070016259 .k(k)
16260 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016261 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016262 }
16263 }
16264 }
16265
Frank Barcharde22685a2021-11-12 11:36:58 -080016266 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016267 TEST_REQUIRES_ARM_NEON;
16268 for (uint32_t n = 9; n < 16; n++) {
16269 for (size_t k = 1; k <= 80; k += 17) {
16270 GemmMicrokernelTester()
16271 .mr(2)
16272 .nr(8)
16273 .kr(8)
16274 .sr(1)
16275 .m(2)
16276 .n(n)
16277 .k(k)
16278 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016279 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016280 }
16281 }
16282 }
16283
Frank Barcharde22685a2021-11-12 11:36:58 -080016284 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016285 TEST_REQUIRES_ARM_NEON;
16286 for (uint32_t n = 9; n < 16; n++) {
16287 for (size_t k = 1; k <= 80; k += 17) {
16288 for (uint32_t m = 1; m <= 2; m++) {
16289 GemmMicrokernelTester()
16290 .mr(2)
16291 .nr(8)
16292 .kr(8)
16293 .sr(1)
16294 .m(m)
16295 .n(n)
16296 .k(k)
16297 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016298 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016299 }
16300 }
16301 }
16302 }
16303
Frank Barcharde22685a2021-11-12 11:36:58 -080016304 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016305 TEST_REQUIRES_ARM_NEON;
16306 for (uint32_t n = 16; n <= 24; n += 8) {
16307 for (size_t k = 1; k <= 80; k += 17) {
16308 GemmMicrokernelTester()
16309 .mr(2)
16310 .nr(8)
16311 .kr(8)
16312 .sr(1)
16313 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016314 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070016315 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016316 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016317 }
16318 }
16319 }
16320
Frank Barcharde22685a2021-11-12 11:36:58 -080016321 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016322 TEST_REQUIRES_ARM_NEON;
16323 for (uint32_t n = 16; n <= 24; n += 8) {
16324 for (size_t k = 1; k <= 80; k += 17) {
16325 GemmMicrokernelTester()
16326 .mr(2)
16327 .nr(8)
16328 .kr(8)
16329 .sr(1)
16330 .m(2)
16331 .n(n)
16332 .k(k)
16333 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016334 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016335 }
16336 }
16337 }
16338
Frank Barcharde22685a2021-11-12 11:36:58 -080016339 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016340 TEST_REQUIRES_ARM_NEON;
16341 for (uint32_t n = 16; n <= 24; n += 8) {
16342 for (size_t k = 1; k <= 80; k += 17) {
16343 GemmMicrokernelTester()
16344 .mr(2)
16345 .nr(8)
16346 .kr(8)
16347 .sr(1)
16348 .m(2)
16349 .n(n)
16350 .k(k)
16351 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016352 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016353 }
16354 }
16355 }
16356
Frank Barcharde22685a2021-11-12 11:36:58 -080016357 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016358 TEST_REQUIRES_ARM_NEON;
16359 for (uint32_t n = 16; n <= 24; n += 8) {
16360 for (size_t k = 1; k <= 80; k += 17) {
16361 for (uint32_t m = 1; m <= 2; m++) {
16362 GemmMicrokernelTester()
16363 .mr(2)
16364 .nr(8)
16365 .kr(8)
16366 .sr(1)
16367 .m(m)
16368 .n(n)
16369 .k(k)
16370 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016371 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016372 }
16373 }
16374 }
16375 }
16376
Frank Barcharde22685a2021-11-12 11:36:58 -080016377 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016378 TEST_REQUIRES_ARM_NEON;
16379 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016380 for (uint32_t n = 1; n <= 8; n++) {
16381 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016382 GemmMicrokernelTester()
16383 .mr(2)
16384 .nr(8)
16385 .kr(8)
16386 .sr(1)
16387 .m(m)
16388 .n(n)
16389 .k(k)
16390 .cm_stride(11)
16391 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016392 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016393 }
16394 }
16395 }
16396 }
16397
Frank Barcharde22685a2021-11-12 11:36:58 -080016398 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmin) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016399 TEST_REQUIRES_ARM_NEON;
16400 GemmMicrokernelTester()
16401 .mr(2)
16402 .nr(8)
16403 .kr(8)
16404 .sr(1)
16405 .m(2)
16406 .n(8)
16407 .k(16)
16408 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016409 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016410 }
16411
Frank Barcharde22685a2021-11-12 11:36:58 -080016412 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmax) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016413 TEST_REQUIRES_ARM_NEON;
16414 GemmMicrokernelTester()
16415 .mr(2)
16416 .nr(8)
16417 .kr(8)
16418 .sr(1)
16419 .m(2)
16420 .n(8)
16421 .k(16)
16422 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016423 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016424 }
16425
Frank Barcharde22685a2021-11-12 11:36:58 -080016426 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016427 TEST_REQUIRES_ARM_NEON;
16428 GemmMicrokernelTester()
16429 .mr(2)
16430 .nr(8)
16431 .kr(8)
16432 .sr(1)
16433 .m(2)
16434 .n(8)
16435 .k(16)
16436 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016437 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016438 }
Frank Barcharde31f29e2021-12-21 15:57:10 -080016439#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard13db60f2021-07-20 14:34:35 -070016440
16441
Frank Barcharde31f29e2021-12-21 15:57:10 -080016442#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barcharde22685a2021-11-12 11:36:58 -080016443 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_eq_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016444 TEST_REQUIRES_ARM_NEON;
16445 GemmMicrokernelTester()
16446 .mr(2)
16447 .nr(8)
16448 .kr(16)
16449 .sr(1)
16450 .m(2)
16451 .n(8)
16452 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080016453 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016454 }
16455
Frank Barcharde22685a2021-11-12 11:36:58 -080016456 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016457 TEST_REQUIRES_ARM_NEON;
16458 GemmMicrokernelTester()
16459 .mr(2)
16460 .nr(8)
16461 .kr(16)
16462 .sr(1)
16463 .m(2)
16464 .n(8)
16465 .k(16)
16466 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016467 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016468 }
16469
Frank Barcharde22685a2021-11-12 11:36:58 -080016470 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_eq_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016471 TEST_REQUIRES_ARM_NEON;
16472 GemmMicrokernelTester()
16473 .mr(2)
16474 .nr(8)
16475 .kr(16)
16476 .sr(1)
16477 .m(2)
16478 .n(8)
16479 .k(16)
16480 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080016481 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016482 }
16483
Frank Barcharde22685a2021-11-12 11:36:58 -080016484 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016485 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016486 for (uint32_t n = 1; n <= 8; n++) {
16487 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016488 GemmMicrokernelTester()
16489 .mr(2)
16490 .nr(8)
16491 .kr(16)
16492 .sr(1)
16493 .m(m)
16494 .n(n)
16495 .k(16)
16496 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016497 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016498 }
16499 }
16500 }
16501
Frank Barcharde22685a2021-11-12 11:36:58 -080016502 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016503 TEST_REQUIRES_ARM_NEON;
16504 for (uint32_t m = 1; m <= 2; m++) {
16505 GemmMicrokernelTester()
16506 .mr(2)
16507 .nr(8)
16508 .kr(16)
16509 .sr(1)
16510 .m(m)
16511 .n(8)
16512 .k(16)
16513 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016514 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016515 }
16516 }
16517
Frank Barcharde22685a2021-11-12 11:36:58 -080016518 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016519 TEST_REQUIRES_ARM_NEON;
16520 for (uint32_t n = 1; n <= 8; n++) {
16521 GemmMicrokernelTester()
16522 .mr(2)
16523 .nr(8)
16524 .kr(16)
16525 .sr(1)
16526 .m(2)
16527 .n(n)
16528 .k(16)
16529 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016530 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016531 }
16532 }
16533
Frank Barcharde22685a2021-11-12 11:36:58 -080016534 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_lt_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016535 TEST_REQUIRES_ARM_NEON;
16536 for (size_t k = 1; k < 16; k++) {
16537 GemmMicrokernelTester()
16538 .mr(2)
16539 .nr(8)
16540 .kr(16)
16541 .sr(1)
16542 .m(2)
16543 .n(8)
16544 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016545 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016546 }
16547 }
16548
Frank Barcharde22685a2021-11-12 11:36:58 -080016549 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_lt_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016550 TEST_REQUIRES_ARM_NEON;
16551 for (size_t k = 1; k < 16; k++) {
16552 GemmMicrokernelTester()
16553 .mr(2)
16554 .nr(8)
16555 .kr(16)
16556 .sr(1)
16557 .m(2)
16558 .n(8)
16559 .k(k)
16560 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080016561 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016562 }
16563 }
16564
Frank Barcharde22685a2021-11-12 11:36:58 -080016565 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_lt_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016566 TEST_REQUIRES_ARM_NEON;
16567 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016568 for (uint32_t n = 1; n <= 8; n++) {
16569 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016570 GemmMicrokernelTester()
16571 .mr(2)
16572 .nr(8)
16573 .kr(16)
16574 .sr(1)
16575 .m(m)
16576 .n(n)
16577 .k(k)
16578 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016579 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016580 }
16581 }
16582 }
16583 }
16584
Frank Barcharde22685a2021-11-12 11:36:58 -080016585 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_gt_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016586 TEST_REQUIRES_ARM_NEON;
16587 for (size_t k = 17; k < 32; k++) {
16588 GemmMicrokernelTester()
16589 .mr(2)
16590 .nr(8)
16591 .kr(16)
16592 .sr(1)
16593 .m(2)
16594 .n(8)
16595 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016596 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016597 }
16598 }
16599
Frank Barcharde22685a2021-11-12 11:36:58 -080016600 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_gt_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016601 TEST_REQUIRES_ARM_NEON;
16602 for (size_t k = 17; k < 32; k++) {
16603 GemmMicrokernelTester()
16604 .mr(2)
16605 .nr(8)
16606 .kr(16)
16607 .sr(1)
16608 .m(2)
16609 .n(8)
16610 .k(k)
16611 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080016612 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016613 }
16614 }
16615
Frank Barcharde22685a2021-11-12 11:36:58 -080016616 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_gt_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016617 TEST_REQUIRES_ARM_NEON;
16618 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016619 for (uint32_t n = 1; n <= 8; n++) {
16620 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016621 GemmMicrokernelTester()
16622 .mr(2)
16623 .nr(8)
16624 .kr(16)
16625 .sr(1)
16626 .m(m)
16627 .n(n)
16628 .k(k)
16629 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016630 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016631 }
16632 }
16633 }
16634 }
16635
Frank Barcharde22685a2021-11-12 11:36:58 -080016636 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_div_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016637 TEST_REQUIRES_ARM_NEON;
16638 for (size_t k = 32; k <= 160; k += 16) {
16639 GemmMicrokernelTester()
16640 .mr(2)
16641 .nr(8)
16642 .kr(16)
16643 .sr(1)
16644 .m(2)
16645 .n(8)
16646 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016647 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016648 }
16649 }
16650
Frank Barcharde22685a2021-11-12 11:36:58 -080016651 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_div_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016652 TEST_REQUIRES_ARM_NEON;
16653 for (size_t k = 32; k <= 160; k += 16) {
16654 GemmMicrokernelTester()
16655 .mr(2)
16656 .nr(8)
16657 .kr(16)
16658 .sr(1)
16659 .m(2)
16660 .n(8)
16661 .k(k)
16662 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080016663 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016664 }
16665 }
16666
Frank Barcharde22685a2021-11-12 11:36:58 -080016667 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_div_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016668 TEST_REQUIRES_ARM_NEON;
16669 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016670 for (uint32_t n = 1; n <= 8; n++) {
16671 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016672 GemmMicrokernelTester()
16673 .mr(2)
16674 .nr(8)
16675 .kr(16)
16676 .sr(1)
16677 .m(m)
16678 .n(n)
16679 .k(k)
16680 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016681 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016682 }
16683 }
16684 }
16685 }
16686
Frank Barcharde22685a2021-11-12 11:36:58 -080016687 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_gt_8) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016688 TEST_REQUIRES_ARM_NEON;
16689 for (uint32_t n = 9; n < 16; n++) {
16690 for (size_t k = 1; k <= 80; k += 17) {
16691 GemmMicrokernelTester()
16692 .mr(2)
16693 .nr(8)
16694 .kr(16)
16695 .sr(1)
16696 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016697 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070016698 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016699 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016700 }
16701 }
16702 }
16703
Frank Barcharde22685a2021-11-12 11:36:58 -080016704 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016705 TEST_REQUIRES_ARM_NEON;
16706 for (uint32_t n = 9; n < 16; n++) {
16707 for (size_t k = 1; k <= 80; k += 17) {
16708 GemmMicrokernelTester()
16709 .mr(2)
16710 .nr(8)
16711 .kr(16)
16712 .sr(1)
16713 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016714 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070016715 .k(k)
16716 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016717 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016718 }
16719 }
16720 }
16721
Frank Barcharde22685a2021-11-12 11:36:58 -080016722 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_gt_8_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016723 TEST_REQUIRES_ARM_NEON;
16724 for (uint32_t n = 9; n < 16; n++) {
16725 for (size_t k = 1; k <= 80; k += 17) {
16726 GemmMicrokernelTester()
16727 .mr(2)
16728 .nr(8)
16729 .kr(16)
16730 .sr(1)
16731 .m(2)
16732 .n(n)
16733 .k(k)
16734 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016735 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016736 }
16737 }
16738 }
16739
Frank Barcharde22685a2021-11-12 11:36:58 -080016740 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_gt_8_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016741 TEST_REQUIRES_ARM_NEON;
16742 for (uint32_t n = 9; n < 16; n++) {
16743 for (size_t k = 1; k <= 80; k += 17) {
16744 for (uint32_t m = 1; m <= 2; m++) {
16745 GemmMicrokernelTester()
16746 .mr(2)
16747 .nr(8)
16748 .kr(16)
16749 .sr(1)
16750 .m(m)
16751 .n(n)
16752 .k(k)
16753 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016754 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016755 }
16756 }
16757 }
16758 }
16759
Frank Barcharde22685a2021-11-12 11:36:58 -080016760 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_div_8) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016761 TEST_REQUIRES_ARM_NEON;
16762 for (uint32_t n = 16; n <= 24; n += 8) {
16763 for (size_t k = 1; k <= 80; k += 17) {
16764 GemmMicrokernelTester()
16765 .mr(2)
16766 .nr(8)
16767 .kr(16)
16768 .sr(1)
16769 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016770 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070016771 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016772 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016773 }
16774 }
16775 }
16776
Frank Barcharde22685a2021-11-12 11:36:58 -080016777 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016778 TEST_REQUIRES_ARM_NEON;
16779 for (uint32_t n = 16; n <= 24; n += 8) {
16780 for (size_t k = 1; k <= 80; k += 17) {
16781 GemmMicrokernelTester()
16782 .mr(2)
16783 .nr(8)
16784 .kr(16)
16785 .sr(1)
16786 .m(2)
16787 .n(n)
16788 .k(k)
16789 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016790 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016791 }
16792 }
16793 }
16794
Frank Barcharde22685a2021-11-12 11:36:58 -080016795 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_div_8_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016796 TEST_REQUIRES_ARM_NEON;
16797 for (uint32_t n = 16; n <= 24; n += 8) {
16798 for (size_t k = 1; k <= 80; k += 17) {
16799 GemmMicrokernelTester()
16800 .mr(2)
16801 .nr(8)
16802 .kr(16)
16803 .sr(1)
16804 .m(2)
16805 .n(n)
16806 .k(k)
16807 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016808 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016809 }
16810 }
16811 }
16812
Frank Barcharde22685a2021-11-12 11:36:58 -080016813 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_div_8_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016814 TEST_REQUIRES_ARM_NEON;
16815 for (uint32_t n = 16; n <= 24; n += 8) {
16816 for (size_t k = 1; k <= 80; k += 17) {
16817 for (uint32_t m = 1; m <= 2; m++) {
16818 GemmMicrokernelTester()
16819 .mr(2)
16820 .nr(8)
16821 .kr(16)
16822 .sr(1)
16823 .m(m)
16824 .n(n)
16825 .k(k)
16826 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016827 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016828 }
16829 }
16830 }
16831 }
16832
Frank Barcharde22685a2021-11-12 11:36:58 -080016833 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, strided_cm_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016834 TEST_REQUIRES_ARM_NEON;
16835 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016836 for (uint32_t n = 1; n <= 8; n++) {
16837 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016838 GemmMicrokernelTester()
16839 .mr(2)
16840 .nr(8)
16841 .kr(16)
16842 .sr(1)
16843 .m(m)
16844 .n(n)
16845 .k(k)
16846 .cm_stride(11)
16847 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016848 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016849 }
16850 }
16851 }
16852 }
16853
Frank Barcharde22685a2021-11-12 11:36:58 -080016854 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, qmin) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016855 TEST_REQUIRES_ARM_NEON;
16856 GemmMicrokernelTester()
16857 .mr(2)
16858 .nr(8)
16859 .kr(16)
16860 .sr(1)
16861 .m(2)
16862 .n(8)
16863 .k(16)
16864 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016865 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016866 }
16867
Frank Barcharde22685a2021-11-12 11:36:58 -080016868 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, qmax) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016869 TEST_REQUIRES_ARM_NEON;
16870 GemmMicrokernelTester()
16871 .mr(2)
16872 .nr(8)
16873 .kr(16)
16874 .sr(1)
16875 .m(2)
16876 .n(8)
16877 .k(16)
16878 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016879 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016880 }
16881
Frank Barcharde22685a2021-11-12 11:36:58 -080016882 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, strided_cm) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016883 TEST_REQUIRES_ARM_NEON;
16884 GemmMicrokernelTester()
16885 .mr(2)
16886 .nr(8)
16887 .kr(16)
16888 .sr(1)
16889 .m(2)
16890 .n(8)
16891 .k(16)
16892 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016893 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016894 }
Frank Barcharde31f29e2021-12-21 15:57:10 -080016895#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard13db60f2021-07-20 14:34:35 -070016896
16897
Frank Barcharde31f29e2021-12-21 15:57:10 -080016898#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barcharde22685a2021-11-12 11:36:58 -080016899 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_eq_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016900 TEST_REQUIRES_ARM_NEON;
16901 GemmMicrokernelTester()
16902 .mr(1)
16903 .nr(8)
16904 .kr(8)
16905 .sr(1)
16906 .m(1)
16907 .n(8)
16908 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080016909 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016910 }
16911
Frank Barcharde22685a2021-11-12 11:36:58 -080016912 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016913 TEST_REQUIRES_ARM_NEON;
16914 GemmMicrokernelTester()
16915 .mr(1)
16916 .nr(8)
16917 .kr(8)
16918 .sr(1)
16919 .m(1)
16920 .n(8)
16921 .k(16)
16922 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016923 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016924 }
16925
Frank Barcharde22685a2021-11-12 11:36:58 -080016926 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_eq_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016927 TEST_REQUIRES_ARM_NEON;
16928 GemmMicrokernelTester()
16929 .mr(1)
16930 .nr(8)
16931 .kr(8)
16932 .sr(1)
16933 .m(1)
16934 .n(8)
16935 .k(16)
16936 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080016937 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016938 }
16939
Frank Barcharde22685a2021-11-12 11:36:58 -080016940 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016941 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016942 for (uint32_t n = 1; n <= 8; n++) {
16943 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016944 GemmMicrokernelTester()
16945 .mr(1)
16946 .nr(8)
16947 .kr(8)
16948 .sr(1)
16949 .m(m)
16950 .n(n)
16951 .k(16)
16952 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016953 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016954 }
16955 }
16956 }
16957
Frank Barcharde22685a2021-11-12 11:36:58 -080016958 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016959 TEST_REQUIRES_ARM_NEON;
16960 for (uint32_t m = 1; m <= 1; m++) {
16961 GemmMicrokernelTester()
16962 .mr(1)
16963 .nr(8)
16964 .kr(8)
16965 .sr(1)
16966 .m(m)
16967 .n(8)
16968 .k(16)
16969 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016970 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016971 }
16972 }
16973
Frank Barcharde22685a2021-11-12 11:36:58 -080016974 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016975 TEST_REQUIRES_ARM_NEON;
16976 for (uint32_t n = 1; n <= 8; n++) {
16977 GemmMicrokernelTester()
16978 .mr(1)
16979 .nr(8)
16980 .kr(8)
16981 .sr(1)
16982 .m(1)
16983 .n(n)
16984 .k(16)
16985 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016986 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016987 }
16988 }
16989
Frank Barcharde22685a2021-11-12 11:36:58 -080016990 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_lt_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016991 TEST_REQUIRES_ARM_NEON;
16992 for (size_t k = 1; k < 16; k++) {
16993 GemmMicrokernelTester()
16994 .mr(1)
16995 .nr(8)
16996 .kr(8)
16997 .sr(1)
16998 .m(1)
16999 .n(8)
17000 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017001 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017002 }
17003 }
17004
Frank Barcharde22685a2021-11-12 11:36:58 -080017005 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_lt_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017006 TEST_REQUIRES_ARM_NEON;
17007 for (size_t k = 1; k < 16; k++) {
17008 GemmMicrokernelTester()
17009 .mr(1)
17010 .nr(8)
17011 .kr(8)
17012 .sr(1)
17013 .m(1)
17014 .n(8)
17015 .k(k)
17016 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017017 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017018 }
17019 }
17020
Frank Barcharde22685a2021-11-12 11:36:58 -080017021 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_lt_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017022 TEST_REQUIRES_ARM_NEON;
17023 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017024 for (uint32_t n = 1; n <= 8; n++) {
17025 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017026 GemmMicrokernelTester()
17027 .mr(1)
17028 .nr(8)
17029 .kr(8)
17030 .sr(1)
17031 .m(m)
17032 .n(n)
17033 .k(k)
17034 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017035 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017036 }
17037 }
17038 }
17039 }
17040
Frank Barcharde22685a2021-11-12 11:36:58 -080017041 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_gt_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017042 TEST_REQUIRES_ARM_NEON;
17043 for (size_t k = 17; k < 32; k++) {
17044 GemmMicrokernelTester()
17045 .mr(1)
17046 .nr(8)
17047 .kr(8)
17048 .sr(1)
17049 .m(1)
17050 .n(8)
17051 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017052 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017053 }
17054 }
17055
Frank Barcharde22685a2021-11-12 11:36:58 -080017056 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_gt_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017057 TEST_REQUIRES_ARM_NEON;
17058 for (size_t k = 17; k < 32; k++) {
17059 GemmMicrokernelTester()
17060 .mr(1)
17061 .nr(8)
17062 .kr(8)
17063 .sr(1)
17064 .m(1)
17065 .n(8)
17066 .k(k)
17067 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080017068 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017069 }
17070 }
17071
Frank Barcharde22685a2021-11-12 11:36:58 -080017072 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_gt_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017073 TEST_REQUIRES_ARM_NEON;
17074 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017075 for (uint32_t n = 1; n <= 8; n++) {
17076 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017077 GemmMicrokernelTester()
17078 .mr(1)
17079 .nr(8)
17080 .kr(8)
17081 .sr(1)
17082 .m(m)
17083 .n(n)
17084 .k(k)
17085 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017086 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017087 }
17088 }
17089 }
17090 }
17091
Frank Barcharde22685a2021-11-12 11:36:58 -080017092 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_div_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017093 TEST_REQUIRES_ARM_NEON;
17094 for (size_t k = 32; k <= 160; k += 16) {
17095 GemmMicrokernelTester()
17096 .mr(1)
17097 .nr(8)
17098 .kr(8)
17099 .sr(1)
17100 .m(1)
17101 .n(8)
17102 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017103 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017104 }
17105 }
17106
Frank Barcharde22685a2021-11-12 11:36:58 -080017107 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_div_16_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017108 TEST_REQUIRES_ARM_NEON;
17109 for (size_t k = 32; k <= 160; k += 16) {
17110 GemmMicrokernelTester()
17111 .mr(1)
17112 .nr(8)
17113 .kr(8)
17114 .sr(1)
17115 .m(1)
17116 .n(8)
17117 .k(k)
17118 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080017119 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017120 }
17121 }
17122
Frank Barcharde22685a2021-11-12 11:36:58 -080017123 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_div_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017124 TEST_REQUIRES_ARM_NEON;
17125 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017126 for (uint32_t n = 1; n <= 8; n++) {
17127 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017128 GemmMicrokernelTester()
17129 .mr(1)
17130 .nr(8)
17131 .kr(8)
17132 .sr(1)
17133 .m(m)
17134 .n(n)
17135 .k(k)
17136 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017137 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017138 }
17139 }
17140 }
17141 }
17142
Frank Barcharde22685a2021-11-12 11:36:58 -080017143 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_gt_8) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017144 TEST_REQUIRES_ARM_NEON;
17145 for (uint32_t n = 9; n < 16; n++) {
17146 for (size_t k = 1; k <= 80; k += 17) {
17147 GemmMicrokernelTester()
17148 .mr(1)
17149 .nr(8)
17150 .kr(8)
17151 .sr(1)
17152 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017153 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070017154 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017155 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017156 }
17157 }
17158 }
17159
Frank Barcharde22685a2021-11-12 11:36:58 -080017160 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017161 TEST_REQUIRES_ARM_NEON;
17162 for (uint32_t n = 9; n < 16; n++) {
17163 for (size_t k = 1; k <= 80; k += 17) {
17164 GemmMicrokernelTester()
17165 .mr(1)
17166 .nr(8)
17167 .kr(8)
17168 .sr(1)
17169 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017170 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070017171 .k(k)
17172 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017173 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017174 }
17175 }
17176 }
17177
Frank Barcharde22685a2021-11-12 11:36:58 -080017178 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017179 TEST_REQUIRES_ARM_NEON;
17180 for (uint32_t n = 9; n < 16; n++) {
17181 for (size_t k = 1; k <= 80; k += 17) {
17182 GemmMicrokernelTester()
17183 .mr(1)
17184 .nr(8)
17185 .kr(8)
17186 .sr(1)
17187 .m(1)
17188 .n(n)
17189 .k(k)
17190 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017191 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017192 }
17193 }
17194 }
17195
Frank Barcharde22685a2021-11-12 11:36:58 -080017196 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_gt_8_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017197 TEST_REQUIRES_ARM_NEON;
17198 for (uint32_t n = 9; n < 16; n++) {
17199 for (size_t k = 1; k <= 80; k += 17) {
17200 for (uint32_t m = 1; m <= 1; m++) {
17201 GemmMicrokernelTester()
17202 .mr(1)
17203 .nr(8)
17204 .kr(8)
17205 .sr(1)
17206 .m(m)
17207 .n(n)
17208 .k(k)
17209 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017210 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017211 }
17212 }
17213 }
17214 }
17215
Frank Barcharde22685a2021-11-12 11:36:58 -080017216 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_div_8) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017217 TEST_REQUIRES_ARM_NEON;
17218 for (uint32_t n = 16; n <= 24; n += 8) {
17219 for (size_t k = 1; k <= 80; k += 17) {
17220 GemmMicrokernelTester()
17221 .mr(1)
17222 .nr(8)
17223 .kr(8)
17224 .sr(1)
17225 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017226 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070017227 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017228 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017229 }
17230 }
17231 }
17232
Frank Barcharde22685a2021-11-12 11:36:58 -080017233 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017234 TEST_REQUIRES_ARM_NEON;
17235 for (uint32_t n = 16; n <= 24; n += 8) {
17236 for (size_t k = 1; k <= 80; k += 17) {
17237 GemmMicrokernelTester()
17238 .mr(1)
17239 .nr(8)
17240 .kr(8)
17241 .sr(1)
17242 .m(1)
17243 .n(n)
17244 .k(k)
17245 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017246 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017247 }
17248 }
17249 }
17250
Frank Barcharde22685a2021-11-12 11:36:58 -080017251 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_div_8_strided_a) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017252 TEST_REQUIRES_ARM_NEON;
17253 for (uint32_t n = 16; n <= 24; n += 8) {
17254 for (size_t k = 1; k <= 80; k += 17) {
17255 GemmMicrokernelTester()
17256 .mr(1)
17257 .nr(8)
17258 .kr(8)
17259 .sr(1)
17260 .m(1)
17261 .n(n)
17262 .k(k)
17263 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017264 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017265 }
17266 }
17267 }
17268
Frank Barcharde22685a2021-11-12 11:36:58 -080017269 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_div_8_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017270 TEST_REQUIRES_ARM_NEON;
17271 for (uint32_t n = 16; n <= 24; n += 8) {
17272 for (size_t k = 1; k <= 80; k += 17) {
17273 for (uint32_t m = 1; m <= 1; m++) {
17274 GemmMicrokernelTester()
17275 .mr(1)
17276 .nr(8)
17277 .kr(8)
17278 .sr(1)
17279 .m(m)
17280 .n(n)
17281 .k(k)
17282 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017283 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017284 }
17285 }
17286 }
17287 }
17288
Frank Barcharde22685a2021-11-12 11:36:58 -080017289 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, strided_cm_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017290 TEST_REQUIRES_ARM_NEON;
17291 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017292 for (uint32_t n = 1; n <= 8; n++) {
17293 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017294 GemmMicrokernelTester()
17295 .mr(1)
17296 .nr(8)
17297 .kr(8)
17298 .sr(1)
17299 .m(m)
17300 .n(n)
17301 .k(k)
17302 .cm_stride(11)
17303 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017304 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017305 }
17306 }
17307 }
17308 }
17309
Frank Barcharde22685a2021-11-12 11:36:58 -080017310 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, qmin) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017311 TEST_REQUIRES_ARM_NEON;
17312 GemmMicrokernelTester()
17313 .mr(1)
17314 .nr(8)
17315 .kr(8)
17316 .sr(1)
17317 .m(1)
17318 .n(8)
17319 .k(16)
17320 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017321 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017322 }
17323
Frank Barcharde22685a2021-11-12 11:36:58 -080017324 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, qmax) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017325 TEST_REQUIRES_ARM_NEON;
17326 GemmMicrokernelTester()
17327 .mr(1)
17328 .nr(8)
17329 .kr(8)
17330 .sr(1)
17331 .m(1)
17332 .n(8)
17333 .k(16)
17334 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017335 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017336 }
17337
Frank Barcharde22685a2021-11-12 11:36:58 -080017338 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, strided_cm) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017339 TEST_REQUIRES_ARM_NEON;
17340 GemmMicrokernelTester()
17341 .mr(1)
17342 .nr(8)
17343 .kr(8)
17344 .sr(1)
17345 .m(1)
17346 .n(8)
17347 .k(16)
17348 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017349 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017350 }
Frank Barcharde31f29e2021-12-21 15:57:10 -080017351#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard13db60f2021-07-20 14:34:35 -070017352
17353
Frank Barcharde31f29e2021-12-21 15:57:10 -080017354#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard5cffb642021-11-22 13:59:43 -080017355 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
17356 TEST_REQUIRES_ARM_NEON;
17357 GemmMicrokernelTester()
17358 .mr(4)
17359 .nr(16)
17360 .kr(1)
17361 .sr(1)
17362 .m(4)
17363 .n(16)
17364 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017365 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017366 }
17367
17368 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
17369 TEST_REQUIRES_ARM_NEON;
17370 GemmMicrokernelTester()
17371 .mr(4)
17372 .nr(16)
17373 .kr(1)
17374 .sr(1)
17375 .m(4)
17376 .n(16)
17377 .k(8)
17378 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017379 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017380 }
17381
17382 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_strided_a) {
17383 TEST_REQUIRES_ARM_NEON;
17384 GemmMicrokernelTester()
17385 .mr(4)
17386 .nr(16)
17387 .kr(1)
17388 .sr(1)
17389 .m(4)
17390 .n(16)
17391 .k(8)
17392 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017393 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017394 }
17395
17396 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
17397 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017398 for (uint32_t n = 1; n <= 16; n++) {
17399 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017400 GemmMicrokernelTester()
17401 .mr(4)
17402 .nr(16)
17403 .kr(1)
17404 .sr(1)
17405 .m(m)
17406 .n(n)
17407 .k(8)
17408 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017409 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017410 }
17411 }
17412 }
17413
17414 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
17415 TEST_REQUIRES_ARM_NEON;
17416 for (uint32_t m = 1; m <= 4; m++) {
17417 GemmMicrokernelTester()
17418 .mr(4)
17419 .nr(16)
17420 .kr(1)
17421 .sr(1)
17422 .m(m)
17423 .n(16)
17424 .k(8)
17425 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017426 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017427 }
17428 }
17429
17430 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
17431 TEST_REQUIRES_ARM_NEON;
17432 for (uint32_t n = 1; n <= 16; n++) {
17433 GemmMicrokernelTester()
17434 .mr(4)
17435 .nr(16)
17436 .kr(1)
17437 .sr(1)
17438 .m(4)
17439 .n(n)
17440 .k(8)
17441 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017442 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017443 }
17444 }
17445
17446 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
17447 TEST_REQUIRES_ARM_NEON;
17448 for (size_t k = 1; k < 8; k++) {
17449 GemmMicrokernelTester()
17450 .mr(4)
17451 .nr(16)
17452 .kr(1)
17453 .sr(1)
17454 .m(4)
17455 .n(16)
17456 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017457 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017458 }
17459 }
17460
17461 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_strided_a) {
17462 TEST_REQUIRES_ARM_NEON;
17463 for (size_t k = 1; k < 8; k++) {
17464 GemmMicrokernelTester()
17465 .mr(4)
17466 .nr(16)
17467 .kr(1)
17468 .sr(1)
17469 .m(4)
17470 .n(16)
17471 .k(k)
17472 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017473 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017474 }
17475 }
17476
17477 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
17478 TEST_REQUIRES_ARM_NEON;
17479 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017480 for (uint32_t n = 1; n <= 16; n++) {
17481 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017482 GemmMicrokernelTester()
17483 .mr(4)
17484 .nr(16)
17485 .kr(1)
17486 .sr(1)
17487 .m(m)
17488 .n(n)
17489 .k(k)
17490 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017491 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017492 }
17493 }
17494 }
17495 }
17496
17497 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
17498 TEST_REQUIRES_ARM_NEON;
17499 for (size_t k = 9; k < 16; k++) {
17500 GemmMicrokernelTester()
17501 .mr(4)
17502 .nr(16)
17503 .kr(1)
17504 .sr(1)
17505 .m(4)
17506 .n(16)
17507 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017508 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017509 }
17510 }
17511
17512 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_strided_a) {
17513 TEST_REQUIRES_ARM_NEON;
17514 for (size_t k = 9; k < 16; k++) {
17515 GemmMicrokernelTester()
17516 .mr(4)
17517 .nr(16)
17518 .kr(1)
17519 .sr(1)
17520 .m(4)
17521 .n(16)
17522 .k(k)
17523 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017524 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017525 }
17526 }
17527
17528 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
17529 TEST_REQUIRES_ARM_NEON;
17530 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017531 for (uint32_t n = 1; n <= 16; n++) {
17532 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017533 GemmMicrokernelTester()
17534 .mr(4)
17535 .nr(16)
17536 .kr(1)
17537 .sr(1)
17538 .m(m)
17539 .n(n)
17540 .k(k)
17541 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017542 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017543 }
17544 }
17545 }
17546 }
17547
17548 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
17549 TEST_REQUIRES_ARM_NEON;
17550 for (size_t k = 16; k <= 80; k += 8) {
17551 GemmMicrokernelTester()
17552 .mr(4)
17553 .nr(16)
17554 .kr(1)
17555 .sr(1)
17556 .m(4)
17557 .n(16)
17558 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017559 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017560 }
17561 }
17562
17563 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_strided_a) {
17564 TEST_REQUIRES_ARM_NEON;
17565 for (size_t k = 16; k <= 80; k += 8) {
17566 GemmMicrokernelTester()
17567 .mr(4)
17568 .nr(16)
17569 .kr(1)
17570 .sr(1)
17571 .m(4)
17572 .n(16)
17573 .k(k)
17574 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017575 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017576 }
17577 }
17578
17579 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
17580 TEST_REQUIRES_ARM_NEON;
17581 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017582 for (uint32_t n = 1; n <= 16; n++) {
17583 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017584 GemmMicrokernelTester()
17585 .mr(4)
17586 .nr(16)
17587 .kr(1)
17588 .sr(1)
17589 .m(m)
17590 .n(n)
17591 .k(k)
17592 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017593 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017594 }
17595 }
17596 }
17597 }
17598
17599 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16) {
17600 TEST_REQUIRES_ARM_NEON;
17601 for (uint32_t n = 17; n < 32; n++) {
17602 for (size_t k = 1; k <= 40; k += 9) {
17603 GemmMicrokernelTester()
17604 .mr(4)
17605 .nr(16)
17606 .kr(1)
17607 .sr(1)
17608 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017609 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -080017610 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017611 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017612 }
17613 }
17614 }
17615
17616 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_cn) {
17617 TEST_REQUIRES_ARM_NEON;
17618 for (uint32_t n = 17; n < 32; n++) {
17619 for (size_t k = 1; k <= 40; k += 9) {
17620 GemmMicrokernelTester()
17621 .mr(4)
17622 .nr(16)
17623 .kr(1)
17624 .sr(1)
17625 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017626 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -080017627 .k(k)
17628 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017629 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017630 }
17631 }
17632 }
17633
17634 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_a) {
17635 TEST_REQUIRES_ARM_NEON;
17636 for (uint32_t n = 17; n < 32; n++) {
17637 for (size_t k = 1; k <= 40; k += 9) {
17638 GemmMicrokernelTester()
17639 .mr(4)
17640 .nr(16)
17641 .kr(1)
17642 .sr(1)
17643 .m(4)
17644 .n(n)
17645 .k(k)
17646 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017647 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017648 }
17649 }
17650 }
17651
17652 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_subtile) {
17653 TEST_REQUIRES_ARM_NEON;
17654 for (uint32_t n = 17; n < 32; n++) {
17655 for (size_t k = 1; k <= 40; k += 9) {
17656 for (uint32_t m = 1; m <= 4; m++) {
17657 GemmMicrokernelTester()
17658 .mr(4)
17659 .nr(16)
17660 .kr(1)
17661 .sr(1)
17662 .m(m)
17663 .n(n)
17664 .k(k)
17665 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017666 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017667 }
17668 }
17669 }
17670 }
17671
17672 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16) {
17673 TEST_REQUIRES_ARM_NEON;
17674 for (uint32_t n = 32; n <= 48; n += 16) {
17675 for (size_t k = 1; k <= 40; k += 9) {
17676 GemmMicrokernelTester()
17677 .mr(4)
17678 .nr(16)
17679 .kr(1)
17680 .sr(1)
17681 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017682 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -080017683 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017684 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017685 }
17686 }
17687 }
17688
17689 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_cn) {
17690 TEST_REQUIRES_ARM_NEON;
17691 for (uint32_t n = 32; n <= 48; n += 16) {
17692 for (size_t k = 1; k <= 40; k += 9) {
17693 GemmMicrokernelTester()
17694 .mr(4)
17695 .nr(16)
17696 .kr(1)
17697 .sr(1)
17698 .m(4)
17699 .n(n)
17700 .k(k)
17701 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017702 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017703 }
17704 }
17705 }
17706
17707 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_a) {
17708 TEST_REQUIRES_ARM_NEON;
17709 for (uint32_t n = 32; n <= 48; n += 16) {
17710 for (size_t k = 1; k <= 40; k += 9) {
17711 GemmMicrokernelTester()
17712 .mr(4)
17713 .nr(16)
17714 .kr(1)
17715 .sr(1)
17716 .m(4)
17717 .n(n)
17718 .k(k)
17719 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017720 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017721 }
17722 }
17723 }
17724
17725 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_subtile) {
17726 TEST_REQUIRES_ARM_NEON;
17727 for (uint32_t n = 32; n <= 48; n += 16) {
17728 for (size_t k = 1; k <= 40; k += 9) {
17729 for (uint32_t m = 1; m <= 4; m++) {
17730 GemmMicrokernelTester()
17731 .mr(4)
17732 .nr(16)
17733 .kr(1)
17734 .sr(1)
17735 .m(m)
17736 .n(n)
17737 .k(k)
17738 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017739 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017740 }
17741 }
17742 }
17743 }
17744
17745 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
17746 TEST_REQUIRES_ARM_NEON;
17747 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017748 for (uint32_t n = 1; n <= 16; n++) {
17749 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017750 GemmMicrokernelTester()
17751 .mr(4)
17752 .nr(16)
17753 .kr(1)
17754 .sr(1)
17755 .m(m)
17756 .n(n)
17757 .k(k)
17758 .cm_stride(19)
17759 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017760 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017761 }
17762 }
17763 }
17764 }
17765
17766 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) {
17767 TEST_REQUIRES_ARM_NEON;
17768 GemmMicrokernelTester()
17769 .mr(4)
17770 .nr(16)
17771 .kr(1)
17772 .sr(1)
17773 .m(4)
17774 .n(16)
17775 .k(8)
17776 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017777 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017778 }
17779
17780 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) {
17781 TEST_REQUIRES_ARM_NEON;
17782 GemmMicrokernelTester()
17783 .mr(4)
17784 .nr(16)
17785 .kr(1)
17786 .sr(1)
17787 .m(4)
17788 .n(16)
17789 .k(8)
17790 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017791 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017792 }
17793
17794 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
17795 TEST_REQUIRES_ARM_NEON;
17796 GemmMicrokernelTester()
17797 .mr(4)
17798 .nr(16)
17799 .kr(1)
17800 .sr(1)
17801 .m(4)
17802 .n(16)
17803 .k(8)
17804 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017805 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017806 }
Frank Barcharde31f29e2021-12-21 15:57:10 -080017807#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard5cffb642021-11-22 13:59:43 -080017808
17809
Frank Barcharde31f29e2021-12-21 15:57:10 -080017810#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017811 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8) {
17812 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080017813 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017814 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017815 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017816 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080017817 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017818 .m(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017819 .n(16)
17820 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017821 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017822 }
17823
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017824 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
17825 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080017826 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017827 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017828 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017829 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080017830 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017831 .m(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017832 .n(16)
17833 .k(8)
17834 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017835 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017836 }
17837
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017838 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_strided_a) {
17839 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080017840 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017841 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017842 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017843 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080017844 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017845 .m(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017846 .n(16)
17847 .k(8)
17848 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017849 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017850 }
17851
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017852 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
17853 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017854 for (uint32_t n = 1; n <= 16; n++) {
17855 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017856 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017857 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017858 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017859 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080017860 .sr(1)
17861 .m(m)
17862 .n(n)
17863 .k(8)
17864 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017865 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017866 }
17867 }
17868 }
17869
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017870 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
17871 TEST_REQUIRES_ARM_NEON_DOT;
17872 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017873 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017874 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017875 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017876 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080017877 .sr(1)
17878 .m(m)
17879 .n(16)
17880 .k(8)
17881 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017882 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017883 }
17884 }
17885
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017886 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
17887 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080017888 for (uint32_t n = 1; n <= 16; n++) {
17889 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017890 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017891 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017892 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080017893 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017894 .m(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017895 .n(n)
17896 .k(8)
17897 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017898 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017899 }
17900 }
17901
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017902 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
17903 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080017904 for (size_t k = 1; k < 8; k++) {
17905 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017906 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017907 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017908 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080017909 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017910 .m(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017911 .n(16)
17912 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017913 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017914 }
17915 }
17916
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017917 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8_strided_a) {
17918 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080017919 for (size_t k = 1; k < 8; k++) {
17920 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017921 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017922 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017923 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080017924 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017925 .m(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017926 .n(16)
17927 .k(k)
17928 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017929 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017930 }
17931 }
17932
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017933 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
17934 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080017935 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017936 for (uint32_t n = 1; n <= 16; n++) {
17937 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017938 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017939 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017940 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017941 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080017942 .sr(1)
17943 .m(m)
17944 .n(n)
17945 .k(k)
17946 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017947 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017948 }
17949 }
17950 }
17951 }
17952
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017953 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
17954 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080017955 for (size_t k = 9; k < 16; k++) {
17956 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017957 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017958 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017959 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080017960 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017961 .m(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017962 .n(16)
17963 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017964 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017965 }
17966 }
17967
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017968 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8_strided_a) {
17969 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080017970 for (size_t k = 9; k < 16; k++) {
17971 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017972 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017973 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017974 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080017975 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017976 .m(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017977 .n(16)
17978 .k(k)
17979 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017980 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017981 }
17982 }
17983
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017984 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
17985 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080017986 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017987 for (uint32_t n = 1; n <= 16; n++) {
17988 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017989 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017990 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080017991 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017992 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080017993 .sr(1)
17994 .m(m)
17995 .n(n)
17996 .k(k)
17997 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017998 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017999 }
18000 }
18001 }
18002 }
18003
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018004 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
18005 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018006 for (size_t k = 16; k <= 80; k += 8) {
18007 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018008 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018009 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018010 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018011 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018012 .m(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018013 .n(16)
18014 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018015 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018016 }
18017 }
18018
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018019 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_div_8_strided_a) {
18020 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018021 for (size_t k = 16; k <= 80; k += 8) {
18022 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018023 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018024 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018025 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018026 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018027 .m(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018028 .n(16)
18029 .k(k)
18030 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018031 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018032 }
18033 }
18034
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018035 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
18036 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018037 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018038 for (uint32_t n = 1; n <= 16; n++) {
18039 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080018040 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018041 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018042 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018043 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018044 .sr(1)
18045 .m(m)
18046 .n(n)
18047 .k(k)
18048 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018049 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018050 }
18051 }
18052 }
18053 }
18054
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018055 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
18056 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018057 for (uint32_t n = 17; n < 32; n++) {
18058 for (size_t k = 1; k <= 40; k += 9) {
18059 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018060 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018061 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018062 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018063 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018064 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018065 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -080018066 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018067 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018068 }
18069 }
18070 }
18071
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018072 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
18073 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018074 for (uint32_t n = 17; n < 32; n++) {
18075 for (size_t k = 1; k <= 40; k += 9) {
18076 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018077 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018078 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018079 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018080 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018081 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018082 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -080018083 .k(k)
18084 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018085 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018086 }
18087 }
18088 }
18089
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018090 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_a) {
18091 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018092 for (uint32_t n = 17; n < 32; n++) {
18093 for (size_t k = 1; k <= 40; k += 9) {
18094 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018095 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018096 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018097 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018098 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018099 .m(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018100 .n(n)
18101 .k(k)
18102 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018103 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018104 }
18105 }
18106 }
18107
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018108 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
18109 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018110 for (uint32_t n = 17; n < 32; n++) {
18111 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018112 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080018113 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018114 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018115 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018116 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018117 .sr(1)
18118 .m(m)
18119 .n(n)
18120 .k(k)
18121 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018122 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018123 }
18124 }
18125 }
18126 }
18127
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018128 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
18129 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018130 for (uint32_t n = 32; n <= 48; n += 16) {
18131 for (size_t k = 1; k <= 40; k += 9) {
18132 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018133 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018134 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018135 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018136 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018137 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018138 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -080018139 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018140 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018141 }
18142 }
18143 }
18144
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018145 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
18146 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018147 for (uint32_t n = 32; n <= 48; n += 16) {
18148 for (size_t k = 1; k <= 40; k += 9) {
18149 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018150 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018151 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018152 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018153 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018154 .m(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018155 .n(n)
18156 .k(k)
18157 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018158 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018159 }
18160 }
18161 }
18162
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018163 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_a) {
18164 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018165 for (uint32_t n = 32; n <= 48; n += 16) {
18166 for (size_t k = 1; k <= 40; k += 9) {
18167 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018168 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018169 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018170 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018171 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018172 .m(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018173 .n(n)
18174 .k(k)
18175 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018176 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018177 }
18178 }
18179 }
18180
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018181 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
18182 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018183 for (uint32_t n = 32; n <= 48; n += 16) {
18184 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018185 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080018186 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018187 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018188 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018189 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018190 .sr(1)
18191 .m(m)
18192 .n(n)
18193 .k(k)
18194 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018195 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018196 }
18197 }
18198 }
18199 }
18200
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018201 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) {
18202 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018203 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018204 for (uint32_t n = 1; n <= 16; n++) {
18205 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080018206 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018207 .mr(1)
Frank Barchard5cffb642021-11-22 13:59:43 -080018208 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018209 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018210 .sr(1)
18211 .m(m)
18212 .n(n)
18213 .k(k)
18214 .cm_stride(19)
18215 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018216 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018217 }
18218 }
18219 }
18220 }
18221
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018222 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, qmin) {
18223 TEST_REQUIRES_ARM_NEON_DOT;
18224 GemmMicrokernelTester()
18225 .mr(1)
18226 .nr(16)
18227 .kr(4)
18228 .sr(1)
18229 .m(1)
18230 .n(16)
18231 .k(8)
18232 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018233 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018234 }
18235
18236 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, qmax) {
18237 TEST_REQUIRES_ARM_NEON_DOT;
18238 GemmMicrokernelTester()
18239 .mr(1)
18240 .nr(16)
18241 .kr(4)
18242 .sr(1)
18243 .m(1)
18244 .n(16)
18245 .k(8)
18246 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018248 }
18249
18250 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
18251 TEST_REQUIRES_ARM_NEON_DOT;
18252 GemmMicrokernelTester()
18253 .mr(1)
18254 .nr(16)
18255 .kr(4)
18256 .sr(1)
18257 .m(1)
18258 .n(16)
18259 .k(8)
18260 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018261 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018262 }
18263#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
18264
18265
18266#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
18267 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8) {
18268 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018269 GemmMicrokernelTester()
18270 .mr(4)
18271 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018272 .kr(4)
18273 .sr(1)
18274 .m(4)
18275 .n(16)
18276 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018277 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018278 }
18279
18280 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
18281 TEST_REQUIRES_ARM_NEON_DOT;
18282 GemmMicrokernelTester()
18283 .mr(4)
18284 .nr(16)
18285 .kr(4)
18286 .sr(1)
18287 .m(4)
18288 .n(16)
18289 .k(8)
18290 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018291 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018292 }
18293
18294 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_strided_a) {
18295 TEST_REQUIRES_ARM_NEON_DOT;
18296 GemmMicrokernelTester()
18297 .mr(4)
18298 .nr(16)
18299 .kr(4)
18300 .sr(1)
18301 .m(4)
18302 .n(16)
18303 .k(8)
18304 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018305 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018306 }
18307
18308 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
18309 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018310 for (uint32_t n = 1; n <= 16; n++) {
18311 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018312 GemmMicrokernelTester()
18313 .mr(4)
18314 .nr(16)
18315 .kr(4)
18316 .sr(1)
18317 .m(m)
18318 .n(n)
18319 .k(8)
18320 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018321 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018322 }
18323 }
18324 }
18325
18326 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
18327 TEST_REQUIRES_ARM_NEON_DOT;
18328 for (uint32_t m = 1; m <= 4; m++) {
18329 GemmMicrokernelTester()
18330 .mr(4)
18331 .nr(16)
18332 .kr(4)
18333 .sr(1)
18334 .m(m)
18335 .n(16)
18336 .k(8)
18337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018338 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018339 }
18340 }
18341
18342 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
18343 TEST_REQUIRES_ARM_NEON_DOT;
18344 for (uint32_t n = 1; n <= 16; n++) {
18345 GemmMicrokernelTester()
18346 .mr(4)
18347 .nr(16)
18348 .kr(4)
18349 .sr(1)
18350 .m(4)
18351 .n(n)
18352 .k(8)
18353 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018354 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018355 }
18356 }
18357
18358 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
18359 TEST_REQUIRES_ARM_NEON_DOT;
18360 for (size_t k = 1; k < 8; k++) {
18361 GemmMicrokernelTester()
18362 .mr(4)
18363 .nr(16)
18364 .kr(4)
18365 .sr(1)
18366 .m(4)
18367 .n(16)
18368 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018369 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018370 }
18371 }
18372
18373 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8_strided_a) {
18374 TEST_REQUIRES_ARM_NEON_DOT;
18375 for (size_t k = 1; k < 8; k++) {
18376 GemmMicrokernelTester()
18377 .mr(4)
18378 .nr(16)
18379 .kr(4)
18380 .sr(1)
18381 .m(4)
18382 .n(16)
18383 .k(k)
18384 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018385 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018386 }
18387 }
18388
18389 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
18390 TEST_REQUIRES_ARM_NEON_DOT;
18391 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018392 for (uint32_t n = 1; n <= 16; n++) {
18393 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018394 GemmMicrokernelTester()
18395 .mr(4)
18396 .nr(16)
18397 .kr(4)
18398 .sr(1)
18399 .m(m)
18400 .n(n)
18401 .k(k)
18402 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018403 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018404 }
18405 }
18406 }
18407 }
18408
18409 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
18410 TEST_REQUIRES_ARM_NEON_DOT;
18411 for (size_t k = 9; k < 16; k++) {
18412 GemmMicrokernelTester()
18413 .mr(4)
18414 .nr(16)
18415 .kr(4)
18416 .sr(1)
18417 .m(4)
18418 .n(16)
18419 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018420 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018421 }
18422 }
18423
18424 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8_strided_a) {
18425 TEST_REQUIRES_ARM_NEON_DOT;
18426 for (size_t k = 9; k < 16; k++) {
18427 GemmMicrokernelTester()
18428 .mr(4)
18429 .nr(16)
18430 .kr(4)
18431 .sr(1)
18432 .m(4)
18433 .n(16)
18434 .k(k)
18435 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018436 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018437 }
18438 }
18439
18440 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
18441 TEST_REQUIRES_ARM_NEON_DOT;
18442 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018443 for (uint32_t n = 1; n <= 16; n++) {
18444 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018445 GemmMicrokernelTester()
18446 .mr(4)
18447 .nr(16)
18448 .kr(4)
18449 .sr(1)
18450 .m(m)
18451 .n(n)
18452 .k(k)
18453 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018454 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018455 }
18456 }
18457 }
18458 }
18459
18460 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
18461 TEST_REQUIRES_ARM_NEON_DOT;
18462 for (size_t k = 16; k <= 80; k += 8) {
18463 GemmMicrokernelTester()
18464 .mr(4)
18465 .nr(16)
18466 .kr(4)
18467 .sr(1)
18468 .m(4)
18469 .n(16)
18470 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018471 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018472 }
18473 }
18474
18475 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_div_8_strided_a) {
18476 TEST_REQUIRES_ARM_NEON_DOT;
18477 for (size_t k = 16; k <= 80; k += 8) {
18478 GemmMicrokernelTester()
18479 .mr(4)
18480 .nr(16)
18481 .kr(4)
18482 .sr(1)
18483 .m(4)
18484 .n(16)
18485 .k(k)
18486 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018487 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018488 }
18489 }
18490
18491 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
18492 TEST_REQUIRES_ARM_NEON_DOT;
18493 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018494 for (uint32_t n = 1; n <= 16; n++) {
18495 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018496 GemmMicrokernelTester()
18497 .mr(4)
18498 .nr(16)
18499 .kr(4)
18500 .sr(1)
18501 .m(m)
18502 .n(n)
18503 .k(k)
18504 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018505 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018506 }
18507 }
18508 }
18509 }
18510
18511 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
18512 TEST_REQUIRES_ARM_NEON_DOT;
18513 for (uint32_t n = 17; n < 32; n++) {
18514 for (size_t k = 1; k <= 40; k += 9) {
18515 GemmMicrokernelTester()
18516 .mr(4)
18517 .nr(16)
18518 .kr(4)
18519 .sr(1)
18520 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018521 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018522 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018523 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018524 }
18525 }
18526 }
18527
18528 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
18529 TEST_REQUIRES_ARM_NEON_DOT;
18530 for (uint32_t n = 17; n < 32; n++) {
18531 for (size_t k = 1; k <= 40; k += 9) {
18532 GemmMicrokernelTester()
18533 .mr(4)
18534 .nr(16)
18535 .kr(4)
18536 .sr(1)
18537 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018538 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018539 .k(k)
18540 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018541 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018542 }
18543 }
18544 }
18545
18546 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_a) {
18547 TEST_REQUIRES_ARM_NEON_DOT;
18548 for (uint32_t n = 17; n < 32; n++) {
18549 for (size_t k = 1; k <= 40; k += 9) {
18550 GemmMicrokernelTester()
18551 .mr(4)
18552 .nr(16)
18553 .kr(4)
18554 .sr(1)
18555 .m(4)
18556 .n(n)
18557 .k(k)
18558 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018559 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018560 }
18561 }
18562 }
18563
18564 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
18565 TEST_REQUIRES_ARM_NEON_DOT;
18566 for (uint32_t n = 17; n < 32; n++) {
18567 for (size_t k = 1; k <= 40; k += 9) {
18568 for (uint32_t m = 1; m <= 4; m++) {
18569 GemmMicrokernelTester()
18570 .mr(4)
18571 .nr(16)
18572 .kr(4)
18573 .sr(1)
18574 .m(m)
18575 .n(n)
18576 .k(k)
18577 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018578 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018579 }
18580 }
18581 }
18582 }
18583
18584 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
18585 TEST_REQUIRES_ARM_NEON_DOT;
18586 for (uint32_t n = 32; n <= 48; n += 16) {
18587 for (size_t k = 1; k <= 40; k += 9) {
18588 GemmMicrokernelTester()
18589 .mr(4)
18590 .nr(16)
18591 .kr(4)
18592 .sr(1)
18593 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018594 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018595 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018596 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018597 }
18598 }
18599 }
18600
18601 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
18602 TEST_REQUIRES_ARM_NEON_DOT;
18603 for (uint32_t n = 32; n <= 48; n += 16) {
18604 for (size_t k = 1; k <= 40; k += 9) {
18605 GemmMicrokernelTester()
18606 .mr(4)
18607 .nr(16)
18608 .kr(4)
18609 .sr(1)
18610 .m(4)
18611 .n(n)
18612 .k(k)
18613 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018614 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018615 }
18616 }
18617 }
18618
18619 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_a) {
18620 TEST_REQUIRES_ARM_NEON_DOT;
18621 for (uint32_t n = 32; n <= 48; n += 16) {
18622 for (size_t k = 1; k <= 40; k += 9) {
18623 GemmMicrokernelTester()
18624 .mr(4)
18625 .nr(16)
18626 .kr(4)
18627 .sr(1)
18628 .m(4)
18629 .n(n)
18630 .k(k)
18631 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018632 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018633 }
18634 }
18635 }
18636
18637 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
18638 TEST_REQUIRES_ARM_NEON_DOT;
18639 for (uint32_t n = 32; n <= 48; n += 16) {
18640 for (size_t k = 1; k <= 40; k += 9) {
18641 for (uint32_t m = 1; m <= 4; m++) {
18642 GemmMicrokernelTester()
18643 .mr(4)
18644 .nr(16)
18645 .kr(4)
18646 .sr(1)
18647 .m(m)
18648 .n(n)
18649 .k(k)
18650 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018651 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018652 }
18653 }
18654 }
18655 }
18656
18657 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) {
18658 TEST_REQUIRES_ARM_NEON_DOT;
18659 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018660 for (uint32_t n = 1; n <= 16; n++) {
18661 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018662 GemmMicrokernelTester()
18663 .mr(4)
18664 .nr(16)
18665 .kr(4)
18666 .sr(1)
18667 .m(m)
18668 .n(n)
18669 .k(k)
18670 .cm_stride(19)
18671 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018672 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018673 }
18674 }
18675 }
18676 }
18677
18678 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, qmin) {
18679 TEST_REQUIRES_ARM_NEON_DOT;
18680 GemmMicrokernelTester()
18681 .mr(4)
18682 .nr(16)
18683 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018684 .sr(1)
18685 .m(4)
18686 .n(16)
18687 .k(8)
18688 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018689 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018690 }
18691
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018692 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, qmax) {
18693 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018694 GemmMicrokernelTester()
18695 .mr(4)
18696 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018697 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018698 .sr(1)
18699 .m(4)
18700 .n(16)
18701 .k(8)
18702 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018703 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018704 }
18705
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018706 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
18707 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard5cffb642021-11-22 13:59:43 -080018708 GemmMicrokernelTester()
18709 .mr(4)
18710 .nr(16)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018711 .kr(4)
Frank Barchard5cffb642021-11-22 13:59:43 -080018712 .sr(1)
18713 .m(4)
18714 .n(16)
18715 .k(8)
18716 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018717 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018718 }
Frank Barcharde31f29e2021-12-21 15:57:10 -080018719#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard5cffb642021-11-22 13:59:43 -080018720
18721
Marat Dukhane903dff2021-07-16 19:43:41 -070018722#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan89991902021-12-06 00:54:36 -080018723 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8) {
18724 TEST_REQUIRES_ARM_NEON;
18725 GemmMicrokernelTester()
18726 .mr(1)
18727 .nr(16)
18728 .kr(8)
18729 .sr(1)
18730 .m(1)
18731 .n(16)
18732 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018733 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018734 }
18735
18736 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, strided_cn) {
18737 TEST_REQUIRES_ARM_NEON;
18738 GemmMicrokernelTester()
18739 .mr(1)
18740 .nr(16)
18741 .kr(8)
18742 .sr(1)
18743 .m(1)
18744 .n(16)
18745 .k(8)
18746 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018747 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018748 }
18749
18750 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8_strided_a) {
18751 TEST_REQUIRES_ARM_NEON;
18752 GemmMicrokernelTester()
18753 .mr(1)
18754 .nr(16)
18755 .kr(8)
18756 .sr(1)
18757 .m(1)
18758 .n(16)
18759 .k(8)
18760 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018761 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018762 }
18763
18764 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8_subtile) {
18765 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018766 for (uint32_t n = 1; n <= 16; n++) {
18767 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080018768 GemmMicrokernelTester()
18769 .mr(1)
18770 .nr(16)
18771 .kr(8)
18772 .sr(1)
18773 .m(m)
18774 .n(n)
18775 .k(8)
18776 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018777 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018778 }
18779 }
18780 }
18781
18782 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8_subtile_m) {
18783 TEST_REQUIRES_ARM_NEON;
18784 for (uint32_t m = 1; m <= 1; m++) {
18785 GemmMicrokernelTester()
18786 .mr(1)
18787 .nr(16)
18788 .kr(8)
18789 .sr(1)
18790 .m(m)
18791 .n(16)
18792 .k(8)
18793 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018794 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018795 }
18796 }
18797
18798 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8_subtile_n) {
18799 TEST_REQUIRES_ARM_NEON;
18800 for (uint32_t n = 1; n <= 16; n++) {
18801 GemmMicrokernelTester()
18802 .mr(1)
18803 .nr(16)
18804 .kr(8)
18805 .sr(1)
18806 .m(1)
18807 .n(n)
18808 .k(8)
18809 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018810 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018811 }
18812 }
18813
18814 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_lt_8) {
18815 TEST_REQUIRES_ARM_NEON;
18816 for (size_t k = 1; k < 8; k++) {
18817 GemmMicrokernelTester()
18818 .mr(1)
18819 .nr(16)
18820 .kr(8)
18821 .sr(1)
18822 .m(1)
18823 .n(16)
18824 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018825 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018826 }
18827 }
18828
18829 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_lt_8_strided_a) {
18830 TEST_REQUIRES_ARM_NEON;
18831 for (size_t k = 1; k < 8; k++) {
18832 GemmMicrokernelTester()
18833 .mr(1)
18834 .nr(16)
18835 .kr(8)
18836 .sr(1)
18837 .m(1)
18838 .n(16)
18839 .k(k)
18840 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018841 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018842 }
18843 }
18844
18845 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_lt_8_subtile) {
18846 TEST_REQUIRES_ARM_NEON;
18847 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018848 for (uint32_t n = 1; n <= 16; n++) {
18849 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080018850 GemmMicrokernelTester()
18851 .mr(1)
18852 .nr(16)
18853 .kr(8)
18854 .sr(1)
18855 .m(m)
18856 .n(n)
18857 .k(k)
18858 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018859 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018860 }
18861 }
18862 }
18863 }
18864
18865 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_gt_8) {
18866 TEST_REQUIRES_ARM_NEON;
18867 for (size_t k = 9; k < 16; k++) {
18868 GemmMicrokernelTester()
18869 .mr(1)
18870 .nr(16)
18871 .kr(8)
18872 .sr(1)
18873 .m(1)
18874 .n(16)
18875 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018876 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018877 }
18878 }
18879
18880 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_gt_8_strided_a) {
18881 TEST_REQUIRES_ARM_NEON;
18882 for (size_t k = 9; k < 16; k++) {
18883 GemmMicrokernelTester()
18884 .mr(1)
18885 .nr(16)
18886 .kr(8)
18887 .sr(1)
18888 .m(1)
18889 .n(16)
18890 .k(k)
18891 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018892 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018893 }
18894 }
18895
18896 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_gt_8_subtile) {
18897 TEST_REQUIRES_ARM_NEON;
18898 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018899 for (uint32_t n = 1; n <= 16; n++) {
18900 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080018901 GemmMicrokernelTester()
18902 .mr(1)
18903 .nr(16)
18904 .kr(8)
18905 .sr(1)
18906 .m(m)
18907 .n(n)
18908 .k(k)
18909 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018910 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018911 }
18912 }
18913 }
18914 }
18915
18916 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_div_8) {
18917 TEST_REQUIRES_ARM_NEON;
18918 for (size_t k = 16; k <= 80; k += 8) {
18919 GemmMicrokernelTester()
18920 .mr(1)
18921 .nr(16)
18922 .kr(8)
18923 .sr(1)
18924 .m(1)
18925 .n(16)
18926 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018927 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018928 }
18929 }
18930
18931 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_div_8_strided_a) {
18932 TEST_REQUIRES_ARM_NEON;
18933 for (size_t k = 16; k <= 80; k += 8) {
18934 GemmMicrokernelTester()
18935 .mr(1)
18936 .nr(16)
18937 .kr(8)
18938 .sr(1)
18939 .m(1)
18940 .n(16)
18941 .k(k)
18942 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018943 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018944 }
18945 }
18946
18947 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_div_8_subtile) {
18948 TEST_REQUIRES_ARM_NEON;
18949 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018950 for (uint32_t n = 1; n <= 16; n++) {
18951 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080018952 GemmMicrokernelTester()
18953 .mr(1)
18954 .nr(16)
18955 .kr(8)
18956 .sr(1)
18957 .m(m)
18958 .n(n)
18959 .k(k)
18960 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018961 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018962 }
18963 }
18964 }
18965 }
18966
18967 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16) {
18968 TEST_REQUIRES_ARM_NEON;
18969 for (uint32_t n = 17; n < 32; n++) {
18970 for (size_t k = 1; k <= 40; k += 9) {
18971 GemmMicrokernelTester()
18972 .mr(1)
18973 .nr(16)
18974 .kr(8)
18975 .sr(1)
18976 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018977 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080018978 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018979 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018980 }
18981 }
18982 }
18983
18984 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16_strided_cn) {
18985 TEST_REQUIRES_ARM_NEON;
18986 for (uint32_t n = 17; n < 32; n++) {
18987 for (size_t k = 1; k <= 40; k += 9) {
18988 GemmMicrokernelTester()
18989 .mr(1)
18990 .nr(16)
18991 .kr(8)
18992 .sr(1)
18993 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018994 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080018995 .k(k)
18996 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018997 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018998 }
18999 }
19000 }
19001
19002 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16_strided_a) {
19003 TEST_REQUIRES_ARM_NEON;
19004 for (uint32_t n = 17; n < 32; n++) {
19005 for (size_t k = 1; k <= 40; k += 9) {
19006 GemmMicrokernelTester()
19007 .mr(1)
19008 .nr(16)
19009 .kr(8)
19010 .sr(1)
19011 .m(1)
19012 .n(n)
19013 .k(k)
19014 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019015 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019016 }
19017 }
19018 }
19019
19020 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16_subtile) {
19021 TEST_REQUIRES_ARM_NEON;
19022 for (uint32_t n = 17; n < 32; n++) {
19023 for (size_t k = 1; k <= 40; k += 9) {
19024 for (uint32_t m = 1; m <= 1; m++) {
19025 GemmMicrokernelTester()
19026 .mr(1)
19027 .nr(16)
19028 .kr(8)
19029 .sr(1)
19030 .m(m)
19031 .n(n)
19032 .k(k)
19033 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019034 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019035 }
19036 }
19037 }
19038 }
19039
19040 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16) {
19041 TEST_REQUIRES_ARM_NEON;
19042 for (uint32_t n = 32; n <= 48; n += 16) {
19043 for (size_t k = 1; k <= 40; k += 9) {
19044 GemmMicrokernelTester()
19045 .mr(1)
19046 .nr(16)
19047 .kr(8)
19048 .sr(1)
19049 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019050 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080019051 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019052 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019053 }
19054 }
19055 }
19056
19057 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16_strided_cn) {
19058 TEST_REQUIRES_ARM_NEON;
19059 for (uint32_t n = 32; n <= 48; n += 16) {
19060 for (size_t k = 1; k <= 40; k += 9) {
19061 GemmMicrokernelTester()
19062 .mr(1)
19063 .nr(16)
19064 .kr(8)
19065 .sr(1)
19066 .m(1)
19067 .n(n)
19068 .k(k)
19069 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019070 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019071 }
19072 }
19073 }
19074
19075 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16_strided_a) {
19076 TEST_REQUIRES_ARM_NEON;
19077 for (uint32_t n = 32; n <= 48; n += 16) {
19078 for (size_t k = 1; k <= 40; k += 9) {
19079 GemmMicrokernelTester()
19080 .mr(1)
19081 .nr(16)
19082 .kr(8)
19083 .sr(1)
19084 .m(1)
19085 .n(n)
19086 .k(k)
19087 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019088 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019089 }
19090 }
19091 }
19092
19093 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16_subtile) {
19094 TEST_REQUIRES_ARM_NEON;
19095 for (uint32_t n = 32; n <= 48; n += 16) {
19096 for (size_t k = 1; k <= 40; k += 9) {
19097 for (uint32_t m = 1; m <= 1; m++) {
19098 GemmMicrokernelTester()
19099 .mr(1)
19100 .nr(16)
19101 .kr(8)
19102 .sr(1)
19103 .m(m)
19104 .n(n)
19105 .k(k)
19106 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019107 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019108 }
19109 }
19110 }
19111 }
19112
19113 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, strided_cm_subtile) {
19114 TEST_REQUIRES_ARM_NEON;
19115 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019116 for (uint32_t n = 1; n <= 16; n++) {
19117 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019118 GemmMicrokernelTester()
19119 .mr(1)
19120 .nr(16)
19121 .kr(8)
19122 .sr(1)
19123 .m(m)
19124 .n(n)
19125 .k(k)
19126 .cm_stride(19)
19127 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019128 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019129 }
19130 }
19131 }
19132 }
19133
19134 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, qmin) {
19135 TEST_REQUIRES_ARM_NEON;
19136 GemmMicrokernelTester()
19137 .mr(1)
19138 .nr(16)
19139 .kr(8)
19140 .sr(1)
19141 .m(1)
19142 .n(16)
19143 .k(8)
19144 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019145 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019146 }
19147
19148 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, qmax) {
19149 TEST_REQUIRES_ARM_NEON;
19150 GemmMicrokernelTester()
19151 .mr(1)
19152 .nr(16)
19153 .kr(8)
19154 .sr(1)
19155 .m(1)
19156 .n(16)
19157 .k(8)
19158 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019159 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019160 }
19161
19162 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, strided_cm) {
19163 TEST_REQUIRES_ARM_NEON;
19164 GemmMicrokernelTester()
19165 .mr(1)
19166 .nr(16)
19167 .kr(8)
19168 .sr(1)
19169 .m(1)
19170 .n(16)
19171 .k(8)
19172 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019173 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019174 }
19175#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
19176
19177
19178#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan89991902021-12-06 00:54:36 -080019179 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8) {
19180 TEST_REQUIRES_ARM_NEON;
19181 GemmMicrokernelTester()
19182 .mr(3)
19183 .nr(16)
19184 .kr(8)
19185 .sr(1)
19186 .m(3)
19187 .n(16)
19188 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080019189 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019190 }
19191
19192 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cn) {
19193 TEST_REQUIRES_ARM_NEON;
19194 GemmMicrokernelTester()
19195 .mr(3)
19196 .nr(16)
19197 .kr(8)
19198 .sr(1)
19199 .m(3)
19200 .n(16)
19201 .k(8)
19202 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019203 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019204 }
19205
19206 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_strided_a) {
19207 TEST_REQUIRES_ARM_NEON;
19208 GemmMicrokernelTester()
19209 .mr(3)
19210 .nr(16)
19211 .kr(8)
19212 .sr(1)
19213 .m(3)
19214 .n(16)
19215 .k(8)
19216 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019217 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019218 }
19219
19220 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile) {
19221 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019222 for (uint32_t n = 1; n <= 16; n++) {
19223 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019224 GemmMicrokernelTester()
19225 .mr(3)
19226 .nr(16)
19227 .kr(8)
19228 .sr(1)
19229 .m(m)
19230 .n(n)
19231 .k(8)
19232 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019233 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019234 }
19235 }
19236 }
19237
19238 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile_m) {
19239 TEST_REQUIRES_ARM_NEON;
19240 for (uint32_t m = 1; m <= 3; m++) {
19241 GemmMicrokernelTester()
19242 .mr(3)
19243 .nr(16)
19244 .kr(8)
19245 .sr(1)
19246 .m(m)
19247 .n(16)
19248 .k(8)
19249 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019250 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019251 }
19252 }
19253
19254 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile_n) {
19255 TEST_REQUIRES_ARM_NEON;
19256 for (uint32_t n = 1; n <= 16; n++) {
19257 GemmMicrokernelTester()
19258 .mr(3)
19259 .nr(16)
19260 .kr(8)
19261 .sr(1)
19262 .m(3)
19263 .n(n)
19264 .k(8)
19265 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019266 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019267 }
19268 }
19269
19270 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_lt_8) {
19271 TEST_REQUIRES_ARM_NEON;
19272 for (size_t k = 1; k < 8; k++) {
19273 GemmMicrokernelTester()
19274 .mr(3)
19275 .nr(16)
19276 .kr(8)
19277 .sr(1)
19278 .m(3)
19279 .n(16)
19280 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019281 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019282 }
19283 }
19284
19285 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_lt_8_strided_a) {
19286 TEST_REQUIRES_ARM_NEON;
19287 for (size_t k = 1; k < 8; k++) {
19288 GemmMicrokernelTester()
19289 .mr(3)
19290 .nr(16)
19291 .kr(8)
19292 .sr(1)
19293 .m(3)
19294 .n(16)
19295 .k(k)
19296 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019297 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019298 }
19299 }
19300
19301 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_lt_8_subtile) {
19302 TEST_REQUIRES_ARM_NEON;
19303 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019304 for (uint32_t n = 1; n <= 16; n++) {
19305 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019306 GemmMicrokernelTester()
19307 .mr(3)
19308 .nr(16)
19309 .kr(8)
19310 .sr(1)
19311 .m(m)
19312 .n(n)
19313 .k(k)
19314 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019315 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019316 }
19317 }
19318 }
19319 }
19320
19321 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_gt_8) {
19322 TEST_REQUIRES_ARM_NEON;
19323 for (size_t k = 9; k < 16; k++) {
19324 GemmMicrokernelTester()
19325 .mr(3)
19326 .nr(16)
19327 .kr(8)
19328 .sr(1)
19329 .m(3)
19330 .n(16)
19331 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019332 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019333 }
19334 }
19335
19336 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_gt_8_strided_a) {
19337 TEST_REQUIRES_ARM_NEON;
19338 for (size_t k = 9; k < 16; k++) {
19339 GemmMicrokernelTester()
19340 .mr(3)
19341 .nr(16)
19342 .kr(8)
19343 .sr(1)
19344 .m(3)
19345 .n(16)
19346 .k(k)
19347 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019348 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019349 }
19350 }
19351
19352 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_gt_8_subtile) {
19353 TEST_REQUIRES_ARM_NEON;
19354 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019355 for (uint32_t n = 1; n <= 16; n++) {
19356 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019357 GemmMicrokernelTester()
19358 .mr(3)
19359 .nr(16)
19360 .kr(8)
19361 .sr(1)
19362 .m(m)
19363 .n(n)
19364 .k(k)
19365 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019366 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019367 }
19368 }
19369 }
19370 }
19371
19372 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_div_8) {
19373 TEST_REQUIRES_ARM_NEON;
19374 for (size_t k = 16; k <= 80; k += 8) {
19375 GemmMicrokernelTester()
19376 .mr(3)
19377 .nr(16)
19378 .kr(8)
19379 .sr(1)
19380 .m(3)
19381 .n(16)
19382 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019383 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019384 }
19385 }
19386
19387 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_div_8_strided_a) {
19388 TEST_REQUIRES_ARM_NEON;
19389 for (size_t k = 16; k <= 80; k += 8) {
19390 GemmMicrokernelTester()
19391 .mr(3)
19392 .nr(16)
19393 .kr(8)
19394 .sr(1)
19395 .m(3)
19396 .n(16)
19397 .k(k)
19398 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080019399 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019400 }
19401 }
19402
19403 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_div_8_subtile) {
19404 TEST_REQUIRES_ARM_NEON;
19405 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019406 for (uint32_t n = 1; n <= 16; n++) {
19407 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019408 GemmMicrokernelTester()
19409 .mr(3)
19410 .nr(16)
19411 .kr(8)
19412 .sr(1)
19413 .m(m)
19414 .n(n)
19415 .k(k)
19416 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019417 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019418 }
19419 }
19420 }
19421 }
19422
19423 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16) {
19424 TEST_REQUIRES_ARM_NEON;
19425 for (uint32_t n = 17; n < 32; n++) {
19426 for (size_t k = 1; k <= 40; k += 9) {
19427 GemmMicrokernelTester()
19428 .mr(3)
19429 .nr(16)
19430 .kr(8)
19431 .sr(1)
19432 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019433 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080019434 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019435 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019436 }
19437 }
19438 }
19439
19440 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_strided_cn) {
19441 TEST_REQUIRES_ARM_NEON;
19442 for (uint32_t n = 17; n < 32; n++) {
19443 for (size_t k = 1; k <= 40; k += 9) {
19444 GemmMicrokernelTester()
19445 .mr(3)
19446 .nr(16)
19447 .kr(8)
19448 .sr(1)
19449 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019450 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080019451 .k(k)
19452 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019453 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019454 }
19455 }
19456 }
19457
19458 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_strided_a) {
19459 TEST_REQUIRES_ARM_NEON;
19460 for (uint32_t n = 17; n < 32; n++) {
19461 for (size_t k = 1; k <= 40; k += 9) {
19462 GemmMicrokernelTester()
19463 .mr(3)
19464 .nr(16)
19465 .kr(8)
19466 .sr(1)
19467 .m(3)
19468 .n(n)
19469 .k(k)
19470 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019471 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019472 }
19473 }
19474 }
19475
19476 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_subtile) {
19477 TEST_REQUIRES_ARM_NEON;
19478 for (uint32_t n = 17; n < 32; n++) {
19479 for (size_t k = 1; k <= 40; k += 9) {
19480 for (uint32_t m = 1; m <= 3; m++) {
19481 GemmMicrokernelTester()
19482 .mr(3)
19483 .nr(16)
19484 .kr(8)
19485 .sr(1)
19486 .m(m)
19487 .n(n)
19488 .k(k)
19489 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019490 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019491 }
19492 }
19493 }
19494 }
19495
19496 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16) {
19497 TEST_REQUIRES_ARM_NEON;
19498 for (uint32_t n = 32; n <= 48; n += 16) {
19499 for (size_t k = 1; k <= 40; k += 9) {
19500 GemmMicrokernelTester()
19501 .mr(3)
19502 .nr(16)
19503 .kr(8)
19504 .sr(1)
19505 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019506 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080019507 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019508 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019509 }
19510 }
19511 }
19512
19513 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_strided_cn) {
19514 TEST_REQUIRES_ARM_NEON;
19515 for (uint32_t n = 32; n <= 48; n += 16) {
19516 for (size_t k = 1; k <= 40; k += 9) {
19517 GemmMicrokernelTester()
19518 .mr(3)
19519 .nr(16)
19520 .kr(8)
19521 .sr(1)
19522 .m(3)
19523 .n(n)
19524 .k(k)
19525 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019526 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019527 }
19528 }
19529 }
19530
19531 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_strided_a) {
19532 TEST_REQUIRES_ARM_NEON;
19533 for (uint32_t n = 32; n <= 48; n += 16) {
19534 for (size_t k = 1; k <= 40; k += 9) {
19535 GemmMicrokernelTester()
19536 .mr(3)
19537 .nr(16)
19538 .kr(8)
19539 .sr(1)
19540 .m(3)
19541 .n(n)
19542 .k(k)
19543 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019544 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019545 }
19546 }
19547 }
19548
19549 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_subtile) {
19550 TEST_REQUIRES_ARM_NEON;
19551 for (uint32_t n = 32; n <= 48; n += 16) {
19552 for (size_t k = 1; k <= 40; k += 9) {
19553 for (uint32_t m = 1; m <= 3; m++) {
19554 GemmMicrokernelTester()
19555 .mr(3)
19556 .nr(16)
19557 .kr(8)
19558 .sr(1)
19559 .m(m)
19560 .n(n)
19561 .k(k)
19562 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019563 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019564 }
19565 }
19566 }
19567 }
19568
19569 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cm_subtile) {
19570 TEST_REQUIRES_ARM_NEON;
19571 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019572 for (uint32_t n = 1; n <= 16; n++) {
19573 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019574 GemmMicrokernelTester()
19575 .mr(3)
19576 .nr(16)
19577 .kr(8)
19578 .sr(1)
19579 .m(m)
19580 .n(n)
19581 .k(k)
19582 .cm_stride(19)
19583 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019584 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019585 }
19586 }
19587 }
19588 }
19589
19590 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, qmin) {
19591 TEST_REQUIRES_ARM_NEON;
19592 GemmMicrokernelTester()
19593 .mr(3)
19594 .nr(16)
19595 .kr(8)
19596 .sr(1)
19597 .m(3)
19598 .n(16)
19599 .k(8)
19600 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019601 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019602 }
19603
19604 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, qmax) {
19605 TEST_REQUIRES_ARM_NEON;
19606 GemmMicrokernelTester()
19607 .mr(3)
19608 .nr(16)
19609 .kr(8)
19610 .sr(1)
19611 .m(3)
19612 .n(16)
19613 .k(8)
19614 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019615 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019616 }
19617
19618 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cm) {
19619 TEST_REQUIRES_ARM_NEON;
19620 GemmMicrokernelTester()
19621 .mr(3)
19622 .nr(16)
19623 .kr(8)
19624 .sr(1)
19625 .m(3)
19626 .n(16)
19627 .k(8)
19628 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019629 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019630 }
19631#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
19632
19633
19634#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan89991902021-12-06 00:54:36 -080019635 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_eq_16) {
19636 TEST_REQUIRES_ARM_NEON;
19637 GemmMicrokernelTester()
19638 .mr(3)
19639 .nr(8)
19640 .kr(8)
19641 .sr(1)
19642 .m(3)
19643 .n(8)
19644 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080019645 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019646 }
19647
19648 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, strided_cn) {
19649 TEST_REQUIRES_ARM_NEON;
19650 GemmMicrokernelTester()
19651 .mr(3)
19652 .nr(8)
19653 .kr(8)
19654 .sr(1)
19655 .m(3)
19656 .n(8)
19657 .k(16)
19658 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019659 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019660 }
19661
19662 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_eq_16_strided_a) {
19663 TEST_REQUIRES_ARM_NEON;
19664 GemmMicrokernelTester()
19665 .mr(3)
19666 .nr(8)
19667 .kr(8)
19668 .sr(1)
19669 .m(3)
19670 .n(8)
19671 .k(16)
19672 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019673 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019674 }
19675
19676 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_eq_16_subtile) {
19677 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019678 for (uint32_t n = 1; n <= 8; n++) {
19679 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019680 GemmMicrokernelTester()
19681 .mr(3)
19682 .nr(8)
19683 .kr(8)
19684 .sr(1)
19685 .m(m)
19686 .n(n)
19687 .k(16)
19688 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019689 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019690 }
19691 }
19692 }
19693
19694 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_eq_16_subtile_m) {
19695 TEST_REQUIRES_ARM_NEON;
19696 for (uint32_t m = 1; m <= 3; m++) {
19697 GemmMicrokernelTester()
19698 .mr(3)
19699 .nr(8)
19700 .kr(8)
19701 .sr(1)
19702 .m(m)
19703 .n(8)
19704 .k(16)
19705 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019706 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019707 }
19708 }
19709
19710 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_eq_16_subtile_n) {
19711 TEST_REQUIRES_ARM_NEON;
19712 for (uint32_t n = 1; n <= 8; n++) {
19713 GemmMicrokernelTester()
19714 .mr(3)
19715 .nr(8)
19716 .kr(8)
19717 .sr(1)
19718 .m(3)
19719 .n(n)
19720 .k(16)
19721 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019722 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019723 }
19724 }
19725
19726 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_lt_16) {
19727 TEST_REQUIRES_ARM_NEON;
19728 for (size_t k = 1; k < 16; k++) {
19729 GemmMicrokernelTester()
19730 .mr(3)
19731 .nr(8)
19732 .kr(8)
19733 .sr(1)
19734 .m(3)
19735 .n(8)
19736 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019737 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019738 }
19739 }
19740
19741 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_lt_16_strided_a) {
19742 TEST_REQUIRES_ARM_NEON;
19743 for (size_t k = 1; k < 16; k++) {
19744 GemmMicrokernelTester()
19745 .mr(3)
19746 .nr(8)
19747 .kr(8)
19748 .sr(1)
19749 .m(3)
19750 .n(8)
19751 .k(k)
19752 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019753 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019754 }
19755 }
19756
19757 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_lt_16_subtile) {
19758 TEST_REQUIRES_ARM_NEON;
19759 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019760 for (uint32_t n = 1; n <= 8; n++) {
19761 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019762 GemmMicrokernelTester()
19763 .mr(3)
19764 .nr(8)
19765 .kr(8)
19766 .sr(1)
19767 .m(m)
19768 .n(n)
19769 .k(k)
19770 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019771 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019772 }
19773 }
19774 }
19775 }
19776
19777 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_gt_16) {
19778 TEST_REQUIRES_ARM_NEON;
19779 for (size_t k = 17; k < 32; k++) {
19780 GemmMicrokernelTester()
19781 .mr(3)
19782 .nr(8)
19783 .kr(8)
19784 .sr(1)
19785 .m(3)
19786 .n(8)
19787 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019788 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019789 }
19790 }
19791
19792 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_gt_16_strided_a) {
19793 TEST_REQUIRES_ARM_NEON;
19794 for (size_t k = 17; k < 32; k++) {
19795 GemmMicrokernelTester()
19796 .mr(3)
19797 .nr(8)
19798 .kr(8)
19799 .sr(1)
19800 .m(3)
19801 .n(8)
19802 .k(k)
19803 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080019804 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019805 }
19806 }
19807
19808 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_gt_16_subtile) {
19809 TEST_REQUIRES_ARM_NEON;
19810 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019811 for (uint32_t n = 1; n <= 8; n++) {
19812 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019813 GemmMicrokernelTester()
19814 .mr(3)
19815 .nr(8)
19816 .kr(8)
19817 .sr(1)
19818 .m(m)
19819 .n(n)
19820 .k(k)
19821 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019822 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019823 }
19824 }
19825 }
19826 }
19827
19828 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_div_16) {
19829 TEST_REQUIRES_ARM_NEON;
19830 for (size_t k = 32; k <= 160; k += 16) {
19831 GemmMicrokernelTester()
19832 .mr(3)
19833 .nr(8)
19834 .kr(8)
19835 .sr(1)
19836 .m(3)
19837 .n(8)
19838 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019839 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019840 }
19841 }
19842
19843 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_div_16_strided_a) {
19844 TEST_REQUIRES_ARM_NEON;
19845 for (size_t k = 32; k <= 160; k += 16) {
19846 GemmMicrokernelTester()
19847 .mr(3)
19848 .nr(8)
19849 .kr(8)
19850 .sr(1)
19851 .m(3)
19852 .n(8)
19853 .k(k)
19854 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080019855 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019856 }
19857 }
19858
19859 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, k_div_16_subtile) {
19860 TEST_REQUIRES_ARM_NEON;
19861 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019862 for (uint32_t n = 1; n <= 8; n++) {
19863 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019864 GemmMicrokernelTester()
19865 .mr(3)
19866 .nr(8)
19867 .kr(8)
19868 .sr(1)
19869 .m(m)
19870 .n(n)
19871 .k(k)
19872 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019873 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019874 }
19875 }
19876 }
19877 }
19878
19879 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_gt_8) {
19880 TEST_REQUIRES_ARM_NEON;
19881 for (uint32_t n = 9; n < 16; n++) {
19882 for (size_t k = 1; k <= 80; k += 17) {
19883 GemmMicrokernelTester()
19884 .mr(3)
19885 .nr(8)
19886 .kr(8)
19887 .sr(1)
19888 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019889 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080019890 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019891 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019892 }
19893 }
19894 }
19895
19896 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_gt_8_strided_cn) {
19897 TEST_REQUIRES_ARM_NEON;
19898 for (uint32_t n = 9; n < 16; n++) {
19899 for (size_t k = 1; k <= 80; k += 17) {
19900 GemmMicrokernelTester()
19901 .mr(3)
19902 .nr(8)
19903 .kr(8)
19904 .sr(1)
19905 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019906 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080019907 .k(k)
19908 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019909 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019910 }
19911 }
19912 }
19913
19914 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_gt_8_strided_a) {
19915 TEST_REQUIRES_ARM_NEON;
19916 for (uint32_t n = 9; n < 16; n++) {
19917 for (size_t k = 1; k <= 80; k += 17) {
19918 GemmMicrokernelTester()
19919 .mr(3)
19920 .nr(8)
19921 .kr(8)
19922 .sr(1)
19923 .m(3)
19924 .n(n)
19925 .k(k)
19926 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080019927 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019928 }
19929 }
19930 }
19931
19932 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_gt_8_subtile) {
19933 TEST_REQUIRES_ARM_NEON;
19934 for (uint32_t n = 9; n < 16; n++) {
19935 for (size_t k = 1; k <= 80; k += 17) {
19936 for (uint32_t m = 1; m <= 3; m++) {
19937 GemmMicrokernelTester()
19938 .mr(3)
19939 .nr(8)
19940 .kr(8)
19941 .sr(1)
19942 .m(m)
19943 .n(n)
19944 .k(k)
19945 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019946 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019947 }
19948 }
19949 }
19950 }
19951
19952 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_div_8) {
19953 TEST_REQUIRES_ARM_NEON;
19954 for (uint32_t n = 16; n <= 24; n += 8) {
19955 for (size_t k = 1; k <= 80; k += 17) {
19956 GemmMicrokernelTester()
19957 .mr(3)
19958 .nr(8)
19959 .kr(8)
19960 .sr(1)
19961 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019962 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080019963 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019964 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019965 }
19966 }
19967 }
19968
19969 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_div_8_strided_cn) {
19970 TEST_REQUIRES_ARM_NEON;
19971 for (uint32_t n = 16; n <= 24; n += 8) {
19972 for (size_t k = 1; k <= 80; k += 17) {
19973 GemmMicrokernelTester()
19974 .mr(3)
19975 .nr(8)
19976 .kr(8)
19977 .sr(1)
19978 .m(3)
19979 .n(n)
19980 .k(k)
19981 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019982 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019983 }
19984 }
19985 }
19986
19987 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_div_8_strided_a) {
19988 TEST_REQUIRES_ARM_NEON;
19989 for (uint32_t n = 16; n <= 24; n += 8) {
19990 for (size_t k = 1; k <= 80; k += 17) {
19991 GemmMicrokernelTester()
19992 .mr(3)
19993 .nr(8)
19994 .kr(8)
19995 .sr(1)
19996 .m(3)
19997 .n(n)
19998 .k(k)
19999 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020000 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020001 }
20002 }
20003 }
20004
20005 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, n_div_8_subtile) {
20006 TEST_REQUIRES_ARM_NEON;
20007 for (uint32_t n = 16; n <= 24; n += 8) {
20008 for (size_t k = 1; k <= 80; k += 17) {
20009 for (uint32_t m = 1; m <= 3; m++) {
20010 GemmMicrokernelTester()
20011 .mr(3)
20012 .nr(8)
20013 .kr(8)
20014 .sr(1)
20015 .m(m)
20016 .n(n)
20017 .k(k)
20018 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020019 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020020 }
20021 }
20022 }
20023 }
20024
20025 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, strided_cm_subtile) {
20026 TEST_REQUIRES_ARM_NEON;
20027 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020028 for (uint32_t n = 1; n <= 8; n++) {
20029 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020030 GemmMicrokernelTester()
20031 .mr(3)
20032 .nr(8)
20033 .kr(8)
20034 .sr(1)
20035 .m(m)
20036 .n(n)
20037 .k(k)
20038 .cm_stride(11)
20039 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020040 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020041 }
20042 }
20043 }
20044 }
20045
20046 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, qmin) {
20047 TEST_REQUIRES_ARM_NEON;
20048 GemmMicrokernelTester()
20049 .mr(3)
20050 .nr(8)
20051 .kr(8)
20052 .sr(1)
20053 .m(3)
20054 .n(8)
20055 .k(16)
20056 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020057 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020058 }
20059
20060 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, qmax) {
20061 TEST_REQUIRES_ARM_NEON;
20062 GemmMicrokernelTester()
20063 .mr(3)
20064 .nr(8)
20065 .kr(8)
20066 .sr(1)
20067 .m(3)
20068 .n(8)
20069 .k(16)
20070 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020071 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020072 }
20073
20074 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MLAL, strided_cm) {
20075 TEST_REQUIRES_ARM_NEON;
20076 GemmMicrokernelTester()
20077 .mr(3)
20078 .nr(8)
20079 .kr(8)
20080 .sr(1)
20081 .m(3)
20082 .n(8)
20083 .k(16)
20084 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020085 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020086 }
20087#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
20088
20089
20090#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan89991902021-12-06 00:54:36 -080020091 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_eq_16) {
20092 TEST_REQUIRES_ARM_NEON;
20093 GemmMicrokernelTester()
20094 .mr(2)
20095 .nr(16)
20096 .kr(8)
20097 .sr(1)
20098 .m(2)
20099 .n(16)
20100 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080020101 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020102 }
20103
20104 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, strided_cn) {
20105 TEST_REQUIRES_ARM_NEON;
20106 GemmMicrokernelTester()
20107 .mr(2)
20108 .nr(16)
20109 .kr(8)
20110 .sr(1)
20111 .m(2)
20112 .n(16)
20113 .k(16)
20114 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020115 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020116 }
20117
20118 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_eq_16_strided_a) {
20119 TEST_REQUIRES_ARM_NEON;
20120 GemmMicrokernelTester()
20121 .mr(2)
20122 .nr(16)
20123 .kr(8)
20124 .sr(1)
20125 .m(2)
20126 .n(16)
20127 .k(16)
20128 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020129 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020130 }
20131
20132 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_eq_16_subtile) {
20133 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020134 for (uint32_t n = 1; n <= 16; n++) {
20135 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020136 GemmMicrokernelTester()
20137 .mr(2)
20138 .nr(16)
20139 .kr(8)
20140 .sr(1)
20141 .m(m)
20142 .n(n)
20143 .k(16)
20144 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020145 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020146 }
20147 }
20148 }
20149
20150 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_eq_16_subtile_m) {
20151 TEST_REQUIRES_ARM_NEON;
20152 for (uint32_t m = 1; m <= 2; m++) {
20153 GemmMicrokernelTester()
20154 .mr(2)
20155 .nr(16)
20156 .kr(8)
20157 .sr(1)
20158 .m(m)
20159 .n(16)
20160 .k(16)
20161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020162 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020163 }
20164 }
20165
20166 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_eq_16_subtile_n) {
20167 TEST_REQUIRES_ARM_NEON;
20168 for (uint32_t n = 1; n <= 16; n++) {
20169 GemmMicrokernelTester()
20170 .mr(2)
20171 .nr(16)
20172 .kr(8)
20173 .sr(1)
20174 .m(2)
20175 .n(n)
20176 .k(16)
20177 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020178 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020179 }
20180 }
20181
20182 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_lt_16) {
20183 TEST_REQUIRES_ARM_NEON;
20184 for (size_t k = 1; k < 16; k++) {
20185 GemmMicrokernelTester()
20186 .mr(2)
20187 .nr(16)
20188 .kr(8)
20189 .sr(1)
20190 .m(2)
20191 .n(16)
20192 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020193 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020194 }
20195 }
20196
20197 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_lt_16_strided_a) {
20198 TEST_REQUIRES_ARM_NEON;
20199 for (size_t k = 1; k < 16; k++) {
20200 GemmMicrokernelTester()
20201 .mr(2)
20202 .nr(16)
20203 .kr(8)
20204 .sr(1)
20205 .m(2)
20206 .n(16)
20207 .k(k)
20208 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020209 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020210 }
20211 }
20212
20213 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_lt_16_subtile) {
20214 TEST_REQUIRES_ARM_NEON;
20215 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020216 for (uint32_t n = 1; n <= 16; n++) {
20217 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020218 GemmMicrokernelTester()
20219 .mr(2)
20220 .nr(16)
20221 .kr(8)
20222 .sr(1)
20223 .m(m)
20224 .n(n)
20225 .k(k)
20226 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020227 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020228 }
20229 }
20230 }
20231 }
20232
20233 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_gt_16) {
20234 TEST_REQUIRES_ARM_NEON;
20235 for (size_t k = 17; k < 32; k++) {
20236 GemmMicrokernelTester()
20237 .mr(2)
20238 .nr(16)
20239 .kr(8)
20240 .sr(1)
20241 .m(2)
20242 .n(16)
20243 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020244 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020245 }
20246 }
20247
20248 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_gt_16_strided_a) {
20249 TEST_REQUIRES_ARM_NEON;
20250 for (size_t k = 17; k < 32; k++) {
20251 GemmMicrokernelTester()
20252 .mr(2)
20253 .nr(16)
20254 .kr(8)
20255 .sr(1)
20256 .m(2)
20257 .n(16)
20258 .k(k)
20259 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080020260 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020261 }
20262 }
20263
20264 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_gt_16_subtile) {
20265 TEST_REQUIRES_ARM_NEON;
20266 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020267 for (uint32_t n = 1; n <= 16; n++) {
20268 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020269 GemmMicrokernelTester()
20270 .mr(2)
20271 .nr(16)
20272 .kr(8)
20273 .sr(1)
20274 .m(m)
20275 .n(n)
20276 .k(k)
20277 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020278 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020279 }
20280 }
20281 }
20282 }
20283
20284 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_div_16) {
20285 TEST_REQUIRES_ARM_NEON;
20286 for (size_t k = 32; k <= 160; k += 16) {
20287 GemmMicrokernelTester()
20288 .mr(2)
20289 .nr(16)
20290 .kr(8)
20291 .sr(1)
20292 .m(2)
20293 .n(16)
20294 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020295 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020296 }
20297 }
20298
20299 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_div_16_strided_a) {
20300 TEST_REQUIRES_ARM_NEON;
20301 for (size_t k = 32; k <= 160; k += 16) {
20302 GemmMicrokernelTester()
20303 .mr(2)
20304 .nr(16)
20305 .kr(8)
20306 .sr(1)
20307 .m(2)
20308 .n(16)
20309 .k(k)
20310 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080020311 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020312 }
20313 }
20314
20315 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, k_div_16_subtile) {
20316 TEST_REQUIRES_ARM_NEON;
20317 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020318 for (uint32_t n = 1; n <= 16; n++) {
20319 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020320 GemmMicrokernelTester()
20321 .mr(2)
20322 .nr(16)
20323 .kr(8)
20324 .sr(1)
20325 .m(m)
20326 .n(n)
20327 .k(k)
20328 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020329 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020330 }
20331 }
20332 }
20333 }
20334
20335 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, n_gt_16) {
20336 TEST_REQUIRES_ARM_NEON;
20337 for (uint32_t n = 17; n < 32; n++) {
20338 for (size_t k = 1; k <= 80; k += 17) {
20339 GemmMicrokernelTester()
20340 .mr(2)
20341 .nr(16)
20342 .kr(8)
20343 .sr(1)
20344 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020345 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020346 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020347 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020348 }
20349 }
20350 }
20351
20352 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, n_gt_16_strided_cn) {
20353 TEST_REQUIRES_ARM_NEON;
20354 for (uint32_t n = 17; n < 32; n++) {
20355 for (size_t k = 1; k <= 80; k += 17) {
20356 GemmMicrokernelTester()
20357 .mr(2)
20358 .nr(16)
20359 .kr(8)
20360 .sr(1)
20361 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020362 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020363 .k(k)
20364 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020365 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020366 }
20367 }
20368 }
20369
20370 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, n_gt_16_strided_a) {
20371 TEST_REQUIRES_ARM_NEON;
20372 for (uint32_t n = 17; n < 32; n++) {
20373 for (size_t k = 1; k <= 80; k += 17) {
20374 GemmMicrokernelTester()
20375 .mr(2)
20376 .nr(16)
20377 .kr(8)
20378 .sr(1)
20379 .m(2)
20380 .n(n)
20381 .k(k)
20382 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020383 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020384 }
20385 }
20386 }
20387
20388 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, n_gt_16_subtile) {
20389 TEST_REQUIRES_ARM_NEON;
20390 for (uint32_t n = 17; n < 32; n++) {
20391 for (size_t k = 1; k <= 80; k += 17) {
20392 for (uint32_t m = 1; m <= 2; m++) {
20393 GemmMicrokernelTester()
20394 .mr(2)
20395 .nr(16)
20396 .kr(8)
20397 .sr(1)
20398 .m(m)
20399 .n(n)
20400 .k(k)
20401 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020402 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020403 }
20404 }
20405 }
20406 }
20407
20408 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, n_div_16) {
20409 TEST_REQUIRES_ARM_NEON;
20410 for (uint32_t n = 32; n <= 48; n += 16) {
20411 for (size_t k = 1; k <= 80; k += 17) {
20412 GemmMicrokernelTester()
20413 .mr(2)
20414 .nr(16)
20415 .kr(8)
20416 .sr(1)
20417 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020418 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020419 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020420 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020421 }
20422 }
20423 }
20424
20425 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, n_div_16_strided_cn) {
20426 TEST_REQUIRES_ARM_NEON;
20427 for (uint32_t n = 32; n <= 48; n += 16) {
20428 for (size_t k = 1; k <= 80; k += 17) {
20429 GemmMicrokernelTester()
20430 .mr(2)
20431 .nr(16)
20432 .kr(8)
20433 .sr(1)
20434 .m(2)
20435 .n(n)
20436 .k(k)
20437 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020438 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020439 }
20440 }
20441 }
20442
20443 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, n_div_16_strided_a) {
20444 TEST_REQUIRES_ARM_NEON;
20445 for (uint32_t n = 32; n <= 48; n += 16) {
20446 for (size_t k = 1; k <= 80; k += 17) {
20447 GemmMicrokernelTester()
20448 .mr(2)
20449 .nr(16)
20450 .kr(8)
20451 .sr(1)
20452 .m(2)
20453 .n(n)
20454 .k(k)
20455 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020456 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020457 }
20458 }
20459 }
20460
20461 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, n_div_16_subtile) {
20462 TEST_REQUIRES_ARM_NEON;
20463 for (uint32_t n = 32; n <= 48; n += 16) {
20464 for (size_t k = 1; k <= 80; k += 17) {
20465 for (uint32_t m = 1; m <= 2; m++) {
20466 GemmMicrokernelTester()
20467 .mr(2)
20468 .nr(16)
20469 .kr(8)
20470 .sr(1)
20471 .m(m)
20472 .n(n)
20473 .k(k)
20474 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020475 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020476 }
20477 }
20478 }
20479 }
20480
20481 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, strided_cm_subtile) {
20482 TEST_REQUIRES_ARM_NEON;
20483 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020484 for (uint32_t n = 1; n <= 16; n++) {
20485 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020486 GemmMicrokernelTester()
20487 .mr(2)
20488 .nr(16)
20489 .kr(8)
20490 .sr(1)
20491 .m(m)
20492 .n(n)
20493 .k(k)
20494 .cm_stride(19)
20495 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020496 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020497 }
20498 }
20499 }
20500 }
20501
20502 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, qmin) {
20503 TEST_REQUIRES_ARM_NEON;
20504 GemmMicrokernelTester()
20505 .mr(2)
20506 .nr(16)
20507 .kr(8)
20508 .sr(1)
20509 .m(2)
20510 .n(16)
20511 .k(16)
20512 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020513 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020514 }
20515
20516 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, qmax) {
20517 TEST_REQUIRES_ARM_NEON;
20518 GemmMicrokernelTester()
20519 .mr(2)
20520 .nr(16)
20521 .kr(8)
20522 .sr(1)
20523 .m(2)
20524 .n(16)
20525 .k(16)
20526 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020527 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020528 }
20529
20530 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C8__NEON_MLAL, strided_cm) {
20531 TEST_REQUIRES_ARM_NEON;
20532 GemmMicrokernelTester()
20533 .mr(2)
20534 .nr(16)
20535 .kr(8)
20536 .sr(1)
20537 .m(2)
20538 .n(16)
20539 .k(16)
20540 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020541 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020542 }
20543#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
20544
20545
20546#if XNN_ARCH_ARM || XNN_ARCH_ARM64
20547 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_eq_16) {
20548 TEST_REQUIRES_ARM_NEON;
20549 GemmMicrokernelTester()
20550 .mr(3)
20551 .nr(16)
20552 .kr(8)
20553 .sr(1)
20554 .m(3)
20555 .n(16)
20556 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080020557 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020558 }
20559
20560 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, strided_cn) {
20561 TEST_REQUIRES_ARM_NEON;
20562 GemmMicrokernelTester()
20563 .mr(3)
20564 .nr(16)
20565 .kr(8)
20566 .sr(1)
20567 .m(3)
20568 .n(16)
20569 .k(16)
20570 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020571 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020572 }
20573
20574 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_eq_16_strided_a) {
20575 TEST_REQUIRES_ARM_NEON;
20576 GemmMicrokernelTester()
20577 .mr(3)
20578 .nr(16)
20579 .kr(8)
20580 .sr(1)
20581 .m(3)
20582 .n(16)
20583 .k(16)
20584 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020585 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020586 }
20587
20588 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_eq_16_subtile) {
20589 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020590 for (uint32_t n = 1; n <= 16; n++) {
20591 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020592 GemmMicrokernelTester()
20593 .mr(3)
20594 .nr(16)
20595 .kr(8)
20596 .sr(1)
20597 .m(m)
20598 .n(n)
20599 .k(16)
20600 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020601 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020602 }
20603 }
20604 }
20605
20606 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_eq_16_subtile_m) {
20607 TEST_REQUIRES_ARM_NEON;
20608 for (uint32_t m = 1; m <= 3; m++) {
20609 GemmMicrokernelTester()
20610 .mr(3)
20611 .nr(16)
20612 .kr(8)
20613 .sr(1)
20614 .m(m)
20615 .n(16)
20616 .k(16)
20617 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020618 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020619 }
20620 }
20621
20622 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_eq_16_subtile_n) {
20623 TEST_REQUIRES_ARM_NEON;
20624 for (uint32_t n = 1; n <= 16; n++) {
20625 GemmMicrokernelTester()
20626 .mr(3)
20627 .nr(16)
20628 .kr(8)
20629 .sr(1)
20630 .m(3)
20631 .n(n)
20632 .k(16)
20633 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020634 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020635 }
20636 }
20637
20638 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_lt_16) {
20639 TEST_REQUIRES_ARM_NEON;
20640 for (size_t k = 1; k < 16; k++) {
20641 GemmMicrokernelTester()
20642 .mr(3)
20643 .nr(16)
20644 .kr(8)
20645 .sr(1)
20646 .m(3)
20647 .n(16)
20648 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020649 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020650 }
20651 }
20652
20653 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_lt_16_strided_a) {
20654 TEST_REQUIRES_ARM_NEON;
20655 for (size_t k = 1; k < 16; k++) {
20656 GemmMicrokernelTester()
20657 .mr(3)
20658 .nr(16)
20659 .kr(8)
20660 .sr(1)
20661 .m(3)
20662 .n(16)
20663 .k(k)
20664 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020665 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020666 }
20667 }
20668
20669 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_lt_16_subtile) {
20670 TEST_REQUIRES_ARM_NEON;
20671 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020672 for (uint32_t n = 1; n <= 16; n++) {
20673 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020674 GemmMicrokernelTester()
20675 .mr(3)
20676 .nr(16)
20677 .kr(8)
20678 .sr(1)
20679 .m(m)
20680 .n(n)
20681 .k(k)
20682 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020683 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020684 }
20685 }
20686 }
20687 }
20688
20689 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_gt_16) {
20690 TEST_REQUIRES_ARM_NEON;
20691 for (size_t k = 17; k < 32; k++) {
20692 GemmMicrokernelTester()
20693 .mr(3)
20694 .nr(16)
20695 .kr(8)
20696 .sr(1)
20697 .m(3)
20698 .n(16)
20699 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020700 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020701 }
20702 }
20703
20704 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_gt_16_strided_a) {
20705 TEST_REQUIRES_ARM_NEON;
20706 for (size_t k = 17; k < 32; k++) {
20707 GemmMicrokernelTester()
20708 .mr(3)
20709 .nr(16)
20710 .kr(8)
20711 .sr(1)
20712 .m(3)
20713 .n(16)
20714 .k(k)
20715 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080020716 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020717 }
20718 }
20719
20720 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_gt_16_subtile) {
20721 TEST_REQUIRES_ARM_NEON;
20722 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020723 for (uint32_t n = 1; n <= 16; n++) {
20724 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020725 GemmMicrokernelTester()
20726 .mr(3)
20727 .nr(16)
20728 .kr(8)
20729 .sr(1)
20730 .m(m)
20731 .n(n)
20732 .k(k)
20733 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020734 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020735 }
20736 }
20737 }
20738 }
20739
20740 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_div_16) {
20741 TEST_REQUIRES_ARM_NEON;
20742 for (size_t k = 32; k <= 160; k += 16) {
20743 GemmMicrokernelTester()
20744 .mr(3)
20745 .nr(16)
20746 .kr(8)
20747 .sr(1)
20748 .m(3)
20749 .n(16)
20750 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020751 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020752 }
20753 }
20754
20755 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_div_16_strided_a) {
20756 TEST_REQUIRES_ARM_NEON;
20757 for (size_t k = 32; k <= 160; k += 16) {
20758 GemmMicrokernelTester()
20759 .mr(3)
20760 .nr(16)
20761 .kr(8)
20762 .sr(1)
20763 .m(3)
20764 .n(16)
20765 .k(k)
20766 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080020767 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020768 }
20769 }
20770
20771 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_div_16_subtile) {
20772 TEST_REQUIRES_ARM_NEON;
20773 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020774 for (uint32_t n = 1; n <= 16; n++) {
20775 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020776 GemmMicrokernelTester()
20777 .mr(3)
20778 .nr(16)
20779 .kr(8)
20780 .sr(1)
20781 .m(m)
20782 .n(n)
20783 .k(k)
20784 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020785 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020786 }
20787 }
20788 }
20789 }
20790
20791 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_gt_16) {
20792 TEST_REQUIRES_ARM_NEON;
20793 for (uint32_t n = 17; n < 32; n++) {
20794 for (size_t k = 1; k <= 80; k += 17) {
20795 GemmMicrokernelTester()
20796 .mr(3)
20797 .nr(16)
20798 .kr(8)
20799 .sr(1)
20800 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020801 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020802 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020803 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020804 }
20805 }
20806 }
20807
20808 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_gt_16_strided_cn) {
20809 TEST_REQUIRES_ARM_NEON;
20810 for (uint32_t n = 17; n < 32; n++) {
20811 for (size_t k = 1; k <= 80; k += 17) {
20812 GemmMicrokernelTester()
20813 .mr(3)
20814 .nr(16)
20815 .kr(8)
20816 .sr(1)
20817 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020818 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020819 .k(k)
20820 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020821 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020822 }
20823 }
20824 }
20825
20826 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_gt_16_strided_a) {
20827 TEST_REQUIRES_ARM_NEON;
20828 for (uint32_t n = 17; n < 32; n++) {
20829 for (size_t k = 1; k <= 80; k += 17) {
20830 GemmMicrokernelTester()
20831 .mr(3)
20832 .nr(16)
20833 .kr(8)
20834 .sr(1)
20835 .m(3)
20836 .n(n)
20837 .k(k)
20838 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020839 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020840 }
20841 }
20842 }
20843
20844 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_gt_16_subtile) {
20845 TEST_REQUIRES_ARM_NEON;
20846 for (uint32_t n = 17; n < 32; n++) {
20847 for (size_t k = 1; k <= 80; k += 17) {
20848 for (uint32_t m = 1; m <= 3; m++) {
20849 GemmMicrokernelTester()
20850 .mr(3)
20851 .nr(16)
20852 .kr(8)
20853 .sr(1)
20854 .m(m)
20855 .n(n)
20856 .k(k)
20857 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020858 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020859 }
20860 }
20861 }
20862 }
20863
20864 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_div_16) {
20865 TEST_REQUIRES_ARM_NEON;
20866 for (uint32_t n = 32; n <= 48; n += 16) {
20867 for (size_t k = 1; k <= 80; k += 17) {
20868 GemmMicrokernelTester()
20869 .mr(3)
20870 .nr(16)
20871 .kr(8)
20872 .sr(1)
20873 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020874 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020875 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020876 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020877 }
20878 }
20879 }
20880
20881 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_div_16_strided_cn) {
20882 TEST_REQUIRES_ARM_NEON;
20883 for (uint32_t n = 32; n <= 48; n += 16) {
20884 for (size_t k = 1; k <= 80; k += 17) {
20885 GemmMicrokernelTester()
20886 .mr(3)
20887 .nr(16)
20888 .kr(8)
20889 .sr(1)
20890 .m(3)
20891 .n(n)
20892 .k(k)
20893 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020894 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020895 }
20896 }
20897 }
20898
20899 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_div_16_strided_a) {
20900 TEST_REQUIRES_ARM_NEON;
20901 for (uint32_t n = 32; n <= 48; n += 16) {
20902 for (size_t k = 1; k <= 80; k += 17) {
20903 GemmMicrokernelTester()
20904 .mr(3)
20905 .nr(16)
20906 .kr(8)
20907 .sr(1)
20908 .m(3)
20909 .n(n)
20910 .k(k)
20911 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020912 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020913 }
20914 }
20915 }
20916
20917 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_div_16_subtile) {
20918 TEST_REQUIRES_ARM_NEON;
20919 for (uint32_t n = 32; n <= 48; n += 16) {
20920 for (size_t k = 1; k <= 80; k += 17) {
20921 for (uint32_t m = 1; m <= 3; m++) {
20922 GemmMicrokernelTester()
20923 .mr(3)
20924 .nr(16)
20925 .kr(8)
20926 .sr(1)
20927 .m(m)
20928 .n(n)
20929 .k(k)
20930 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020931 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020932 }
20933 }
20934 }
20935 }
20936
20937 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, strided_cm_subtile) {
20938 TEST_REQUIRES_ARM_NEON;
20939 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020940 for (uint32_t n = 1; n <= 16; n++) {
20941 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020942 GemmMicrokernelTester()
20943 .mr(3)
20944 .nr(16)
20945 .kr(8)
20946 .sr(1)
20947 .m(m)
20948 .n(n)
20949 .k(k)
20950 .cm_stride(19)
20951 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020952 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020953 }
20954 }
20955 }
20956 }
20957
20958 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, qmin) {
20959 TEST_REQUIRES_ARM_NEON;
20960 GemmMicrokernelTester()
20961 .mr(3)
20962 .nr(16)
20963 .kr(8)
20964 .sr(1)
20965 .m(3)
20966 .n(16)
20967 .k(16)
20968 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020969 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020970 }
20971
20972 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, qmax) {
20973 TEST_REQUIRES_ARM_NEON;
20974 GemmMicrokernelTester()
20975 .mr(3)
20976 .nr(16)
20977 .kr(8)
20978 .sr(1)
20979 .m(3)
20980 .n(16)
20981 .k(16)
20982 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020983 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020984 }
20985
20986 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, strided_cm) {
20987 TEST_REQUIRES_ARM_NEON;
20988 GemmMicrokernelTester()
20989 .mr(3)
20990 .nr(16)
20991 .kr(8)
20992 .sr(1)
20993 .m(3)
20994 .n(16)
20995 .k(16)
20996 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020997 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020998 }
20999#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
21000
21001
21002#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan89991902021-12-06 00:54:36 -080021003 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_eq_16) {
21004 TEST_REQUIRES_ARM_NEON;
21005 GemmMicrokernelTester()
21006 .mr(1)
21007 .nr(8)
21008 .kr(16)
21009 .sr(1)
21010 .m(1)
21011 .n(8)
21012 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080021013 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021014 }
21015
21016 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, strided_cn) {
21017 TEST_REQUIRES_ARM_NEON;
21018 GemmMicrokernelTester()
21019 .mr(1)
21020 .nr(8)
21021 .kr(16)
21022 .sr(1)
21023 .m(1)
21024 .n(8)
21025 .k(16)
21026 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021027 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021028 }
21029
21030 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_eq_16_strided_a) {
21031 TEST_REQUIRES_ARM_NEON;
21032 GemmMicrokernelTester()
21033 .mr(1)
21034 .nr(8)
21035 .kr(16)
21036 .sr(1)
21037 .m(1)
21038 .n(8)
21039 .k(16)
21040 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080021041 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021042 }
21043
21044 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_eq_16_subtile) {
21045 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021046 for (uint32_t n = 1; n <= 8; n++) {
21047 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080021048 GemmMicrokernelTester()
21049 .mr(1)
21050 .nr(8)
21051 .kr(16)
21052 .sr(1)
21053 .m(m)
21054 .n(n)
21055 .k(16)
21056 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021057 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021058 }
21059 }
21060 }
21061
21062 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_eq_16_subtile_m) {
21063 TEST_REQUIRES_ARM_NEON;
21064 for (uint32_t m = 1; m <= 1; m++) {
21065 GemmMicrokernelTester()
21066 .mr(1)
21067 .nr(8)
21068 .kr(16)
21069 .sr(1)
21070 .m(m)
21071 .n(8)
21072 .k(16)
21073 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021074 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021075 }
21076 }
21077
21078 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_eq_16_subtile_n) {
21079 TEST_REQUIRES_ARM_NEON;
21080 for (uint32_t n = 1; n <= 8; n++) {
21081 GemmMicrokernelTester()
21082 .mr(1)
21083 .nr(8)
21084 .kr(16)
21085 .sr(1)
21086 .m(1)
21087 .n(n)
21088 .k(16)
21089 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021090 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021091 }
21092 }
21093
21094 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_lt_16) {
21095 TEST_REQUIRES_ARM_NEON;
21096 for (size_t k = 1; k < 16; k++) {
21097 GemmMicrokernelTester()
21098 .mr(1)
21099 .nr(8)
21100 .kr(16)
21101 .sr(1)
21102 .m(1)
21103 .n(8)
21104 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021105 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021106 }
21107 }
21108
21109 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_lt_16_strided_a) {
21110 TEST_REQUIRES_ARM_NEON;
21111 for (size_t k = 1; k < 16; k++) {
21112 GemmMicrokernelTester()
21113 .mr(1)
21114 .nr(8)
21115 .kr(16)
21116 .sr(1)
21117 .m(1)
21118 .n(8)
21119 .k(k)
21120 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080021121 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021122 }
21123 }
21124
21125 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_lt_16_subtile) {
21126 TEST_REQUIRES_ARM_NEON;
21127 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021128 for (uint32_t n = 1; n <= 8; n++) {
21129 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080021130 GemmMicrokernelTester()
21131 .mr(1)
21132 .nr(8)
21133 .kr(16)
21134 .sr(1)
21135 .m(m)
21136 .n(n)
21137 .k(k)
21138 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021139 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021140 }
21141 }
21142 }
21143 }
21144
21145 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_gt_16) {
21146 TEST_REQUIRES_ARM_NEON;
21147 for (size_t k = 17; k < 32; k++) {
21148 GemmMicrokernelTester()
21149 .mr(1)
21150 .nr(8)
21151 .kr(16)
21152 .sr(1)
21153 .m(1)
21154 .n(8)
21155 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021156 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021157 }
21158 }
21159
21160 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_gt_16_strided_a) {
21161 TEST_REQUIRES_ARM_NEON;
21162 for (size_t k = 17; k < 32; k++) {
21163 GemmMicrokernelTester()
21164 .mr(1)
21165 .nr(8)
21166 .kr(16)
21167 .sr(1)
21168 .m(1)
21169 .n(8)
21170 .k(k)
21171 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080021172 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021173 }
21174 }
21175
21176 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_gt_16_subtile) {
21177 TEST_REQUIRES_ARM_NEON;
21178 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021179 for (uint32_t n = 1; n <= 8; n++) {
21180 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080021181 GemmMicrokernelTester()
21182 .mr(1)
21183 .nr(8)
21184 .kr(16)
21185 .sr(1)
21186 .m(m)
21187 .n(n)
21188 .k(k)
21189 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021190 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021191 }
21192 }
21193 }
21194 }
21195
21196 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_div_16) {
21197 TEST_REQUIRES_ARM_NEON;
21198 for (size_t k = 32; k <= 160; k += 16) {
21199 GemmMicrokernelTester()
21200 .mr(1)
21201 .nr(8)
21202 .kr(16)
21203 .sr(1)
21204 .m(1)
21205 .n(8)
21206 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021207 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021208 }
21209 }
21210
21211 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_div_16_strided_a) {
21212 TEST_REQUIRES_ARM_NEON;
21213 for (size_t k = 32; k <= 160; k += 16) {
21214 GemmMicrokernelTester()
21215 .mr(1)
21216 .nr(8)
21217 .kr(16)
21218 .sr(1)
21219 .m(1)
21220 .n(8)
21221 .k(k)
21222 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080021223 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021224 }
21225 }
21226
21227 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_div_16_subtile) {
21228 TEST_REQUIRES_ARM_NEON;
21229 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021230 for (uint32_t n = 1; n <= 8; n++) {
21231 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080021232 GemmMicrokernelTester()
21233 .mr(1)
21234 .nr(8)
21235 .kr(16)
21236 .sr(1)
21237 .m(m)
21238 .n(n)
21239 .k(k)
21240 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021241 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021242 }
21243 }
21244 }
21245 }
21246
21247 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_gt_8) {
21248 TEST_REQUIRES_ARM_NEON;
21249 for (uint32_t n = 9; n < 16; n++) {
21250 for (size_t k = 1; k <= 80; k += 17) {
21251 GemmMicrokernelTester()
21252 .mr(1)
21253 .nr(8)
21254 .kr(16)
21255 .sr(1)
21256 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021257 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080021258 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021259 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021260 }
21261 }
21262 }
21263
21264 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_gt_8_strided_cn) {
21265 TEST_REQUIRES_ARM_NEON;
21266 for (uint32_t n = 9; n < 16; n++) {
21267 for (size_t k = 1; k <= 80; k += 17) {
21268 GemmMicrokernelTester()
21269 .mr(1)
21270 .nr(8)
21271 .kr(16)
21272 .sr(1)
21273 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021274 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080021275 .k(k)
21276 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021277 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021278 }
21279 }
21280 }
21281
21282 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_gt_8_strided_a) {
21283 TEST_REQUIRES_ARM_NEON;
21284 for (uint32_t n = 9; n < 16; n++) {
21285 for (size_t k = 1; k <= 80; k += 17) {
21286 GemmMicrokernelTester()
21287 .mr(1)
21288 .nr(8)
21289 .kr(16)
21290 .sr(1)
21291 .m(1)
21292 .n(n)
21293 .k(k)
21294 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080021295 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021296 }
21297 }
21298 }
21299
21300 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_gt_8_subtile) {
21301 TEST_REQUIRES_ARM_NEON;
21302 for (uint32_t n = 9; n < 16; n++) {
21303 for (size_t k = 1; k <= 80; k += 17) {
21304 for (uint32_t m = 1; m <= 1; m++) {
21305 GemmMicrokernelTester()
21306 .mr(1)
21307 .nr(8)
21308 .kr(16)
21309 .sr(1)
21310 .m(m)
21311 .n(n)
21312 .k(k)
21313 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021314 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021315 }
21316 }
21317 }
21318 }
21319
21320 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_div_8) {
21321 TEST_REQUIRES_ARM_NEON;
21322 for (uint32_t n = 16; n <= 24; n += 8) {
21323 for (size_t k = 1; k <= 80; k += 17) {
21324 GemmMicrokernelTester()
21325 .mr(1)
21326 .nr(8)
21327 .kr(16)
21328 .sr(1)
21329 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021330 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080021331 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021332 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021333 }
21334 }
21335 }
21336
21337 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_div_8_strided_cn) {
21338 TEST_REQUIRES_ARM_NEON;
21339 for (uint32_t n = 16; n <= 24; n += 8) {
21340 for (size_t k = 1; k <= 80; k += 17) {
21341 GemmMicrokernelTester()
21342 .mr(1)
21343 .nr(8)
21344 .kr(16)
21345 .sr(1)
21346 .m(1)
21347 .n(n)
21348 .k(k)
21349 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021350 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021351 }
21352 }
21353 }
21354
21355 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_div_8_strided_a) {
21356 TEST_REQUIRES_ARM_NEON;
21357 for (uint32_t n = 16; n <= 24; n += 8) {
21358 for (size_t k = 1; k <= 80; k += 17) {
21359 GemmMicrokernelTester()
21360 .mr(1)
21361 .nr(8)
21362 .kr(16)
21363 .sr(1)
21364 .m(1)
21365 .n(n)
21366 .k(k)
21367 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080021368 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021369 }
21370 }
21371 }
21372
21373 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_div_8_subtile) {
21374 TEST_REQUIRES_ARM_NEON;
21375 for (uint32_t n = 16; n <= 24; n += 8) {
21376 for (size_t k = 1; k <= 80; k += 17) {
21377 for (uint32_t m = 1; m <= 1; m++) {
21378 GemmMicrokernelTester()
21379 .mr(1)
21380 .nr(8)
21381 .kr(16)
21382 .sr(1)
21383 .m(m)
21384 .n(n)
21385 .k(k)
21386 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021387 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021388 }
21389 }
21390 }
21391 }
21392
21393 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, strided_cm_subtile) {
21394 TEST_REQUIRES_ARM_NEON;
21395 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021396 for (uint32_t n = 1; n <= 8; n++) {
21397 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080021398 GemmMicrokernelTester()
21399 .mr(1)
21400 .nr(8)
21401 .kr(16)
21402 .sr(1)
21403 .m(m)
21404 .n(n)
21405 .k(k)
21406 .cm_stride(11)
21407 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021408 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021409 }
21410 }
21411 }
21412 }
21413
21414 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, qmin) {
21415 TEST_REQUIRES_ARM_NEON;
21416 GemmMicrokernelTester()
21417 .mr(1)
21418 .nr(8)
21419 .kr(16)
21420 .sr(1)
21421 .m(1)
21422 .n(8)
21423 .k(16)
21424 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021425 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021426 }
21427
21428 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, qmax) {
21429 TEST_REQUIRES_ARM_NEON;
21430 GemmMicrokernelTester()
21431 .mr(1)
21432 .nr(8)
21433 .kr(16)
21434 .sr(1)
21435 .m(1)
21436 .n(8)
21437 .k(16)
21438 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021439 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021440 }
21441
21442 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, strided_cm) {
21443 TEST_REQUIRES_ARM_NEON;
21444 GemmMicrokernelTester()
21445 .mr(1)
21446 .nr(8)
21447 .kr(16)
21448 .sr(1)
21449 .m(1)
21450 .n(8)
21451 .k(16)
21452 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021453 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021454 }
21455#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
21456
21457
21458#if XNN_ARCH_ARM || XNN_ARCH_ARM64
21459 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_eq_16) {
21460 TEST_REQUIRES_ARM_NEON;
21461 GemmMicrokernelTester()
21462 .mr(2)
21463 .nr(8)
21464 .kr(16)
21465 .sr(1)
21466 .m(2)
21467 .n(8)
21468 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080021469 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021470 }
21471
21472 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, strided_cn) {
21473 TEST_REQUIRES_ARM_NEON;
21474 GemmMicrokernelTester()
21475 .mr(2)
21476 .nr(8)
21477 .kr(16)
21478 .sr(1)
21479 .m(2)
21480 .n(8)
21481 .k(16)
21482 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021483 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021484 }
21485
21486 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_eq_16_strided_a) {
21487 TEST_REQUIRES_ARM_NEON;
21488 GemmMicrokernelTester()
21489 .mr(2)
21490 .nr(8)
21491 .kr(16)
21492 .sr(1)
21493 .m(2)
21494 .n(8)
21495 .k(16)
21496 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080021497 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021498 }
21499
21500 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_eq_16_subtile) {
21501 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021502 for (uint32_t n = 1; n <= 8; n++) {
21503 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080021504 GemmMicrokernelTester()
21505 .mr(2)
21506 .nr(8)
21507 .kr(16)
21508 .sr(1)
21509 .m(m)
21510 .n(n)
21511 .k(16)
21512 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021513 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021514 }
21515 }
21516 }
21517
21518 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_eq_16_subtile_m) {
21519 TEST_REQUIRES_ARM_NEON;
21520 for (uint32_t m = 1; m <= 2; m++) {
21521 GemmMicrokernelTester()
21522 .mr(2)
21523 .nr(8)
21524 .kr(16)
21525 .sr(1)
21526 .m(m)
21527 .n(8)
21528 .k(16)
21529 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021530 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021531 }
21532 }
21533
21534 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_eq_16_subtile_n) {
21535 TEST_REQUIRES_ARM_NEON;
21536 for (uint32_t n = 1; n <= 8; n++) {
21537 GemmMicrokernelTester()
21538 .mr(2)
21539 .nr(8)
21540 .kr(16)
21541 .sr(1)
21542 .m(2)
21543 .n(n)
21544 .k(16)
21545 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021546 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021547 }
21548 }
21549
21550 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_lt_16) {
21551 TEST_REQUIRES_ARM_NEON;
21552 for (size_t k = 1; k < 16; k++) {
21553 GemmMicrokernelTester()
21554 .mr(2)
21555 .nr(8)
21556 .kr(16)
21557 .sr(1)
21558 .m(2)
21559 .n(8)
21560 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021561 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021562 }
21563 }
21564
21565 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_lt_16_strided_a) {
21566 TEST_REQUIRES_ARM_NEON;
21567 for (size_t k = 1; k < 16; k++) {
21568 GemmMicrokernelTester()
21569 .mr(2)
21570 .nr(8)
21571 .kr(16)
21572 .sr(1)
21573 .m(2)
21574 .n(8)
21575 .k(k)
21576 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080021577 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021578 }
21579 }
21580
21581 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_lt_16_subtile) {
21582 TEST_REQUIRES_ARM_NEON;
21583 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021584 for (uint32_t n = 1; n <= 8; n++) {
21585 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080021586 GemmMicrokernelTester()
21587 .mr(2)
21588 .nr(8)
21589 .kr(16)
21590 .sr(1)
21591 .m(m)
21592 .n(n)
21593 .k(k)
21594 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021595 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021596 }
21597 }
21598 }
21599 }
21600
21601 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_gt_16) {
21602 TEST_REQUIRES_ARM_NEON;
21603 for (size_t k = 17; k < 32; k++) {
21604 GemmMicrokernelTester()
21605 .mr(2)
21606 .nr(8)
21607 .kr(16)
21608 .sr(1)
21609 .m(2)
21610 .n(8)
21611 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021612 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021613 }
21614 }
21615
21616 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_gt_16_strided_a) {
21617 TEST_REQUIRES_ARM_NEON;
21618 for (size_t k = 17; k < 32; k++) {
21619 GemmMicrokernelTester()
21620 .mr(2)
21621 .nr(8)
21622 .kr(16)
21623 .sr(1)
21624 .m(2)
21625 .n(8)
21626 .k(k)
21627 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080021628 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021629 }
21630 }
21631
21632 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_gt_16_subtile) {
21633 TEST_REQUIRES_ARM_NEON;
21634 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021635 for (uint32_t n = 1; n <= 8; n++) {
21636 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080021637 GemmMicrokernelTester()
21638 .mr(2)
21639 .nr(8)
21640 .kr(16)
21641 .sr(1)
21642 .m(m)
21643 .n(n)
21644 .k(k)
21645 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021646 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021647 }
21648 }
21649 }
21650 }
21651
21652 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_div_16) {
21653 TEST_REQUIRES_ARM_NEON;
21654 for (size_t k = 32; k <= 160; k += 16) {
21655 GemmMicrokernelTester()
21656 .mr(2)
21657 .nr(8)
21658 .kr(16)
21659 .sr(1)
21660 .m(2)
21661 .n(8)
21662 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021663 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021664 }
21665 }
21666
21667 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_div_16_strided_a) {
21668 TEST_REQUIRES_ARM_NEON;
21669 for (size_t k = 32; k <= 160; k += 16) {
21670 GemmMicrokernelTester()
21671 .mr(2)
21672 .nr(8)
21673 .kr(16)
21674 .sr(1)
21675 .m(2)
21676 .n(8)
21677 .k(k)
21678 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080021679 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021680 }
21681 }
21682
21683 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_div_16_subtile) {
21684 TEST_REQUIRES_ARM_NEON;
21685 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021686 for (uint32_t n = 1; n <= 8; n++) {
21687 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080021688 GemmMicrokernelTester()
21689 .mr(2)
21690 .nr(8)
21691 .kr(16)
21692 .sr(1)
21693 .m(m)
21694 .n(n)
21695 .k(k)
21696 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021697 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021698 }
21699 }
21700 }
21701 }
21702
21703 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_gt_8) {
21704 TEST_REQUIRES_ARM_NEON;
21705 for (uint32_t n = 9; n < 16; n++) {
21706 for (size_t k = 1; k <= 80; k += 17) {
21707 GemmMicrokernelTester()
21708 .mr(2)
21709 .nr(8)
21710 .kr(16)
21711 .sr(1)
21712 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021713 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080021714 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021715 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021716 }
21717 }
21718 }
21719
21720 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_gt_8_strided_cn) {
21721 TEST_REQUIRES_ARM_NEON;
21722 for (uint32_t n = 9; n < 16; n++) {
21723 for (size_t k = 1; k <= 80; k += 17) {
21724 GemmMicrokernelTester()
21725 .mr(2)
21726 .nr(8)
21727 .kr(16)
21728 .sr(1)
21729 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021730 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080021731 .k(k)
21732 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021733 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021734 }
21735 }
21736 }
21737
21738 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_gt_8_strided_a) {
21739 TEST_REQUIRES_ARM_NEON;
21740 for (uint32_t n = 9; n < 16; n++) {
21741 for (size_t k = 1; k <= 80; k += 17) {
21742 GemmMicrokernelTester()
21743 .mr(2)
21744 .nr(8)
21745 .kr(16)
21746 .sr(1)
21747 .m(2)
21748 .n(n)
21749 .k(k)
21750 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080021751 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021752 }
21753 }
21754 }
21755
21756 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_gt_8_subtile) {
21757 TEST_REQUIRES_ARM_NEON;
21758 for (uint32_t n = 9; n < 16; n++) {
21759 for (size_t k = 1; k <= 80; k += 17) {
21760 for (uint32_t m = 1; m <= 2; m++) {
21761 GemmMicrokernelTester()
21762 .mr(2)
21763 .nr(8)
21764 .kr(16)
21765 .sr(1)
21766 .m(m)
21767 .n(n)
21768 .k(k)
21769 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021770 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021771 }
21772 }
21773 }
21774 }
21775
21776 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_div_8) {
21777 TEST_REQUIRES_ARM_NEON;
21778 for (uint32_t n = 16; n <= 24; n += 8) {
21779 for (size_t k = 1; k <= 80; k += 17) {
21780 GemmMicrokernelTester()
21781 .mr(2)
21782 .nr(8)
21783 .kr(16)
21784 .sr(1)
21785 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021786 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080021787 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021788 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021789 }
21790 }
21791 }
21792
21793 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_div_8_strided_cn) {
21794 TEST_REQUIRES_ARM_NEON;
21795 for (uint32_t n = 16; n <= 24; n += 8) {
21796 for (size_t k = 1; k <= 80; k += 17) {
21797 GemmMicrokernelTester()
21798 .mr(2)
21799 .nr(8)
21800 .kr(16)
21801 .sr(1)
21802 .m(2)
21803 .n(n)
21804 .k(k)
21805 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021806 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021807 }
21808 }
21809 }
21810
21811 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_div_8_strided_a) {
21812 TEST_REQUIRES_ARM_NEON;
21813 for (uint32_t n = 16; n <= 24; n += 8) {
21814 for (size_t k = 1; k <= 80; k += 17) {
21815 GemmMicrokernelTester()
21816 .mr(2)
21817 .nr(8)
21818 .kr(16)
21819 .sr(1)
21820 .m(2)
21821 .n(n)
21822 .k(k)
21823 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080021824 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021825 }
21826 }
21827 }
21828
21829 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_div_8_subtile) {
21830 TEST_REQUIRES_ARM_NEON;
21831 for (uint32_t n = 16; n <= 24; n += 8) {
21832 for (size_t k = 1; k <= 80; k += 17) {
21833 for (uint32_t m = 1; m <= 2; m++) {
21834 GemmMicrokernelTester()
21835 .mr(2)
21836 .nr(8)
21837 .kr(16)
21838 .sr(1)
21839 .m(m)
21840 .n(n)
21841 .k(k)
21842 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021843 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021844 }
21845 }
21846 }
21847 }
21848
21849 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, strided_cm_subtile) {
21850 TEST_REQUIRES_ARM_NEON;
21851 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021852 for (uint32_t n = 1; n <= 8; n++) {
21853 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080021854 GemmMicrokernelTester()
21855 .mr(2)
21856 .nr(8)
21857 .kr(16)
21858 .sr(1)
21859 .m(m)
21860 .n(n)
21861 .k(k)
21862 .cm_stride(11)
21863 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021864 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021865 }
21866 }
21867 }
21868 }
21869
21870 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, qmin) {
21871 TEST_REQUIRES_ARM_NEON;
21872 GemmMicrokernelTester()
21873 .mr(2)
21874 .nr(8)
21875 .kr(16)
21876 .sr(1)
21877 .m(2)
21878 .n(8)
21879 .k(16)
21880 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021881 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021882 }
21883
21884 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, qmax) {
21885 TEST_REQUIRES_ARM_NEON;
21886 GemmMicrokernelTester()
21887 .mr(2)
21888 .nr(8)
21889 .kr(16)
21890 .sr(1)
21891 .m(2)
21892 .n(8)
21893 .k(16)
21894 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021895 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021896 }
21897
21898 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, strided_cm) {
21899 TEST_REQUIRES_ARM_NEON;
21900 GemmMicrokernelTester()
21901 .mr(2)
21902 .nr(8)
21903 .kr(16)
21904 .sr(1)
21905 .m(2)
21906 .n(8)
21907 .k(16)
21908 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021909 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021910 }
21911#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
21912
21913
21914#if XNN_ARCH_ARM || XNN_ARCH_ARM64
21915 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_eq_16) {
21916 TEST_REQUIRES_ARM_NEON;
21917 GemmMicrokernelTester()
21918 .mr(3)
21919 .nr(8)
21920 .kr(16)
21921 .sr(1)
21922 .m(3)
21923 .n(8)
21924 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080021925 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021926 }
21927
21928 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, strided_cn) {
21929 TEST_REQUIRES_ARM_NEON;
21930 GemmMicrokernelTester()
21931 .mr(3)
21932 .nr(8)
21933 .kr(16)
21934 .sr(1)
21935 .m(3)
21936 .n(8)
21937 .k(16)
21938 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021939 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021940 }
21941
21942 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_eq_16_strided_a) {
21943 TEST_REQUIRES_ARM_NEON;
21944 GemmMicrokernelTester()
21945 .mr(3)
21946 .nr(8)
21947 .kr(16)
21948 .sr(1)
21949 .m(3)
21950 .n(8)
21951 .k(16)
21952 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080021953 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021954 }
21955
21956 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_eq_16_subtile) {
21957 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021958 for (uint32_t n = 1; n <= 8; n++) {
21959 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080021960 GemmMicrokernelTester()
21961 .mr(3)
21962 .nr(8)
21963 .kr(16)
21964 .sr(1)
21965 .m(m)
21966 .n(n)
21967 .k(16)
21968 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021969 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021970 }
21971 }
21972 }
21973
21974 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_eq_16_subtile_m) {
21975 TEST_REQUIRES_ARM_NEON;
21976 for (uint32_t m = 1; m <= 3; m++) {
21977 GemmMicrokernelTester()
21978 .mr(3)
21979 .nr(8)
21980 .kr(16)
21981 .sr(1)
21982 .m(m)
21983 .n(8)
21984 .k(16)
21985 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021986 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021987 }
21988 }
21989
21990 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_eq_16_subtile_n) {
21991 TEST_REQUIRES_ARM_NEON;
21992 for (uint32_t n = 1; n <= 8; n++) {
21993 GemmMicrokernelTester()
21994 .mr(3)
21995 .nr(8)
21996 .kr(16)
21997 .sr(1)
21998 .m(3)
21999 .n(n)
22000 .k(16)
22001 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022002 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022003 }
22004 }
22005
22006 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_lt_16) {
22007 TEST_REQUIRES_ARM_NEON;
22008 for (size_t k = 1; k < 16; k++) {
22009 GemmMicrokernelTester()
22010 .mr(3)
22011 .nr(8)
22012 .kr(16)
22013 .sr(1)
22014 .m(3)
22015 .n(8)
22016 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022017 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022018 }
22019 }
22020
22021 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_lt_16_strided_a) {
22022 TEST_REQUIRES_ARM_NEON;
22023 for (size_t k = 1; k < 16; k++) {
22024 GemmMicrokernelTester()
22025 .mr(3)
22026 .nr(8)
22027 .kr(16)
22028 .sr(1)
22029 .m(3)
22030 .n(8)
22031 .k(k)
22032 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022033 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022034 }
22035 }
22036
22037 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_lt_16_subtile) {
22038 TEST_REQUIRES_ARM_NEON;
22039 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022040 for (uint32_t n = 1; n <= 8; n++) {
22041 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080022042 GemmMicrokernelTester()
22043 .mr(3)
22044 .nr(8)
22045 .kr(16)
22046 .sr(1)
22047 .m(m)
22048 .n(n)
22049 .k(k)
22050 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022051 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022052 }
22053 }
22054 }
22055 }
22056
22057 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_gt_16) {
22058 TEST_REQUIRES_ARM_NEON;
22059 for (size_t k = 17; k < 32; k++) {
22060 GemmMicrokernelTester()
22061 .mr(3)
22062 .nr(8)
22063 .kr(16)
22064 .sr(1)
22065 .m(3)
22066 .n(8)
22067 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022068 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022069 }
22070 }
22071
22072 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_gt_16_strided_a) {
22073 TEST_REQUIRES_ARM_NEON;
22074 for (size_t k = 17; k < 32; k++) {
22075 GemmMicrokernelTester()
22076 .mr(3)
22077 .nr(8)
22078 .kr(16)
22079 .sr(1)
22080 .m(3)
22081 .n(8)
22082 .k(k)
22083 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080022084 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022085 }
22086 }
22087
22088 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_gt_16_subtile) {
22089 TEST_REQUIRES_ARM_NEON;
22090 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022091 for (uint32_t n = 1; n <= 8; n++) {
22092 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080022093 GemmMicrokernelTester()
22094 .mr(3)
22095 .nr(8)
22096 .kr(16)
22097 .sr(1)
22098 .m(m)
22099 .n(n)
22100 .k(k)
22101 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022102 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022103 }
22104 }
22105 }
22106 }
22107
22108 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_div_16) {
22109 TEST_REQUIRES_ARM_NEON;
22110 for (size_t k = 32; k <= 160; k += 16) {
22111 GemmMicrokernelTester()
22112 .mr(3)
22113 .nr(8)
22114 .kr(16)
22115 .sr(1)
22116 .m(3)
22117 .n(8)
22118 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022119 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022120 }
22121 }
22122
22123 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_div_16_strided_a) {
22124 TEST_REQUIRES_ARM_NEON;
22125 for (size_t k = 32; k <= 160; k += 16) {
22126 GemmMicrokernelTester()
22127 .mr(3)
22128 .nr(8)
22129 .kr(16)
22130 .sr(1)
22131 .m(3)
22132 .n(8)
22133 .k(k)
22134 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080022135 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022136 }
22137 }
22138
22139 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, k_div_16_subtile) {
22140 TEST_REQUIRES_ARM_NEON;
22141 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022142 for (uint32_t n = 1; n <= 8; n++) {
22143 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080022144 GemmMicrokernelTester()
22145 .mr(3)
22146 .nr(8)
22147 .kr(16)
22148 .sr(1)
22149 .m(m)
22150 .n(n)
22151 .k(k)
22152 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022153 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022154 }
22155 }
22156 }
22157 }
22158
22159 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_gt_8) {
22160 TEST_REQUIRES_ARM_NEON;
22161 for (uint32_t n = 9; n < 16; n++) {
22162 for (size_t k = 1; k <= 80; k += 17) {
22163 GemmMicrokernelTester()
22164 .mr(3)
22165 .nr(8)
22166 .kr(16)
22167 .sr(1)
22168 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022169 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080022170 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022171 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022172 }
22173 }
22174 }
22175
22176 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_gt_8_strided_cn) {
22177 TEST_REQUIRES_ARM_NEON;
22178 for (uint32_t n = 9; n < 16; n++) {
22179 for (size_t k = 1; k <= 80; k += 17) {
22180 GemmMicrokernelTester()
22181 .mr(3)
22182 .nr(8)
22183 .kr(16)
22184 .sr(1)
22185 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022186 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080022187 .k(k)
22188 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080022189 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022190 }
22191 }
22192 }
22193
22194 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_gt_8_strided_a) {
22195 TEST_REQUIRES_ARM_NEON;
22196 for (uint32_t n = 9; n < 16; n++) {
22197 for (size_t k = 1; k <= 80; k += 17) {
22198 GemmMicrokernelTester()
22199 .mr(3)
22200 .nr(8)
22201 .kr(16)
22202 .sr(1)
22203 .m(3)
22204 .n(n)
22205 .k(k)
22206 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080022207 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022208 }
22209 }
22210 }
22211
22212 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_gt_8_subtile) {
22213 TEST_REQUIRES_ARM_NEON;
22214 for (uint32_t n = 9; n < 16; n++) {
22215 for (size_t k = 1; k <= 80; k += 17) {
22216 for (uint32_t m = 1; m <= 3; m++) {
22217 GemmMicrokernelTester()
22218 .mr(3)
22219 .nr(8)
22220 .kr(16)
22221 .sr(1)
22222 .m(m)
22223 .n(n)
22224 .k(k)
22225 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022226 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022227 }
22228 }
22229 }
22230 }
22231
22232 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_div_8) {
22233 TEST_REQUIRES_ARM_NEON;
22234 for (uint32_t n = 16; n <= 24; n += 8) {
22235 for (size_t k = 1; k <= 80; k += 17) {
22236 GemmMicrokernelTester()
22237 .mr(3)
22238 .nr(8)
22239 .kr(16)
22240 .sr(1)
22241 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022242 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080022243 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022244 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022245 }
22246 }
22247 }
22248
22249 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_div_8_strided_cn) {
22250 TEST_REQUIRES_ARM_NEON;
22251 for (uint32_t n = 16; n <= 24; n += 8) {
22252 for (size_t k = 1; k <= 80; k += 17) {
22253 GemmMicrokernelTester()
22254 .mr(3)
22255 .nr(8)
22256 .kr(16)
22257 .sr(1)
22258 .m(3)
22259 .n(n)
22260 .k(k)
22261 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080022262 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022263 }
22264 }
22265 }
22266
22267 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_div_8_strided_a) {
22268 TEST_REQUIRES_ARM_NEON;
22269 for (uint32_t n = 16; n <= 24; n += 8) {
22270 for (size_t k = 1; k <= 80; k += 17) {
22271 GemmMicrokernelTester()
22272 .mr(3)
22273 .nr(8)
22274 .kr(16)
22275 .sr(1)
22276 .m(3)
22277 .n(n)
22278 .k(k)
22279 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080022280 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022281 }
22282 }
22283 }
22284
22285 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, n_div_8_subtile) {
22286 TEST_REQUIRES_ARM_NEON;
22287 for (uint32_t n = 16; n <= 24; n += 8) {
22288 for (size_t k = 1; k <= 80; k += 17) {
22289 for (uint32_t m = 1; m <= 3; m++) {
22290 GemmMicrokernelTester()
22291 .mr(3)
22292 .nr(8)
22293 .kr(16)
22294 .sr(1)
22295 .m(m)
22296 .n(n)
22297 .k(k)
22298 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022299 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022300 }
22301 }
22302 }
22303 }
22304
22305 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, strided_cm_subtile) {
22306 TEST_REQUIRES_ARM_NEON;
22307 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022308 for (uint32_t n = 1; n <= 8; n++) {
22309 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080022310 GemmMicrokernelTester()
22311 .mr(3)
22312 .nr(8)
22313 .kr(16)
22314 .sr(1)
22315 .m(m)
22316 .n(n)
22317 .k(k)
22318 .cm_stride(11)
22319 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022320 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022321 }
22322 }
22323 }
22324 }
22325
22326 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, qmin) {
22327 TEST_REQUIRES_ARM_NEON;
22328 GemmMicrokernelTester()
22329 .mr(3)
22330 .nr(8)
22331 .kr(16)
22332 .sr(1)
22333 .m(3)
22334 .n(8)
22335 .k(16)
22336 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022337 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022338 }
22339
22340 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, qmax) {
22341 TEST_REQUIRES_ARM_NEON;
22342 GemmMicrokernelTester()
22343 .mr(3)
22344 .nr(8)
22345 .kr(16)
22346 .sr(1)
22347 .m(3)
22348 .n(8)
22349 .k(16)
22350 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022351 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022352 }
22353
22354 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C16__NEON_MLAL, strided_cm) {
22355 TEST_REQUIRES_ARM_NEON;
22356 GemmMicrokernelTester()
22357 .mr(3)
22358 .nr(8)
22359 .kr(16)
22360 .sr(1)
22361 .m(3)
22362 .n(8)
22363 .k(16)
22364 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080022365 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022366 }
22367#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
22368
22369
22370#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan89991902021-12-06 00:54:36 -080022371 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16) {
22372 TEST_REQUIRES_ARM_NEON;
22373 GemmMicrokernelTester()
22374 .mr(1)
22375 .nr(16)
22376 .kr(16)
22377 .sr(1)
22378 .m(1)
22379 .n(16)
22380 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080022381 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022382 }
22383
22384 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cn) {
22385 TEST_REQUIRES_ARM_NEON;
22386 GemmMicrokernelTester()
22387 .mr(1)
22388 .nr(16)
22389 .kr(16)
22390 .sr(1)
22391 .m(1)
22392 .n(16)
22393 .k(16)
22394 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022395 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022396 }
22397
22398 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_strided_a) {
22399 TEST_REQUIRES_ARM_NEON;
22400 GemmMicrokernelTester()
22401 .mr(1)
22402 .nr(16)
22403 .kr(16)
22404 .sr(1)
22405 .m(1)
22406 .n(16)
22407 .k(16)
22408 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022409 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022410 }
22411
22412 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile) {
22413 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022414 for (uint32_t n = 1; n <= 16; n++) {
22415 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080022416 GemmMicrokernelTester()
22417 .mr(1)
22418 .nr(16)
22419 .kr(16)
22420 .sr(1)
22421 .m(m)
22422 .n(n)
22423 .k(16)
22424 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022425 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022426 }
22427 }
22428 }
22429
22430 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile_m) {
22431 TEST_REQUIRES_ARM_NEON;
22432 for (uint32_t m = 1; m <= 1; m++) {
22433 GemmMicrokernelTester()
22434 .mr(1)
22435 .nr(16)
22436 .kr(16)
22437 .sr(1)
22438 .m(m)
22439 .n(16)
22440 .k(16)
22441 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022442 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022443 }
22444 }
22445
22446 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile_n) {
22447 TEST_REQUIRES_ARM_NEON;
22448 for (uint32_t n = 1; n <= 16; n++) {
22449 GemmMicrokernelTester()
22450 .mr(1)
22451 .nr(16)
22452 .kr(16)
22453 .sr(1)
22454 .m(1)
22455 .n(n)
22456 .k(16)
22457 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022458 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022459 }
22460 }
22461
22462 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_lt_16) {
22463 TEST_REQUIRES_ARM_NEON;
22464 for (size_t k = 1; k < 16; k++) {
22465 GemmMicrokernelTester()
22466 .mr(1)
22467 .nr(16)
22468 .kr(16)
22469 .sr(1)
22470 .m(1)
22471 .n(16)
22472 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022473 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022474 }
22475 }
22476
22477 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_lt_16_strided_a) {
22478 TEST_REQUIRES_ARM_NEON;
22479 for (size_t k = 1; k < 16; k++) {
22480 GemmMicrokernelTester()
22481 .mr(1)
22482 .nr(16)
22483 .kr(16)
22484 .sr(1)
22485 .m(1)
22486 .n(16)
22487 .k(k)
22488 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022489 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022490 }
22491 }
22492
22493 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_lt_16_subtile) {
22494 TEST_REQUIRES_ARM_NEON;
22495 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022496 for (uint32_t n = 1; n <= 16; n++) {
22497 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080022498 GemmMicrokernelTester()
22499 .mr(1)
22500 .nr(16)
22501 .kr(16)
22502 .sr(1)
22503 .m(m)
22504 .n(n)
22505 .k(k)
22506 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022507 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022508 }
22509 }
22510 }
22511 }
22512
22513 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_gt_16) {
22514 TEST_REQUIRES_ARM_NEON;
22515 for (size_t k = 17; k < 32; k++) {
22516 GemmMicrokernelTester()
22517 .mr(1)
22518 .nr(16)
22519 .kr(16)
22520 .sr(1)
22521 .m(1)
22522 .n(16)
22523 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022524 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022525 }
22526 }
22527
22528 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_gt_16_strided_a) {
22529 TEST_REQUIRES_ARM_NEON;
22530 for (size_t k = 17; k < 32; k++) {
22531 GemmMicrokernelTester()
22532 .mr(1)
22533 .nr(16)
22534 .kr(16)
22535 .sr(1)
22536 .m(1)
22537 .n(16)
22538 .k(k)
22539 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080022540 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022541 }
22542 }
22543
22544 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_gt_16_subtile) {
22545 TEST_REQUIRES_ARM_NEON;
22546 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022547 for (uint32_t n = 1; n <= 16; n++) {
22548 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080022549 GemmMicrokernelTester()
22550 .mr(1)
22551 .nr(16)
22552 .kr(16)
22553 .sr(1)
22554 .m(m)
22555 .n(n)
22556 .k(k)
22557 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022558 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022559 }
22560 }
22561 }
22562 }
22563
22564 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_div_16) {
22565 TEST_REQUIRES_ARM_NEON;
22566 for (size_t k = 32; k <= 160; k += 16) {
22567 GemmMicrokernelTester()
22568 .mr(1)
22569 .nr(16)
22570 .kr(16)
22571 .sr(1)
22572 .m(1)
22573 .n(16)
22574 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022575 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022576 }
22577 }
22578
22579 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_div_16_strided_a) {
22580 TEST_REQUIRES_ARM_NEON;
22581 for (size_t k = 32; k <= 160; k += 16) {
22582 GemmMicrokernelTester()
22583 .mr(1)
22584 .nr(16)
22585 .kr(16)
22586 .sr(1)
22587 .m(1)
22588 .n(16)
22589 .k(k)
22590 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080022591 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022592 }
22593 }
22594
22595 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_div_16_subtile) {
22596 TEST_REQUIRES_ARM_NEON;
22597 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022598 for (uint32_t n = 1; n <= 16; n++) {
22599 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080022600 GemmMicrokernelTester()
22601 .mr(1)
22602 .nr(16)
22603 .kr(16)
22604 .sr(1)
22605 .m(m)
22606 .n(n)
22607 .k(k)
22608 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022609 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022610 }
22611 }
22612 }
22613 }
22614
22615 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16) {
22616 TEST_REQUIRES_ARM_NEON;
22617 for (uint32_t n = 17; n < 32; n++) {
22618 for (size_t k = 1; k <= 80; k += 17) {
22619 GemmMicrokernelTester()
22620 .mr(1)
22621 .nr(16)
22622 .kr(16)
22623 .sr(1)
22624 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022625 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080022626 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022627 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022628 }
22629 }
22630 }
22631
22632 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_strided_cn) {
22633 TEST_REQUIRES_ARM_NEON;
22634 for (uint32_t n = 17; n < 32; n++) {
22635 for (size_t k = 1; k <= 80; k += 17) {
22636 GemmMicrokernelTester()
22637 .mr(1)
22638 .nr(16)
22639 .kr(16)
22640 .sr(1)
22641 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022642 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080022643 .k(k)
22644 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022645 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022646 }
22647 }
22648 }
22649
22650 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_strided_a) {
22651 TEST_REQUIRES_ARM_NEON;
22652 for (uint32_t n = 17; n < 32; n++) {
22653 for (size_t k = 1; k <= 80; k += 17) {
22654 GemmMicrokernelTester()
22655 .mr(1)
22656 .nr(16)
22657 .kr(16)
22658 .sr(1)
22659 .m(1)
22660 .n(n)
22661 .k(k)
22662 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080022663 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022664 }
22665 }
22666 }
22667
22668 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_subtile) {
22669 TEST_REQUIRES_ARM_NEON;
22670 for (uint32_t n = 17; n < 32; n++) {
22671 for (size_t k = 1; k <= 80; k += 17) {
22672 for (uint32_t m = 1; m <= 1; m++) {
22673 GemmMicrokernelTester()
22674 .mr(1)
22675 .nr(16)
22676 .kr(16)
22677 .sr(1)
22678 .m(m)
22679 .n(n)
22680 .k(k)
22681 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022682 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022683 }
22684 }
22685 }
22686 }
22687
22688 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16) {
22689 TEST_REQUIRES_ARM_NEON;
22690 for (uint32_t n = 32; n <= 48; n += 16) {
22691 for (size_t k = 1; k <= 80; k += 17) {
22692 GemmMicrokernelTester()
22693 .mr(1)
22694 .nr(16)
22695 .kr(16)
22696 .sr(1)
22697 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022698 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080022699 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022700 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022701 }
22702 }
22703 }
22704
22705 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_strided_cn) {
22706 TEST_REQUIRES_ARM_NEON;
22707 for (uint32_t n = 32; n <= 48; n += 16) {
22708 for (size_t k = 1; k <= 80; k += 17) {
22709 GemmMicrokernelTester()
22710 .mr(1)
22711 .nr(16)
22712 .kr(16)
22713 .sr(1)
22714 .m(1)
22715 .n(n)
22716 .k(k)
22717 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022718 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022719 }
22720 }
22721 }
22722
22723 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_strided_a) {
22724 TEST_REQUIRES_ARM_NEON;
22725 for (uint32_t n = 32; n <= 48; n += 16) {
22726 for (size_t k = 1; k <= 80; k += 17) {
22727 GemmMicrokernelTester()
22728 .mr(1)
22729 .nr(16)
22730 .kr(16)
22731 .sr(1)
22732 .m(1)
22733 .n(n)
22734 .k(k)
22735 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080022736 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022737 }
22738 }
22739 }
22740
22741 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_subtile) {
22742 TEST_REQUIRES_ARM_NEON;
22743 for (uint32_t n = 32; n <= 48; n += 16) {
22744 for (size_t k = 1; k <= 80; k += 17) {
22745 for (uint32_t m = 1; m <= 1; m++) {
22746 GemmMicrokernelTester()
22747 .mr(1)
22748 .nr(16)
22749 .kr(16)
22750 .sr(1)
22751 .m(m)
22752 .n(n)
22753 .k(k)
22754 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022755 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022756 }
22757 }
22758 }
22759 }
22760
22761 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cm_subtile) {
22762 TEST_REQUIRES_ARM_NEON;
22763 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022764 for (uint32_t n = 1; n <= 16; n++) {
22765 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080022766 GemmMicrokernelTester()
22767 .mr(1)
22768 .nr(16)
22769 .kr(16)
22770 .sr(1)
22771 .m(m)
22772 .n(n)
22773 .k(k)
22774 .cm_stride(19)
22775 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022776 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022777 }
22778 }
22779 }
22780 }
22781
22782 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, qmin) {
22783 TEST_REQUIRES_ARM_NEON;
22784 GemmMicrokernelTester()
22785 .mr(1)
22786 .nr(16)
22787 .kr(16)
22788 .sr(1)
22789 .m(1)
22790 .n(16)
22791 .k(16)
22792 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022793 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022794 }
22795
22796 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, qmax) {
22797 TEST_REQUIRES_ARM_NEON;
22798 GemmMicrokernelTester()
22799 .mr(1)
22800 .nr(16)
22801 .kr(16)
22802 .sr(1)
22803 .m(1)
22804 .n(16)
22805 .k(16)
22806 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022807 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022808 }
22809
22810 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cm) {
22811 TEST_REQUIRES_ARM_NEON;
22812 GemmMicrokernelTester()
22813 .mr(1)
22814 .nr(16)
22815 .kr(16)
22816 .sr(1)
22817 .m(1)
22818 .n(16)
22819 .k(16)
22820 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022821 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022822 }
22823#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
22824
22825
22826#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan89991902021-12-06 00:54:36 -080022827 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_eq_16) {
22828 TEST_REQUIRES_ARM_NEON;
22829 GemmMicrokernelTester()
22830 .mr(3)
22831 .nr(16)
22832 .kr(16)
22833 .sr(1)
22834 .m(3)
22835 .n(16)
22836 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080022837 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022838 }
22839
22840 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, strided_cn) {
22841 TEST_REQUIRES_ARM_NEON;
22842 GemmMicrokernelTester()
22843 .mr(3)
22844 .nr(16)
22845 .kr(16)
22846 .sr(1)
22847 .m(3)
22848 .n(16)
22849 .k(16)
22850 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022851 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022852 }
22853
22854 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_eq_16_strided_a) {
22855 TEST_REQUIRES_ARM_NEON;
22856 GemmMicrokernelTester()
22857 .mr(3)
22858 .nr(16)
22859 .kr(16)
22860 .sr(1)
22861 .m(3)
22862 .n(16)
22863 .k(16)
22864 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022865 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022866 }
22867
22868 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_eq_16_subtile) {
22869 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022870 for (uint32_t n = 1; n <= 16; n++) {
22871 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080022872 GemmMicrokernelTester()
22873 .mr(3)
22874 .nr(16)
22875 .kr(16)
22876 .sr(1)
22877 .m(m)
22878 .n(n)
22879 .k(16)
22880 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022881 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022882 }
22883 }
22884 }
22885
22886 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_eq_16_subtile_m) {
22887 TEST_REQUIRES_ARM_NEON;
22888 for (uint32_t m = 1; m <= 3; m++) {
22889 GemmMicrokernelTester()
22890 .mr(3)
22891 .nr(16)
22892 .kr(16)
22893 .sr(1)
22894 .m(m)
22895 .n(16)
22896 .k(16)
22897 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022898 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022899 }
22900 }
22901
22902 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_eq_16_subtile_n) {
22903 TEST_REQUIRES_ARM_NEON;
22904 for (uint32_t n = 1; n <= 16; n++) {
22905 GemmMicrokernelTester()
22906 .mr(3)
22907 .nr(16)
22908 .kr(16)
22909 .sr(1)
22910 .m(3)
22911 .n(n)
22912 .k(16)
22913 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022914 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022915 }
22916 }
22917
22918 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_lt_16) {
22919 TEST_REQUIRES_ARM_NEON;
22920 for (size_t k = 1; k < 16; k++) {
22921 GemmMicrokernelTester()
22922 .mr(3)
22923 .nr(16)
22924 .kr(16)
22925 .sr(1)
22926 .m(3)
22927 .n(16)
22928 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022929 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022930 }
22931 }
22932
22933 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_lt_16_strided_a) {
22934 TEST_REQUIRES_ARM_NEON;
22935 for (size_t k = 1; k < 16; k++) {
22936 GemmMicrokernelTester()
22937 .mr(3)
22938 .nr(16)
22939 .kr(16)
22940 .sr(1)
22941 .m(3)
22942 .n(16)
22943 .k(k)
22944 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022945 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022946 }
22947 }
22948
22949 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_lt_16_subtile) {
22950 TEST_REQUIRES_ARM_NEON;
22951 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022952 for (uint32_t n = 1; n <= 16; n++) {
22953 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080022954 GemmMicrokernelTester()
22955 .mr(3)
22956 .nr(16)
22957 .kr(16)
22958 .sr(1)
22959 .m(m)
22960 .n(n)
22961 .k(k)
22962 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022963 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022964 }
22965 }
22966 }
22967 }
22968
22969 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_gt_16) {
22970 TEST_REQUIRES_ARM_NEON;
22971 for (size_t k = 17; k < 32; k++) {
22972 GemmMicrokernelTester()
22973 .mr(3)
22974 .nr(16)
22975 .kr(16)
22976 .sr(1)
22977 .m(3)
22978 .n(16)
22979 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022980 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022981 }
22982 }
22983
22984 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_gt_16_strided_a) {
22985 TEST_REQUIRES_ARM_NEON;
22986 for (size_t k = 17; k < 32; k++) {
22987 GemmMicrokernelTester()
22988 .mr(3)
22989 .nr(16)
22990 .kr(16)
22991 .sr(1)
22992 .m(3)
22993 .n(16)
22994 .k(k)
22995 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080022996 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080022997 }
22998 }
22999
23000 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_gt_16_subtile) {
23001 TEST_REQUIRES_ARM_NEON;
23002 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023003 for (uint32_t n = 1; n <= 16; n++) {
23004 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080023005 GemmMicrokernelTester()
23006 .mr(3)
23007 .nr(16)
23008 .kr(16)
23009 .sr(1)
23010 .m(m)
23011 .n(n)
23012 .k(k)
23013 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023014 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023015 }
23016 }
23017 }
23018 }
23019
23020 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_div_16) {
23021 TEST_REQUIRES_ARM_NEON;
23022 for (size_t k = 32; k <= 160; k += 16) {
23023 GemmMicrokernelTester()
23024 .mr(3)
23025 .nr(16)
23026 .kr(16)
23027 .sr(1)
23028 .m(3)
23029 .n(16)
23030 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023031 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023032 }
23033 }
23034
23035 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_div_16_strided_a) {
23036 TEST_REQUIRES_ARM_NEON;
23037 for (size_t k = 32; k <= 160; k += 16) {
23038 GemmMicrokernelTester()
23039 .mr(3)
23040 .nr(16)
23041 .kr(16)
23042 .sr(1)
23043 .m(3)
23044 .n(16)
23045 .k(k)
23046 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080023047 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023048 }
23049 }
23050
23051 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_div_16_subtile) {
23052 TEST_REQUIRES_ARM_NEON;
23053 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023054 for (uint32_t n = 1; n <= 16; n++) {
23055 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080023056 GemmMicrokernelTester()
23057 .mr(3)
23058 .nr(16)
23059 .kr(16)
23060 .sr(1)
23061 .m(m)
23062 .n(n)
23063 .k(k)
23064 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023065 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023066 }
23067 }
23068 }
23069 }
23070
23071 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_gt_16) {
23072 TEST_REQUIRES_ARM_NEON;
23073 for (uint32_t n = 17; n < 32; n++) {
23074 for (size_t k = 1; k <= 80; k += 17) {
23075 GemmMicrokernelTester()
23076 .mr(3)
23077 .nr(16)
23078 .kr(16)
23079 .sr(1)
23080 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023081 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080023082 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023083 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023084 }
23085 }
23086 }
23087
23088 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_gt_16_strided_cn) {
23089 TEST_REQUIRES_ARM_NEON;
23090 for (uint32_t n = 17; n < 32; n++) {
23091 for (size_t k = 1; k <= 80; k += 17) {
23092 GemmMicrokernelTester()
23093 .mr(3)
23094 .nr(16)
23095 .kr(16)
23096 .sr(1)
23097 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023098 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080023099 .k(k)
23100 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080023101 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023102 }
23103 }
23104 }
23105
23106 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_gt_16_strided_a) {
23107 TEST_REQUIRES_ARM_NEON;
23108 for (uint32_t n = 17; n < 32; n++) {
23109 for (size_t k = 1; k <= 80; k += 17) {
23110 GemmMicrokernelTester()
23111 .mr(3)
23112 .nr(16)
23113 .kr(16)
23114 .sr(1)
23115 .m(3)
23116 .n(n)
23117 .k(k)
23118 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080023119 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023120 }
23121 }
23122 }
23123
23124 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_gt_16_subtile) {
23125 TEST_REQUIRES_ARM_NEON;
23126 for (uint32_t n = 17; n < 32; n++) {
23127 for (size_t k = 1; k <= 80; k += 17) {
23128 for (uint32_t m = 1; m <= 3; m++) {
23129 GemmMicrokernelTester()
23130 .mr(3)
23131 .nr(16)
23132 .kr(16)
23133 .sr(1)
23134 .m(m)
23135 .n(n)
23136 .k(k)
23137 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023138 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023139 }
23140 }
23141 }
23142 }
23143
23144 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_div_16) {
23145 TEST_REQUIRES_ARM_NEON;
23146 for (uint32_t n = 32; n <= 48; n += 16) {
23147 for (size_t k = 1; k <= 80; k += 17) {
23148 GemmMicrokernelTester()
23149 .mr(3)
23150 .nr(16)
23151 .kr(16)
23152 .sr(1)
23153 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023154 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080023155 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023156 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023157 }
23158 }
23159 }
23160
23161 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_div_16_strided_cn) {
23162 TEST_REQUIRES_ARM_NEON;
23163 for (uint32_t n = 32; n <= 48; n += 16) {
23164 for (size_t k = 1; k <= 80; k += 17) {
23165 GemmMicrokernelTester()
23166 .mr(3)
23167 .nr(16)
23168 .kr(16)
23169 .sr(1)
23170 .m(3)
23171 .n(n)
23172 .k(k)
23173 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080023174 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023175 }
23176 }
23177 }
23178
23179 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_div_16_strided_a) {
23180 TEST_REQUIRES_ARM_NEON;
23181 for (uint32_t n = 32; n <= 48; n += 16) {
23182 for (size_t k = 1; k <= 80; k += 17) {
23183 GemmMicrokernelTester()
23184 .mr(3)
23185 .nr(16)
23186 .kr(16)
23187 .sr(1)
23188 .m(3)
23189 .n(n)
23190 .k(k)
23191 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080023192 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023193 }
23194 }
23195 }
23196
23197 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_div_16_subtile) {
23198 TEST_REQUIRES_ARM_NEON;
23199 for (uint32_t n = 32; n <= 48; n += 16) {
23200 for (size_t k = 1; k <= 80; k += 17) {
23201 for (uint32_t m = 1; m <= 3; m++) {
23202 GemmMicrokernelTester()
23203 .mr(3)
23204 .nr(16)
23205 .kr(16)
23206 .sr(1)
23207 .m(m)
23208 .n(n)
23209 .k(k)
23210 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023211 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023212 }
23213 }
23214 }
23215 }
23216
23217 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, strided_cm_subtile) {
23218 TEST_REQUIRES_ARM_NEON;
23219 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023220 for (uint32_t n = 1; n <= 16; n++) {
23221 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080023222 GemmMicrokernelTester()
23223 .mr(3)
23224 .nr(16)
23225 .kr(16)
23226 .sr(1)
23227 .m(m)
23228 .n(n)
23229 .k(k)
23230 .cm_stride(19)
23231 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023232 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023233 }
23234 }
23235 }
23236 }
23237
23238 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, qmin) {
23239 TEST_REQUIRES_ARM_NEON;
23240 GemmMicrokernelTester()
23241 .mr(3)
23242 .nr(16)
23243 .kr(16)
23244 .sr(1)
23245 .m(3)
23246 .n(16)
23247 .k(16)
23248 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023249 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023250 }
23251
23252 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, qmax) {
23253 TEST_REQUIRES_ARM_NEON;
23254 GemmMicrokernelTester()
23255 .mr(3)
23256 .nr(16)
23257 .kr(16)
23258 .sr(1)
23259 .m(3)
23260 .n(16)
23261 .k(16)
23262 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023263 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023264 }
23265
23266 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, strided_cm) {
23267 TEST_REQUIRES_ARM_NEON;
23268 GemmMicrokernelTester()
23269 .mr(3)
23270 .nr(16)
23271 .kr(16)
23272 .sr(1)
23273 .m(3)
23274 .n(16)
23275 .k(16)
23276 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080023277 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080023278 }
23279#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
23280
23281
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023282#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
23283 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8) {
23284 TEST_REQUIRES_ARM_NEON_DOT;
Marat Dukhan89991902021-12-06 00:54:36 -080023285 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023286 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023287 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023288 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023289 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023290 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023291 .n(8)
23292 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080023293 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023294 }
23295
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023296 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cn) {
23297 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023298 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023299 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023300 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023301 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023302 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023303 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023304 .n(8)
23305 .k(8)
23306 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023307 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023308 }
23309
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023310 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_strided_a) {
23311 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023312 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023313 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023314 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023315 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023316 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023317 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023318 .n(8)
23319 .k(8)
23320 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023321 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023322 }
23323
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023324 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile) {
23325 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023326 for (uint32_t n = 1; n <= 8; n++) {
23327 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023328 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023329 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023330 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023331 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023332 .sr(1)
23333 .m(m)
23334 .n(n)
23335 .k(8)
23336 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023337 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023338 }
23339 }
23340 }
23341
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023342 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_m) {
23343 TEST_REQUIRES_ARM_NEON_DOT;
23344 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023345 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023346 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023347 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023348 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023349 .sr(1)
23350 .m(m)
23351 .n(8)
23352 .k(8)
23353 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023354 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023355 }
23356 }
23357
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023358 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_n) {
23359 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023360 for (uint32_t n = 1; n <= 8; n++) {
23361 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023362 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023363 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023364 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023365 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023366 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023367 .n(n)
23368 .k(8)
23369 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023370 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023371 }
23372 }
23373
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023374 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8) {
23375 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023376 for (size_t k = 1; k < 8; k++) {
23377 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023378 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023379 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023380 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023381 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023382 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023383 .n(8)
23384 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023385 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023386 }
23387 }
23388
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023389 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8_strided_a) {
23390 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023391 for (size_t k = 1; k < 8; k++) {
23392 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023393 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023394 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023395 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023396 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023397 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023398 .n(8)
23399 .k(k)
23400 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023401 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023402 }
23403 }
23404
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023405 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8_subtile) {
23406 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023407 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023408 for (uint32_t n = 1; n <= 8; n++) {
23409 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023410 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023411 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023412 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023413 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023414 .sr(1)
23415 .m(m)
23416 .n(n)
23417 .k(k)
23418 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023419 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023420 }
23421 }
23422 }
23423 }
23424
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023425 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8) {
23426 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023427 for (size_t k = 9; k < 16; k++) {
23428 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023429 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023430 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023431 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023432 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023433 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023434 .n(8)
23435 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023436 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023437 }
23438 }
23439
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023440 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8_strided_a) {
23441 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023442 for (size_t k = 9; k < 16; k++) {
23443 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023444 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023445 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023446 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023447 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023448 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023449 .n(8)
23450 .k(k)
23451 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080023452 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023453 }
23454 }
23455
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023456 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8_subtile) {
23457 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023458 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023459 for (uint32_t n = 1; n <= 8; n++) {
23460 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023461 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023462 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023463 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023464 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023465 .sr(1)
23466 .m(m)
23467 .n(n)
23468 .k(k)
23469 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023470 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023471 }
23472 }
23473 }
23474 }
23475
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023476 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8) {
23477 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023478 for (size_t k = 16; k <= 80; k += 8) {
23479 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023480 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023481 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023482 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023483 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023484 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023485 .n(8)
23486 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023487 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023488 }
23489 }
23490
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023491 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8_strided_a) {
23492 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023493 for (size_t k = 16; k <= 80; k += 8) {
23494 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023495 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023496 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023497 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023498 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023499 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023500 .n(8)
23501 .k(k)
23502 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080023503 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023504 }
23505 }
23506
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023507 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8_subtile) {
23508 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023509 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023510 for (uint32_t n = 1; n <= 8; n++) {
23511 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023512 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023513 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023514 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023515 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023516 .sr(1)
23517 .m(m)
23518 .n(n)
23519 .k(k)
23520 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023521 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023522 }
23523 }
23524 }
23525 }
23526
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023527 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8) {
23528 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023529 for (uint32_t n = 9; n < 16; n++) {
23530 for (size_t k = 1; k <= 40; k += 9) {
23531 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023532 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023533 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023534 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023535 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023536 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023537 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023538 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023539 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023540 }
23541 }
23542 }
23543
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023544 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_strided_cn) {
23545 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023546 for (uint32_t n = 9; n < 16; n++) {
23547 for (size_t k = 1; k <= 40; k += 9) {
23548 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023549 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023550 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023551 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023552 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023553 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023554 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023555 .k(k)
23556 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023557 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023558 }
23559 }
23560 }
23561
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023562 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_strided_a) {
23563 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023564 for (uint32_t n = 9; n < 16; n++) {
23565 for (size_t k = 1; k <= 40; k += 9) {
23566 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023567 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023568 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023569 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023570 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023571 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023572 .n(n)
23573 .k(k)
23574 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080023575 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023576 }
23577 }
23578 }
23579
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023580 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_subtile) {
23581 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023582 for (uint32_t n = 9; n < 16; n++) {
23583 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023584 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023585 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023586 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023587 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023588 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023589 .sr(1)
23590 .m(m)
23591 .n(n)
23592 .k(k)
23593 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023594 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023595 }
23596 }
23597 }
23598 }
23599
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023600 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8) {
23601 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023602 for (uint32_t n = 16; n <= 24; n += 8) {
23603 for (size_t k = 1; k <= 40; k += 9) {
23604 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023605 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023606 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023607 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023608 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023609 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023610 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023611 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023612 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023613 }
23614 }
23615 }
23616
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023617 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_strided_cn) {
23618 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023619 for (uint32_t n = 16; n <= 24; n += 8) {
23620 for (size_t k = 1; k <= 40; k += 9) {
23621 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023622 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023623 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023624 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023625 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023626 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023627 .n(n)
23628 .k(k)
23629 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023630 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023631 }
23632 }
23633 }
23634
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023635 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_strided_a) {
23636 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023637 for (uint32_t n = 16; n <= 24; n += 8) {
23638 for (size_t k = 1; k <= 40; k += 9) {
23639 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023640 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023641 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023642 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023643 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023644 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023645 .n(n)
23646 .k(k)
23647 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080023648 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023649 }
23650 }
23651 }
23652
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023653 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_subtile) {
23654 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023655 for (uint32_t n = 16; n <= 24; n += 8) {
23656 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023657 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023658 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023659 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023660 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023661 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023662 .sr(1)
23663 .m(m)
23664 .n(n)
23665 .k(k)
23666 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023667 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023668 }
23669 }
23670 }
23671 }
23672
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023673 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm_subtile) {
23674 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023675 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023676 for (uint32_t n = 1; n <= 8; n++) {
23677 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023678 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023679 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023680 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023681 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023682 .sr(1)
23683 .m(m)
23684 .n(n)
23685 .k(k)
23686 .cm_stride(11)
23687 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023688 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023689 }
23690 }
23691 }
23692 }
23693
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023694 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmin) {
23695 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023696 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023697 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023698 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023699 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023700 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023701 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023702 .n(8)
23703 .k(8)
23704 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023705 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023706 }
23707
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023708 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmax) {
23709 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023710 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023711 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023712 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023713 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023714 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023715 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023716 .n(8)
23717 .k(8)
23718 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023719 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023720 }
23721
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023722 TEST(QS8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm) {
23723 TEST_REQUIRES_ARM_NEON_DOT;
Frank Barchard27bf92c2021-11-24 15:47:52 -080023724 GemmMicrokernelTester()
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023725 .mr(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023726 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023727 .kr(4)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023728 .sr(1)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023729 .m(6)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023730 .n(8)
23731 .k(8)
23732 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023733 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023734 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023735#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
Frank Barchard27bf92c2021-11-24 15:47:52 -080023736
23737
23738#if XNN_ARCH_ARM || XNN_ARCH_ARM64
23739 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8) {
23740 TEST_REQUIRES_ARM_NEON;
23741 GemmMicrokernelTester()
23742 .mr(4)
23743 .nr(8)
23744 .kr(1)
23745 .sr(1)
23746 .m(4)
23747 .n(8)
23748 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080023749 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023750 }
23751
23752 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cn) {
23753 TEST_REQUIRES_ARM_NEON;
23754 GemmMicrokernelTester()
23755 .mr(4)
23756 .nr(8)
23757 .kr(1)
23758 .sr(1)
23759 .m(4)
23760 .n(8)
23761 .k(8)
23762 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023763 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023764 }
23765
23766 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
23767 TEST_REQUIRES_ARM_NEON;
23768 GemmMicrokernelTester()
23769 .mr(4)
23770 .nr(8)
23771 .kr(1)
23772 .sr(1)
23773 .m(4)
23774 .n(8)
23775 .k(8)
23776 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023777 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023778 }
23779
23780 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
23781 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023782 for (uint32_t n = 1; n <= 8; n++) {
23783 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023784 GemmMicrokernelTester()
23785 .mr(4)
23786 .nr(8)
23787 .kr(1)
23788 .sr(1)
23789 .m(m)
23790 .n(n)
23791 .k(8)
23792 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023793 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023794 }
23795 }
23796 }
23797
23798 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
23799 TEST_REQUIRES_ARM_NEON;
23800 for (uint32_t m = 1; m <= 4; m++) {
23801 GemmMicrokernelTester()
23802 .mr(4)
23803 .nr(8)
23804 .kr(1)
23805 .sr(1)
23806 .m(m)
23807 .n(8)
23808 .k(8)
23809 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023810 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023811 }
23812 }
23813
23814 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
23815 TEST_REQUIRES_ARM_NEON;
23816 for (uint32_t n = 1; n <= 8; n++) {
23817 GemmMicrokernelTester()
23818 .mr(4)
23819 .nr(8)
23820 .kr(1)
23821 .sr(1)
23822 .m(4)
23823 .n(n)
23824 .k(8)
23825 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023826 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023827 }
23828 }
23829
23830 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8) {
23831 TEST_REQUIRES_ARM_NEON;
23832 for (size_t k = 1; k < 8; k++) {
23833 GemmMicrokernelTester()
23834 .mr(4)
23835 .nr(8)
23836 .kr(1)
23837 .sr(1)
23838 .m(4)
23839 .n(8)
23840 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023841 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023842 }
23843 }
23844
23845 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
23846 TEST_REQUIRES_ARM_NEON;
23847 for (size_t k = 1; k < 8; k++) {
23848 GemmMicrokernelTester()
23849 .mr(4)
23850 .nr(8)
23851 .kr(1)
23852 .sr(1)
23853 .m(4)
23854 .n(8)
23855 .k(k)
23856 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023857 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023858 }
23859 }
23860
23861 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
23862 TEST_REQUIRES_ARM_NEON;
23863 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023864 for (uint32_t n = 1; n <= 8; n++) {
23865 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023866 GemmMicrokernelTester()
23867 .mr(4)
23868 .nr(8)
23869 .kr(1)
23870 .sr(1)
23871 .m(m)
23872 .n(n)
23873 .k(k)
23874 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023875 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023876 }
23877 }
23878 }
23879 }
23880
23881 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8) {
23882 TEST_REQUIRES_ARM_NEON;
23883 for (size_t k = 9; k < 16; k++) {
23884 GemmMicrokernelTester()
23885 .mr(4)
23886 .nr(8)
23887 .kr(1)
23888 .sr(1)
23889 .m(4)
23890 .n(8)
23891 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023892 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023893 }
23894 }
23895
23896 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
23897 TEST_REQUIRES_ARM_NEON;
23898 for (size_t k = 9; k < 16; k++) {
23899 GemmMicrokernelTester()
23900 .mr(4)
23901 .nr(8)
23902 .kr(1)
23903 .sr(1)
23904 .m(4)
23905 .n(8)
23906 .k(k)
23907 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080023908 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023909 }
23910 }
23911
23912 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
23913 TEST_REQUIRES_ARM_NEON;
23914 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023915 for (uint32_t n = 1; n <= 8; n++) {
23916 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023917 GemmMicrokernelTester()
23918 .mr(4)
23919 .nr(8)
23920 .kr(1)
23921 .sr(1)
23922 .m(m)
23923 .n(n)
23924 .k(k)
23925 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023926 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023927 }
23928 }
23929 }
23930 }
23931
23932 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8) {
23933 TEST_REQUIRES_ARM_NEON;
23934 for (size_t k = 16; k <= 80; k += 8) {
23935 GemmMicrokernelTester()
23936 .mr(4)
23937 .nr(8)
23938 .kr(1)
23939 .sr(1)
23940 .m(4)
23941 .n(8)
23942 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023943 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023944 }
23945 }
23946
23947 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8_strided_a) {
23948 TEST_REQUIRES_ARM_NEON;
23949 for (size_t k = 16; k <= 80; k += 8) {
23950 GemmMicrokernelTester()
23951 .mr(4)
23952 .nr(8)
23953 .kr(1)
23954 .sr(1)
23955 .m(4)
23956 .n(8)
23957 .k(k)
23958 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080023959 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023960 }
23961 }
23962
23963 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
23964 TEST_REQUIRES_ARM_NEON;
23965 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023966 for (uint32_t n = 1; n <= 8; n++) {
23967 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023968 GemmMicrokernelTester()
23969 .mr(4)
23970 .nr(8)
23971 .kr(1)
23972 .sr(1)
23973 .m(m)
23974 .n(n)
23975 .k(k)
23976 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023977 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023978 }
23979 }
23980 }
23981 }
23982
23983 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8) {
23984 TEST_REQUIRES_ARM_NEON;
23985 for (uint32_t n = 9; n < 16; n++) {
23986 for (size_t k = 1; k <= 40; k += 9) {
23987 GemmMicrokernelTester()
23988 .mr(4)
23989 .nr(8)
23990 .kr(1)
23991 .sr(1)
23992 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023993 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023994 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023995 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023996 }
23997 }
23998 }
23999
24000 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
24001 TEST_REQUIRES_ARM_NEON;
24002 for (uint32_t n = 9; n < 16; n++) {
24003 for (size_t k = 1; k <= 40; k += 9) {
24004 GemmMicrokernelTester()
24005 .mr(4)
24006 .nr(8)
24007 .kr(1)
24008 .sr(1)
24009 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024010 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080024011 .k(k)
24012 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024013 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024014 }
24015 }
24016 }
24017
24018 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
24019 TEST_REQUIRES_ARM_NEON;
24020 for (uint32_t n = 9; n < 16; n++) {
24021 for (size_t k = 1; k <= 40; k += 9) {
24022 GemmMicrokernelTester()
24023 .mr(4)
24024 .nr(8)
24025 .kr(1)
24026 .sr(1)
24027 .m(4)
24028 .n(n)
24029 .k(k)
24030 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080024031 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024032 }
24033 }
24034 }
24035
24036 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
24037 TEST_REQUIRES_ARM_NEON;
24038 for (uint32_t n = 9; n < 16; n++) {
24039 for (size_t k = 1; k <= 40; k += 9) {
24040 for (uint32_t m = 1; m <= 4; m++) {
24041 GemmMicrokernelTester()
24042 .mr(4)
24043 .nr(8)
24044 .kr(1)
24045 .sr(1)
24046 .m(m)
24047 .n(n)
24048 .k(k)
24049 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024050 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024051 }
24052 }
24053 }
24054 }
24055
24056 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8) {
24057 TEST_REQUIRES_ARM_NEON;
24058 for (uint32_t n = 16; n <= 24; n += 8) {
24059 for (size_t k = 1; k <= 40; k += 9) {
24060 GemmMicrokernelTester()
24061 .mr(4)
24062 .nr(8)
24063 .kr(1)
24064 .sr(1)
24065 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024066 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080024067 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024068 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024069 }
24070 }
24071 }
24072
24073 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
24074 TEST_REQUIRES_ARM_NEON;
24075 for (uint32_t n = 16; n <= 24; n += 8) {
24076 for (size_t k = 1; k <= 40; k += 9) {
24077 GemmMicrokernelTester()
24078 .mr(4)
24079 .nr(8)
24080 .kr(1)
24081 .sr(1)
24082 .m(4)
24083 .n(n)
24084 .k(k)
24085 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024086 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024087 }
24088 }
24089 }
24090
24091 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_strided_a) {
24092 TEST_REQUIRES_ARM_NEON;
24093 for (uint32_t n = 16; n <= 24; n += 8) {
24094 for (size_t k = 1; k <= 40; k += 9) {
24095 GemmMicrokernelTester()
24096 .mr(4)
24097 .nr(8)
24098 .kr(1)
24099 .sr(1)
24100 .m(4)
24101 .n(n)
24102 .k(k)
24103 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080024104 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024105 }
24106 }
24107 }
24108
24109 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
24110 TEST_REQUIRES_ARM_NEON;
24111 for (uint32_t n = 16; n <= 24; n += 8) {
24112 for (size_t k = 1; k <= 40; k += 9) {
24113 for (uint32_t m = 1; m <= 4; m++) {
24114 GemmMicrokernelTester()
24115 .mr(4)
24116 .nr(8)
24117 .kr(1)
24118 .sr(1)
24119 .m(m)
24120 .n(n)
24121 .k(k)
24122 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024123 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024124 }
24125 }
24126 }
24127 }
24128
24129 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
24130 TEST_REQUIRES_ARM_NEON;
24131 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024132 for (uint32_t n = 1; n <= 8; n++) {
24133 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024134 GemmMicrokernelTester()
24135 .mr(4)
24136 .nr(8)
24137 .kr(1)
24138 .sr(1)
24139 .m(m)
24140 .n(n)
24141 .k(k)
24142 .cm_stride(11)
24143 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024144 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024145 }
24146 }
24147 }
24148 }
24149
24150 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmin) {
24151 TEST_REQUIRES_ARM_NEON;
24152 GemmMicrokernelTester()
24153 .mr(4)
24154 .nr(8)
24155 .kr(1)
24156 .sr(1)
24157 .m(4)
24158 .n(8)
24159 .k(8)
24160 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024161 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024162 }
24163
24164 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmax) {
24165 TEST_REQUIRES_ARM_NEON;
24166 GemmMicrokernelTester()
24167 .mr(4)
24168 .nr(8)
24169 .kr(1)
24170 .sr(1)
24171 .m(4)
24172 .n(8)
24173 .k(8)
24174 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024175 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024176 }
24177
24178 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm) {
24179 TEST_REQUIRES_ARM_NEON;
24180 GemmMicrokernelTester()
24181 .mr(4)
24182 .nr(8)
24183 .kr(1)
24184 .sr(1)
24185 .m(4)
24186 .n(8)
24187 .k(8)
24188 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024189 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024190 }
24191#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
24192
24193
24194#if XNN_ARCH_ARM || XNN_ARCH_ARM64
24195 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8) {
24196 TEST_REQUIRES_ARM_NEON;
24197 GemmMicrokernelTester()
24198 .mr(6)
24199 .nr(8)
24200 .kr(1)
24201 .sr(1)
24202 .m(6)
24203 .n(8)
24204 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080024205 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024206 }
24207
24208 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cn) {
24209 TEST_REQUIRES_ARM_NEON;
24210 GemmMicrokernelTester()
24211 .mr(6)
24212 .nr(8)
24213 .kr(1)
24214 .sr(1)
24215 .m(6)
24216 .n(8)
24217 .k(8)
24218 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024219 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024220 }
24221
24222 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
24223 TEST_REQUIRES_ARM_NEON;
24224 GemmMicrokernelTester()
24225 .mr(6)
24226 .nr(8)
24227 .kr(1)
24228 .sr(1)
24229 .m(6)
24230 .n(8)
24231 .k(8)
24232 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024233 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024234 }
24235
24236 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile) {
24237 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024238 for (uint32_t n = 1; n <= 8; n++) {
24239 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024240 GemmMicrokernelTester()
24241 .mr(6)
24242 .nr(8)
24243 .kr(1)
24244 .sr(1)
24245 .m(m)
24246 .n(n)
24247 .k(8)
24248 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024249 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024250 }
24251 }
24252 }
24253
24254 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
24255 TEST_REQUIRES_ARM_NEON;
24256 for (uint32_t m = 1; m <= 6; m++) {
24257 GemmMicrokernelTester()
24258 .mr(6)
24259 .nr(8)
24260 .kr(1)
24261 .sr(1)
24262 .m(m)
24263 .n(8)
24264 .k(8)
24265 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024266 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024267 }
24268 }
24269
24270 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
24271 TEST_REQUIRES_ARM_NEON;
24272 for (uint32_t n = 1; n <= 8; n++) {
24273 GemmMicrokernelTester()
24274 .mr(6)
24275 .nr(8)
24276 .kr(1)
24277 .sr(1)
24278 .m(6)
24279 .n(n)
24280 .k(8)
24281 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024282 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024283 }
24284 }
24285
24286 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_lt_8) {
24287 TEST_REQUIRES_ARM_NEON;
24288 for (size_t k = 1; k < 8; k++) {
24289 GemmMicrokernelTester()
24290 .mr(6)
24291 .nr(8)
24292 .kr(1)
24293 .sr(1)
24294 .m(6)
24295 .n(8)
24296 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024297 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024298 }
24299 }
24300
24301 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
24302 TEST_REQUIRES_ARM_NEON;
24303 for (size_t k = 1; k < 8; k++) {
24304 GemmMicrokernelTester()
24305 .mr(6)
24306 .nr(8)
24307 .kr(1)
24308 .sr(1)
24309 .m(6)
24310 .n(8)
24311 .k(k)
24312 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024313 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024314 }
24315 }
24316
24317 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_lt_8_subtile) {
24318 TEST_REQUIRES_ARM_NEON;
24319 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024320 for (uint32_t n = 1; n <= 8; n++) {
24321 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024322 GemmMicrokernelTester()
24323 .mr(6)
24324 .nr(8)
24325 .kr(1)
24326 .sr(1)
24327 .m(m)
24328 .n(n)
24329 .k(k)
24330 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024331 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024332 }
24333 }
24334 }
24335 }
24336
24337 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_gt_8) {
24338 TEST_REQUIRES_ARM_NEON;
24339 for (size_t k = 9; k < 16; k++) {
24340 GemmMicrokernelTester()
24341 .mr(6)
24342 .nr(8)
24343 .kr(1)
24344 .sr(1)
24345 .m(6)
24346 .n(8)
24347 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024348 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024349 }
24350 }
24351
24352 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
24353 TEST_REQUIRES_ARM_NEON;
24354 for (size_t k = 9; k < 16; k++) {
24355 GemmMicrokernelTester()
24356 .mr(6)
24357 .nr(8)
24358 .kr(1)
24359 .sr(1)
24360 .m(6)
24361 .n(8)
24362 .k(k)
24363 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080024364 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024365 }
24366 }
24367
24368 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_gt_8_subtile) {
24369 TEST_REQUIRES_ARM_NEON;
24370 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024371 for (uint32_t n = 1; n <= 8; n++) {
24372 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024373 GemmMicrokernelTester()
24374 .mr(6)
24375 .nr(8)
24376 .kr(1)
24377 .sr(1)
24378 .m(m)
24379 .n(n)
24380 .k(k)
24381 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024382 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024383 }
24384 }
24385 }
24386 }
24387
24388 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_div_8) {
24389 TEST_REQUIRES_ARM_NEON;
24390 for (size_t k = 16; k <= 80; k += 8) {
24391 GemmMicrokernelTester()
24392 .mr(6)
24393 .nr(8)
24394 .kr(1)
24395 .sr(1)
24396 .m(6)
24397 .n(8)
24398 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024399 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024400 }
24401 }
24402
24403 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_div_8_strided_a) {
24404 TEST_REQUIRES_ARM_NEON;
24405 for (size_t k = 16; k <= 80; k += 8) {
24406 GemmMicrokernelTester()
24407 .mr(6)
24408 .nr(8)
24409 .kr(1)
24410 .sr(1)
24411 .m(6)
24412 .n(8)
24413 .k(k)
24414 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080024415 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024416 }
24417 }
24418
24419 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_div_8_subtile) {
24420 TEST_REQUIRES_ARM_NEON;
24421 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024422 for (uint32_t n = 1; n <= 8; n++) {
24423 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024424 GemmMicrokernelTester()
24425 .mr(6)
24426 .nr(8)
24427 .kr(1)
24428 .sr(1)
24429 .m(m)
24430 .n(n)
24431 .k(k)
24432 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024433 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024434 }
24435 }
24436 }
24437 }
24438
24439 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8) {
24440 TEST_REQUIRES_ARM_NEON;
24441 for (uint32_t n = 9; n < 16; n++) {
24442 for (size_t k = 1; k <= 40; k += 9) {
24443 GemmMicrokernelTester()
24444 .mr(6)
24445 .nr(8)
24446 .kr(1)
24447 .sr(1)
24448 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024449 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080024450 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024451 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024452 }
24453 }
24454 }
24455
24456 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
24457 TEST_REQUIRES_ARM_NEON;
24458 for (uint32_t n = 9; n < 16; n++) {
24459 for (size_t k = 1; k <= 40; k += 9) {
24460 GemmMicrokernelTester()
24461 .mr(6)
24462 .nr(8)
24463 .kr(1)
24464 .sr(1)
24465 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024466 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080024467 .k(k)
24468 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024469 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024470 }
24471 }
24472 }
24473
24474 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
24475 TEST_REQUIRES_ARM_NEON;
24476 for (uint32_t n = 9; n < 16; n++) {
24477 for (size_t k = 1; k <= 40; k += 9) {
24478 GemmMicrokernelTester()
24479 .mr(6)
24480 .nr(8)
24481 .kr(1)
24482 .sr(1)
24483 .m(6)
24484 .n(n)
24485 .k(k)
24486 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080024487 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024488 }
24489 }
24490 }
24491
24492 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_subtile) {
24493 TEST_REQUIRES_ARM_NEON;
24494 for (uint32_t n = 9; n < 16; n++) {
24495 for (size_t k = 1; k <= 40; k += 9) {
24496 for (uint32_t m = 1; m <= 6; m++) {
24497 GemmMicrokernelTester()
24498 .mr(6)
24499 .nr(8)
24500 .kr(1)
24501 .sr(1)
24502 .m(m)
24503 .n(n)
24504 .k(k)
24505 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024506 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024507 }
24508 }
24509 }
24510 }
24511
24512 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8) {
24513 TEST_REQUIRES_ARM_NEON;
24514 for (uint32_t n = 16; n <= 24; n += 8) {
24515 for (size_t k = 1; k <= 40; k += 9) {
24516 GemmMicrokernelTester()
24517 .mr(6)
24518 .nr(8)
24519 .kr(1)
24520 .sr(1)
24521 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024522 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080024523 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024524 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024525 }
24526 }
24527 }
24528
24529 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
24530 TEST_REQUIRES_ARM_NEON;
24531 for (uint32_t n = 16; n <= 24; n += 8) {
24532 for (size_t k = 1; k <= 40; k += 9) {
24533 GemmMicrokernelTester()
24534 .mr(6)
24535 .nr(8)
24536 .kr(1)
24537 .sr(1)
24538 .m(6)
24539 .n(n)
24540 .k(k)
24541 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024542 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024543 }
24544 }
24545 }
24546
24547 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_strided_a) {
24548 TEST_REQUIRES_ARM_NEON;
24549 for (uint32_t n = 16; n <= 24; n += 8) {
24550 for (size_t k = 1; k <= 40; k += 9) {
24551 GemmMicrokernelTester()
24552 .mr(6)
24553 .nr(8)
24554 .kr(1)
24555 .sr(1)
24556 .m(6)
24557 .n(n)
24558 .k(k)
24559 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080024560 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024561 }
24562 }
24563 }
24564
24565 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_subtile) {
24566 TEST_REQUIRES_ARM_NEON;
24567 for (uint32_t n = 16; n <= 24; n += 8) {
24568 for (size_t k = 1; k <= 40; k += 9) {
24569 for (uint32_t m = 1; m <= 6; m++) {
24570 GemmMicrokernelTester()
24571 .mr(6)
24572 .nr(8)
24573 .kr(1)
24574 .sr(1)
24575 .m(m)
24576 .n(n)
24577 .k(k)
24578 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024579 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024580 }
24581 }
24582 }
24583 }
24584
24585 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cm_subtile) {
24586 TEST_REQUIRES_ARM_NEON;
24587 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024588 for (uint32_t n = 1; n <= 8; n++) {
24589 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024590 GemmMicrokernelTester()
24591 .mr(6)
24592 .nr(8)
24593 .kr(1)
24594 .sr(1)
24595 .m(m)
24596 .n(n)
24597 .k(k)
24598 .cm_stride(11)
24599 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024600 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024601 }
24602 }
24603 }
24604 }
24605
24606 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, qmin) {
24607 TEST_REQUIRES_ARM_NEON;
24608 GemmMicrokernelTester()
24609 .mr(6)
24610 .nr(8)
24611 .kr(1)
24612 .sr(1)
24613 .m(6)
24614 .n(8)
24615 .k(8)
24616 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024617 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024618 }
24619
24620 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, qmax) {
24621 TEST_REQUIRES_ARM_NEON;
24622 GemmMicrokernelTester()
24623 .mr(6)
24624 .nr(8)
24625 .kr(1)
24626 .sr(1)
24627 .m(6)
24628 .n(8)
24629 .k(8)
24630 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024631 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024632 }
24633
24634 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cm) {
24635 TEST_REQUIRES_ARM_NEON;
24636 GemmMicrokernelTester()
24637 .mr(6)
24638 .nr(8)
24639 .kr(1)
24640 .sr(1)
24641 .m(6)
24642 .n(8)
24643 .k(8)
24644 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024645 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024646 }
24647#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
24648
24649
24650#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard27bf92c2021-11-24 15:47:52 -080024651 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
24652 TEST_REQUIRES_ARM_NEON;
24653 GemmMicrokernelTester()
24654 .mr(3)
24655 .nr(8)
24656 .kr(1)
24657 .sr(1)
24658 .m(3)
24659 .n(8)
24660 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080024661 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024662 }
24663
24664 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, strided_cn) {
24665 TEST_REQUIRES_ARM_NEON;
24666 GemmMicrokernelTester()
24667 .mr(3)
24668 .nr(8)
24669 .kr(1)
24670 .sr(1)
24671 .m(3)
24672 .n(8)
24673 .k(8)
24674 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024675 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024676 }
24677
24678 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
24679 TEST_REQUIRES_ARM_NEON;
24680 GemmMicrokernelTester()
24681 .mr(3)
24682 .nr(8)
24683 .kr(1)
24684 .sr(1)
24685 .m(3)
24686 .n(8)
24687 .k(8)
24688 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024689 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024690 }
24691
24692 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
24693 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024694 for (uint32_t n = 1; n <= 8; n++) {
24695 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024696 GemmMicrokernelTester()
24697 .mr(3)
24698 .nr(8)
24699 .kr(1)
24700 .sr(1)
24701 .m(m)
24702 .n(n)
24703 .k(8)
24704 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024705 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024706 }
24707 }
24708 }
24709
24710 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
24711 TEST_REQUIRES_ARM_NEON;
24712 for (uint32_t m = 1; m <= 3; m++) {
24713 GemmMicrokernelTester()
24714 .mr(3)
24715 .nr(8)
24716 .kr(1)
24717 .sr(1)
24718 .m(m)
24719 .n(8)
24720 .k(8)
24721 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024722 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024723 }
24724 }
24725
24726 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
24727 TEST_REQUIRES_ARM_NEON;
24728 for (uint32_t n = 1; n <= 8; n++) {
24729 GemmMicrokernelTester()
24730 .mr(3)
24731 .nr(8)
24732 .kr(1)
24733 .sr(1)
24734 .m(3)
24735 .n(n)
24736 .k(8)
24737 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024738 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024739 }
24740 }
24741
24742 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
24743 TEST_REQUIRES_ARM_NEON;
24744 for (size_t k = 1; k < 8; k++) {
24745 GemmMicrokernelTester()
24746 .mr(3)
24747 .nr(8)
24748 .kr(1)
24749 .sr(1)
24750 .m(3)
24751 .n(8)
24752 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024753 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024754 }
24755 }
24756
24757 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
24758 TEST_REQUIRES_ARM_NEON;
24759 for (size_t k = 1; k < 8; k++) {
24760 GemmMicrokernelTester()
24761 .mr(3)
24762 .nr(8)
24763 .kr(1)
24764 .sr(1)
24765 .m(3)
24766 .n(8)
24767 .k(k)
24768 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024769 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024770 }
24771 }
24772
24773 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
24774 TEST_REQUIRES_ARM_NEON;
24775 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024776 for (uint32_t n = 1; n <= 8; n++) {
24777 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024778 GemmMicrokernelTester()
24779 .mr(3)
24780 .nr(8)
24781 .kr(1)
24782 .sr(1)
24783 .m(m)
24784 .n(n)
24785 .k(k)
24786 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024787 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024788 }
24789 }
24790 }
24791 }
24792
24793 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
24794 TEST_REQUIRES_ARM_NEON;
24795 for (size_t k = 9; k < 16; k++) {
24796 GemmMicrokernelTester()
24797 .mr(3)
24798 .nr(8)
24799 .kr(1)
24800 .sr(1)
24801 .m(3)
24802 .n(8)
24803 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024804 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024805 }
24806 }
24807
24808 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
24809 TEST_REQUIRES_ARM_NEON;
24810 for (size_t k = 9; k < 16; k++) {
24811 GemmMicrokernelTester()
24812 .mr(3)
24813 .nr(8)
24814 .kr(1)
24815 .sr(1)
24816 .m(3)
24817 .n(8)
24818 .k(k)
24819 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080024820 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024821 }
24822 }
24823
24824 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
24825 TEST_REQUIRES_ARM_NEON;
24826 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024827 for (uint32_t n = 1; n <= 8; n++) {
24828 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024829 GemmMicrokernelTester()
24830 .mr(3)
24831 .nr(8)
24832 .kr(1)
24833 .sr(1)
24834 .m(m)
24835 .n(n)
24836 .k(k)
24837 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024838 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024839 }
24840 }
24841 }
24842 }
24843
24844 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_div_8) {
24845 TEST_REQUIRES_ARM_NEON;
24846 for (size_t k = 16; k <= 80; k += 8) {
24847 GemmMicrokernelTester()
24848 .mr(3)
24849 .nr(8)
24850 .kr(1)
24851 .sr(1)
24852 .m(3)
24853 .n(8)
24854 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024855 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024856 }
24857 }
24858
24859 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
24860 TEST_REQUIRES_ARM_NEON;
24861 for (size_t k = 16; k <= 80; k += 8) {
24862 GemmMicrokernelTester()
24863 .mr(3)
24864 .nr(8)
24865 .kr(1)
24866 .sr(1)
24867 .m(3)
24868 .n(8)
24869 .k(k)
24870 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080024871 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024872 }
24873 }
24874
24875 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
24876 TEST_REQUIRES_ARM_NEON;
24877 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024878 for (uint32_t n = 1; n <= 8; n++) {
24879 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024880 GemmMicrokernelTester()
24881 .mr(3)
24882 .nr(8)
24883 .kr(1)
24884 .sr(1)
24885 .m(m)
24886 .n(n)
24887 .k(k)
24888 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024889 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024890 }
24891 }
24892 }
24893 }
24894
24895 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
24896 TEST_REQUIRES_ARM_NEON;
24897 for (uint32_t n = 9; n < 16; n++) {
24898 for (size_t k = 1; k <= 40; k += 9) {
24899 GemmMicrokernelTester()
24900 .mr(3)
24901 .nr(8)
24902 .kr(1)
24903 .sr(1)
24904 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024905 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080024906 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024907 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024908 }
24909 }
24910 }
24911
24912 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
24913 TEST_REQUIRES_ARM_NEON;
24914 for (uint32_t n = 9; n < 16; n++) {
24915 for (size_t k = 1; k <= 40; k += 9) {
24916 GemmMicrokernelTester()
24917 .mr(3)
24918 .nr(8)
24919 .kr(1)
24920 .sr(1)
24921 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024922 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080024923 .k(k)
24924 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024925 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024926 }
24927 }
24928 }
24929
24930 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
24931 TEST_REQUIRES_ARM_NEON;
24932 for (uint32_t n = 9; n < 16; n++) {
24933 for (size_t k = 1; k <= 40; k += 9) {
24934 GemmMicrokernelTester()
24935 .mr(3)
24936 .nr(8)
24937 .kr(1)
24938 .sr(1)
24939 .m(3)
24940 .n(n)
24941 .k(k)
24942 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080024943 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024944 }
24945 }
24946 }
24947
24948 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
24949 TEST_REQUIRES_ARM_NEON;
24950 for (uint32_t n = 9; n < 16; n++) {
24951 for (size_t k = 1; k <= 40; k += 9) {
24952 for (uint32_t m = 1; m <= 3; m++) {
24953 GemmMicrokernelTester()
24954 .mr(3)
24955 .nr(8)
24956 .kr(1)
24957 .sr(1)
24958 .m(m)
24959 .n(n)
24960 .k(k)
24961 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024962 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024963 }
24964 }
24965 }
24966 }
24967
24968 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_div_8) {
24969 TEST_REQUIRES_ARM_NEON;
24970 for (uint32_t n = 16; n <= 24; n += 8) {
24971 for (size_t k = 1; k <= 40; k += 9) {
24972 GemmMicrokernelTester()
24973 .mr(3)
24974 .nr(8)
24975 .kr(1)
24976 .sr(1)
24977 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024978 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080024979 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024980 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024981 }
24982 }
24983 }
24984
24985 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
24986 TEST_REQUIRES_ARM_NEON;
24987 for (uint32_t n = 16; n <= 24; n += 8) {
24988 for (size_t k = 1; k <= 40; k += 9) {
24989 GemmMicrokernelTester()
24990 .mr(3)
24991 .nr(8)
24992 .kr(1)
24993 .sr(1)
24994 .m(3)
24995 .n(n)
24996 .k(k)
24997 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024998 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024999 }
25000 }
25001 }
25002
25003 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
25004 TEST_REQUIRES_ARM_NEON;
25005 for (uint32_t n = 16; n <= 24; n += 8) {
25006 for (size_t k = 1; k <= 40; k += 9) {
25007 GemmMicrokernelTester()
25008 .mr(3)
25009 .nr(8)
25010 .kr(1)
25011 .sr(1)
25012 .m(3)
25013 .n(n)
25014 .k(k)
25015 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080025016 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025017 }
25018 }
25019 }
25020
25021 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
25022 TEST_REQUIRES_ARM_NEON;
25023 for (uint32_t n = 16; n <= 24; n += 8) {
25024 for (size_t k = 1; k <= 40; k += 9) {
25025 for (uint32_t m = 1; m <= 3; m++) {
25026 GemmMicrokernelTester()
25027 .mr(3)
25028 .nr(8)
25029 .kr(1)
25030 .sr(1)
25031 .m(m)
25032 .n(n)
25033 .k(k)
25034 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025035 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025036 }
25037 }
25038 }
25039 }
25040
25041 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
25042 TEST_REQUIRES_ARM_NEON;
25043 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025044 for (uint32_t n = 1; n <= 8; n++) {
25045 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080025046 GemmMicrokernelTester()
25047 .mr(3)
25048 .nr(8)
25049 .kr(1)
25050 .sr(1)
25051 .m(m)
25052 .n(n)
25053 .k(k)
25054 .cm_stride(11)
25055 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025056 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025057 }
25058 }
25059 }
25060 }
25061
25062 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, qmin) {
25063 TEST_REQUIRES_ARM_NEON;
25064 GemmMicrokernelTester()
25065 .mr(3)
25066 .nr(8)
25067 .kr(1)
25068 .sr(1)
25069 .m(3)
25070 .n(8)
25071 .k(8)
25072 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025073 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025074 }
25075
25076 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, qmax) {
25077 TEST_REQUIRES_ARM_NEON;
25078 GemmMicrokernelTester()
25079 .mr(3)
25080 .nr(8)
25081 .kr(1)
25082 .sr(1)
25083 .m(3)
25084 .n(8)
25085 .k(8)
25086 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025087 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025088 }
25089
25090 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, strided_cm) {
25091 TEST_REQUIRES_ARM_NEON;
25092 GemmMicrokernelTester()
25093 .mr(3)
25094 .nr(8)
25095 .kr(1)
25096 .sr(1)
25097 .m(3)
25098 .n(8)
25099 .k(8)
25100 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025101 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025102 }
25103#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
25104
25105
25106#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard27bf92c2021-11-24 15:47:52 -080025107 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
25108 TEST_REQUIRES_ARM_NEON;
25109 GemmMicrokernelTester()
25110 .mr(6)
25111 .nr(16)
25112 .kr(1)
25113 .sr(1)
25114 .m(6)
25115 .n(16)
25116 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080025117 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025118 }
25119
25120 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, strided_cn) {
25121 TEST_REQUIRES_ARM_NEON;
25122 GemmMicrokernelTester()
25123 .mr(6)
25124 .nr(16)
25125 .kr(1)
25126 .sr(1)
25127 .m(6)
25128 .n(16)
25129 .k(8)
25130 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080025131 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025132 }
25133
25134 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
25135 TEST_REQUIRES_ARM_NEON;
25136 GemmMicrokernelTester()
25137 .mr(6)
25138 .nr(16)
25139 .kr(1)
25140 .sr(1)
25141 .m(6)
25142 .n(16)
25143 .k(8)
25144 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025145 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025146 }
25147
25148 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
25149 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080025150 for (uint32_t n = 1; n <= 16; n++) {
25151 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080025152 GemmMicrokernelTester()
25153 .mr(6)
25154 .nr(16)
25155 .kr(1)
25156 .sr(1)
25157 .m(m)
25158 .n(n)
25159 .k(8)
25160 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025161 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025162 }
25163 }
25164 }
25165
25166 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
25167 TEST_REQUIRES_ARM_NEON;
25168 for (uint32_t m = 1; m <= 6; m++) {
25169 GemmMicrokernelTester()
25170 .mr(6)
25171 .nr(16)
25172 .kr(1)
25173 .sr(1)
25174 .m(m)
25175 .n(16)
25176 .k(8)
25177 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025178 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025179 }
25180 }
25181
25182 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
25183 TEST_REQUIRES_ARM_NEON;
25184 for (uint32_t n = 1; n <= 16; n++) {
25185 GemmMicrokernelTester()
25186 .mr(6)
25187 .nr(16)
25188 .kr(1)
25189 .sr(1)
25190 .m(6)
25191 .n(n)
25192 .k(8)
25193 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025194 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025195 }
25196 }
25197
25198 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
25199 TEST_REQUIRES_ARM_NEON;
25200 for (size_t k = 1; k < 8; k++) {
25201 GemmMicrokernelTester()
25202 .mr(6)
25203 .nr(16)
25204 .kr(1)
25205 .sr(1)
25206 .m(6)
25207 .n(16)
25208 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025209 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025210 }
25211 }
25212
25213 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
25214 TEST_REQUIRES_ARM_NEON;
25215 for (size_t k = 1; k < 8; k++) {
25216 GemmMicrokernelTester()
25217 .mr(6)
25218 .nr(16)
25219 .kr(1)
25220 .sr(1)
25221 .m(6)
25222 .n(16)
25223 .k(k)
25224 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025225 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025226 }
25227 }
25228
25229 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
25230 TEST_REQUIRES_ARM_NEON;
25231 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025232 for (uint32_t n = 1; n <= 16; n++) {
25233 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080025234 GemmMicrokernelTester()
25235 .mr(6)
25236 .nr(16)
25237 .kr(1)
25238 .sr(1)
25239 .m(m)
25240 .n(n)
25241 .k(k)
25242 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025243 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025244 }
25245 }
25246 }
25247 }
25248
25249 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
25250 TEST_REQUIRES_ARM_NEON;
25251 for (size_t k = 9; k < 16; k++) {
25252 GemmMicrokernelTester()
25253 .mr(6)
25254 .nr(16)
25255 .kr(1)
25256 .sr(1)
25257 .m(6)
25258 .n(16)
25259 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025260 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025261 }
25262 }
25263
25264 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
25265 TEST_REQUIRES_ARM_NEON;
25266 for (size_t k = 9; k < 16; k++) {
25267 GemmMicrokernelTester()
25268 .mr(6)
25269 .nr(16)
25270 .kr(1)
25271 .sr(1)
25272 .m(6)
25273 .n(16)
25274 .k(k)
25275 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080025276 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025277 }
25278 }
25279
25280 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
25281 TEST_REQUIRES_ARM_NEON;
25282 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025283 for (uint32_t n = 1; n <= 16; n++) {
25284 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080025285 GemmMicrokernelTester()
25286 .mr(6)
25287 .nr(16)
25288 .kr(1)
25289 .sr(1)
25290 .m(m)
25291 .n(n)
25292 .k(k)
25293 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025294 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025295 }
25296 }
25297 }
25298 }
25299
25300 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_div_8) {
25301 TEST_REQUIRES_ARM_NEON;
25302 for (size_t k = 16; k <= 80; k += 8) {
25303 GemmMicrokernelTester()
25304 .mr(6)
25305 .nr(16)
25306 .kr(1)
25307 .sr(1)
25308 .m(6)
25309 .n(16)
25310 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025311 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025312 }
25313 }
25314
25315 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
25316 TEST_REQUIRES_ARM_NEON;
25317 for (size_t k = 16; k <= 80; k += 8) {
25318 GemmMicrokernelTester()
25319 .mr(6)
25320 .nr(16)
25321 .kr(1)
25322 .sr(1)
25323 .m(6)
25324 .n(16)
25325 .k(k)
25326 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080025327 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025328 }
25329 }
25330
25331 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
25332 TEST_REQUIRES_ARM_NEON;
25333 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025334 for (uint32_t n = 1; n <= 16; n++) {
25335 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080025336 GemmMicrokernelTester()
25337 .mr(6)
25338 .nr(16)
25339 .kr(1)
25340 .sr(1)
25341 .m(m)
25342 .n(n)
25343 .k(k)
25344 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025345 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025346 }
25347 }
25348 }
25349 }
25350
25351 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
25352 TEST_REQUIRES_ARM_NEON;
25353 for (uint32_t n = 17; n < 32; n++) {
25354 for (size_t k = 1; k <= 40; k += 9) {
25355 GemmMicrokernelTester()
25356 .mr(6)
25357 .nr(16)
25358 .kr(1)
25359 .sr(1)
25360 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025361 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080025362 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025363 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025364 }
25365 }
25366 }
25367
25368 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
25369 TEST_REQUIRES_ARM_NEON;
25370 for (uint32_t n = 17; n < 32; n++) {
25371 for (size_t k = 1; k <= 40; k += 9) {
25372 GemmMicrokernelTester()
25373 .mr(6)
25374 .nr(16)
25375 .kr(1)
25376 .sr(1)
25377 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025378 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080025379 .k(k)
25380 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080025381 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025382 }
25383 }
25384 }
25385
25386 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
25387 TEST_REQUIRES_ARM_NEON;
25388 for (uint32_t n = 17; n < 32; n++) {
25389 for (size_t k = 1; k <= 40; k += 9) {
25390 GemmMicrokernelTester()
25391 .mr(6)
25392 .nr(16)
25393 .kr(1)
25394 .sr(1)
25395 .m(6)
25396 .n(n)
25397 .k(k)
25398 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080025399 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025400 }
25401 }
25402 }
25403
25404 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
25405 TEST_REQUIRES_ARM_NEON;
25406 for (uint32_t n = 17; n < 32; n++) {
25407 for (size_t k = 1; k <= 40; k += 9) {
25408 for (uint32_t m = 1; m <= 6; m++) {
25409 GemmMicrokernelTester()
25410 .mr(6)
25411 .nr(16)
25412 .kr(1)
25413 .sr(1)
25414 .m(m)
25415 .n(n)
25416 .k(k)
25417 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025418 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025419 }
25420 }
25421 }
25422 }
25423
25424 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_div_16) {
25425 TEST_REQUIRES_ARM_NEON;
25426 for (uint32_t n = 32; n <= 48; n += 16) {
25427 for (size_t k = 1; k <= 40; k += 9) {
25428 GemmMicrokernelTester()
25429 .mr(6)
25430 .nr(16)
25431 .kr(1)
25432 .sr(1)
25433 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025434 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080025435 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025436 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025437 }
25438 }
25439 }
25440
25441 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
25442 TEST_REQUIRES_ARM_NEON;
25443 for (uint32_t n = 32; n <= 48; n += 16) {
25444 for (size_t k = 1; k <= 40; k += 9) {
25445 GemmMicrokernelTester()
25446 .mr(6)
25447 .nr(16)
25448 .kr(1)
25449 .sr(1)
25450 .m(6)
25451 .n(n)
25452 .k(k)
25453 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080025454 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025455 }
25456 }
25457 }
25458
25459 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
25460 TEST_REQUIRES_ARM_NEON;
25461 for (uint32_t n = 32; n <= 48; n += 16) {
25462 for (size_t k = 1; k <= 40; k += 9) {
25463 GemmMicrokernelTester()
25464 .mr(6)
25465 .nr(16)
25466 .kr(1)
25467 .sr(1)
25468 .m(6)
25469 .n(n)
25470 .k(k)
25471 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080025472 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025473 }
25474 }
25475 }
25476
25477 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
25478 TEST_REQUIRES_ARM_NEON;
25479 for (uint32_t n = 32; n <= 48; n += 16) {
25480 for (size_t k = 1; k <= 40; k += 9) {
25481 for (uint32_t m = 1; m <= 6; m++) {
25482 GemmMicrokernelTester()
25483 .mr(6)
25484 .nr(16)
25485 .kr(1)
25486 .sr(1)
25487 .m(m)
25488 .n(n)
25489 .k(k)
25490 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025491 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025492 }
25493 }
25494 }
25495 }
25496
25497 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
25498 TEST_REQUIRES_ARM_NEON;
25499 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025500 for (uint32_t n = 1; n <= 16; n++) {
25501 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080025502 GemmMicrokernelTester()
25503 .mr(6)
25504 .nr(16)
25505 .kr(1)
25506 .sr(1)
25507 .m(m)
25508 .n(n)
25509 .k(k)
25510 .cm_stride(19)
25511 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025512 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025513 }
25514 }
25515 }
25516 }
25517
25518 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, qmin) {
25519 TEST_REQUIRES_ARM_NEON;
25520 GemmMicrokernelTester()
25521 .mr(6)
25522 .nr(16)
25523 .kr(1)
25524 .sr(1)
25525 .m(6)
25526 .n(16)
25527 .k(8)
25528 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025529 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025530 }
25531
25532 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, qmax) {
25533 TEST_REQUIRES_ARM_NEON;
25534 GemmMicrokernelTester()
25535 .mr(6)
25536 .nr(16)
25537 .kr(1)
25538 .sr(1)
25539 .m(6)
25540 .n(16)
25541 .k(8)
25542 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025543 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025544 }
25545
25546 TEST(QS8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, strided_cm) {
25547 TEST_REQUIRES_ARM_NEON;
25548 GemmMicrokernelTester()
25549 .mr(6)
25550 .nr(16)
25551 .kr(1)
25552 .sr(1)
25553 .m(6)
25554 .n(16)
25555 .k(8)
25556 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080025557 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080025558 }
25559#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
25560
25561
25562#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard510b8e02021-07-26 17:25:18 -070025563 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_eq_8) {
25564 TEST_REQUIRES_ARM_NEON;
25565 GemmMicrokernelTester()
25566 .mr(1)
25567 .nr(8)
25568 .kr(1)
25569 .sr(1)
25570 .m(1)
25571 .n(8)
25572 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080025573 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025574 }
25575
25576 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, strided_cn) {
25577 TEST_REQUIRES_ARM_NEON;
25578 GemmMicrokernelTester()
25579 .mr(1)
25580 .nr(8)
25581 .kr(1)
25582 .sr(1)
25583 .m(1)
25584 .n(8)
25585 .k(8)
25586 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025587 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025588 }
25589
25590 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
25591 TEST_REQUIRES_ARM_NEON;
25592 GemmMicrokernelTester()
25593 .mr(1)
25594 .nr(8)
25595 .kr(1)
25596 .sr(1)
25597 .m(1)
25598 .n(8)
25599 .k(8)
25600 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025601 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025602 }
25603
25604 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
25605 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080025606 for (uint32_t n = 1; n <= 8; n++) {
25607 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025608 GemmMicrokernelTester()
25609 .mr(1)
25610 .nr(8)
25611 .kr(1)
25612 .sr(1)
25613 .m(m)
25614 .n(n)
25615 .k(8)
25616 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025617 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025618 }
25619 }
25620 }
25621
25622 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
25623 TEST_REQUIRES_ARM_NEON;
25624 for (uint32_t m = 1; m <= 1; m++) {
25625 GemmMicrokernelTester()
25626 .mr(1)
25627 .nr(8)
25628 .kr(1)
25629 .sr(1)
25630 .m(m)
25631 .n(8)
25632 .k(8)
25633 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025634 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025635 }
25636 }
25637
25638 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
25639 TEST_REQUIRES_ARM_NEON;
25640 for (uint32_t n = 1; n <= 8; n++) {
25641 GemmMicrokernelTester()
25642 .mr(1)
25643 .nr(8)
25644 .kr(1)
25645 .sr(1)
25646 .m(1)
25647 .n(n)
25648 .k(8)
25649 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025650 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025651 }
25652 }
25653
25654 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_lt_8) {
25655 TEST_REQUIRES_ARM_NEON;
25656 for (size_t k = 1; k < 8; k++) {
25657 GemmMicrokernelTester()
25658 .mr(1)
25659 .nr(8)
25660 .kr(1)
25661 .sr(1)
25662 .m(1)
25663 .n(8)
25664 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025665 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025666 }
25667 }
25668
25669 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
25670 TEST_REQUIRES_ARM_NEON;
25671 for (size_t k = 1; k < 8; k++) {
25672 GemmMicrokernelTester()
25673 .mr(1)
25674 .nr(8)
25675 .kr(1)
25676 .sr(1)
25677 .m(1)
25678 .n(8)
25679 .k(k)
25680 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025681 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025682 }
25683 }
25684
25685 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
25686 TEST_REQUIRES_ARM_NEON;
25687 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025688 for (uint32_t n = 1; n <= 8; n++) {
25689 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025690 GemmMicrokernelTester()
25691 .mr(1)
25692 .nr(8)
25693 .kr(1)
25694 .sr(1)
25695 .m(m)
25696 .n(n)
25697 .k(k)
25698 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025699 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025700 }
25701 }
25702 }
25703 }
25704
25705 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_gt_8) {
25706 TEST_REQUIRES_ARM_NEON;
25707 for (size_t k = 9; k < 16; k++) {
25708 GemmMicrokernelTester()
25709 .mr(1)
25710 .nr(8)
25711 .kr(1)
25712 .sr(1)
25713 .m(1)
25714 .n(8)
25715 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025716 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025717 }
25718 }
25719
25720 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
25721 TEST_REQUIRES_ARM_NEON;
25722 for (size_t k = 9; k < 16; k++) {
25723 GemmMicrokernelTester()
25724 .mr(1)
25725 .nr(8)
25726 .kr(1)
25727 .sr(1)
25728 .m(1)
25729 .n(8)
25730 .k(k)
25731 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080025732 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025733 }
25734 }
25735
25736 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
25737 TEST_REQUIRES_ARM_NEON;
25738 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025739 for (uint32_t n = 1; n <= 8; n++) {
25740 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025741 GemmMicrokernelTester()
25742 .mr(1)
25743 .nr(8)
25744 .kr(1)
25745 .sr(1)
25746 .m(m)
25747 .n(n)
25748 .k(k)
25749 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025750 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025751 }
25752 }
25753 }
25754 }
25755
25756 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_div_8) {
25757 TEST_REQUIRES_ARM_NEON;
25758 for (size_t k = 16; k <= 80; k += 8) {
25759 GemmMicrokernelTester()
25760 .mr(1)
25761 .nr(8)
25762 .kr(1)
25763 .sr(1)
25764 .m(1)
25765 .n(8)
25766 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025767 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025768 }
25769 }
25770
25771 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
25772 TEST_REQUIRES_ARM_NEON;
25773 for (size_t k = 16; k <= 80; k += 8) {
25774 GemmMicrokernelTester()
25775 .mr(1)
25776 .nr(8)
25777 .kr(1)
25778 .sr(1)
25779 .m(1)
25780 .n(8)
25781 .k(k)
25782 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080025783 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025784 }
25785 }
25786
25787 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
25788 TEST_REQUIRES_ARM_NEON;
25789 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025790 for (uint32_t n = 1; n <= 8; n++) {
25791 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025792 GemmMicrokernelTester()
25793 .mr(1)
25794 .nr(8)
25795 .kr(1)
25796 .sr(1)
25797 .m(m)
25798 .n(n)
25799 .k(k)
25800 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025801 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025802 }
25803 }
25804 }
25805 }
25806
25807 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_gt_8) {
25808 TEST_REQUIRES_ARM_NEON;
25809 for (uint32_t n = 9; n < 16; n++) {
25810 for (size_t k = 1; k <= 40; k += 9) {
25811 GemmMicrokernelTester()
25812 .mr(1)
25813 .nr(8)
25814 .kr(1)
25815 .sr(1)
25816 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025817 .n(n)
Frank Barchard510b8e02021-07-26 17:25:18 -070025818 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025819 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025820 }
25821 }
25822 }
25823
25824 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
25825 TEST_REQUIRES_ARM_NEON;
25826 for (uint32_t n = 9; n < 16; n++) {
25827 for (size_t k = 1; k <= 40; k += 9) {
25828 GemmMicrokernelTester()
25829 .mr(1)
25830 .nr(8)
25831 .kr(1)
25832 .sr(1)
25833 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025834 .n(n)
Frank Barchard510b8e02021-07-26 17:25:18 -070025835 .k(k)
25836 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025837 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025838 }
25839 }
25840 }
25841
25842 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_a) {
25843 TEST_REQUIRES_ARM_NEON;
25844 for (uint32_t n = 9; n < 16; n++) {
25845 for (size_t k = 1; k <= 40; k += 9) {
25846 GemmMicrokernelTester()
25847 .mr(1)
25848 .nr(8)
25849 .kr(1)
25850 .sr(1)
25851 .m(1)
25852 .n(n)
25853 .k(k)
25854 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080025855 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025856 }
25857 }
25858 }
25859
25860 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
25861 TEST_REQUIRES_ARM_NEON;
25862 for (uint32_t n = 9; n < 16; n++) {
25863 for (size_t k = 1; k <= 40; k += 9) {
25864 for (uint32_t m = 1; m <= 1; m++) {
25865 GemmMicrokernelTester()
25866 .mr(1)
25867 .nr(8)
25868 .kr(1)
25869 .sr(1)
25870 .m(m)
25871 .n(n)
25872 .k(k)
25873 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025874 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025875 }
25876 }
25877 }
25878 }
25879
25880 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_div_8) {
25881 TEST_REQUIRES_ARM_NEON;
25882 for (uint32_t n = 16; n <= 24; n += 8) {
25883 for (size_t k = 1; k <= 40; k += 9) {
25884 GemmMicrokernelTester()
25885 .mr(1)
25886 .nr(8)
25887 .kr(1)
25888 .sr(1)
25889 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025890 .n(n)
Frank Barchard510b8e02021-07-26 17:25:18 -070025891 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025892 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025893 }
25894 }
25895 }
25896
25897 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
25898 TEST_REQUIRES_ARM_NEON;
25899 for (uint32_t n = 16; n <= 24; n += 8) {
25900 for (size_t k = 1; k <= 40; k += 9) {
25901 GemmMicrokernelTester()
25902 .mr(1)
25903 .nr(8)
25904 .kr(1)
25905 .sr(1)
25906 .m(1)
25907 .n(n)
25908 .k(k)
25909 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025910 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025911 }
25912 }
25913 }
25914
25915 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_div_8_strided_a) {
25916 TEST_REQUIRES_ARM_NEON;
25917 for (uint32_t n = 16; n <= 24; n += 8) {
25918 for (size_t k = 1; k <= 40; k += 9) {
25919 GemmMicrokernelTester()
25920 .mr(1)
25921 .nr(8)
25922 .kr(1)
25923 .sr(1)
25924 .m(1)
25925 .n(n)
25926 .k(k)
25927 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080025928 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025929 }
25930 }
25931 }
25932
25933 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
25934 TEST_REQUIRES_ARM_NEON;
25935 for (uint32_t n = 16; n <= 24; n += 8) {
25936 for (size_t k = 1; k <= 40; k += 9) {
25937 for (uint32_t m = 1; m <= 1; m++) {
25938 GemmMicrokernelTester()
25939 .mr(1)
25940 .nr(8)
25941 .kr(1)
25942 .sr(1)
25943 .m(m)
25944 .n(n)
25945 .k(k)
25946 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025947 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025948 }
25949 }
25950 }
25951 }
25952
25953 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
25954 TEST_REQUIRES_ARM_NEON;
25955 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025956 for (uint32_t n = 1; n <= 8; n++) {
25957 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025958 GemmMicrokernelTester()
25959 .mr(1)
25960 .nr(8)
25961 .kr(1)
25962 .sr(1)
25963 .m(m)
25964 .n(n)
25965 .k(k)
25966 .cm_stride(11)
25967 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025968 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025969 }
25970 }
25971 }
25972 }
25973
25974 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, qmin) {
25975 TEST_REQUIRES_ARM_NEON;
25976 GemmMicrokernelTester()
25977 .mr(1)
25978 .nr(8)
25979 .kr(1)
25980 .sr(1)
25981 .m(1)
25982 .n(8)
25983 .k(8)
25984 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025985 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025986 }
25987
25988 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, qmax) {
25989 TEST_REQUIRES_ARM_NEON;
25990 GemmMicrokernelTester()
25991 .mr(1)
25992 .nr(8)
25993 .kr(1)
25994 .sr(1)
25995 .m(1)
25996 .n(8)
25997 .k(8)
25998 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025999 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070026000 }
26001
26002 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__NEON_MULL_ADDW_DUP, strided_cm) {
26003 TEST_REQUIRES_ARM_NEON;
26004 GemmMicrokernelTester()
26005 .mr(1)
26006 .nr(8)
26007 .kr(1)
26008 .sr(1)
26009 .m(1)
26010 .n(8)
26011 .k(8)
26012 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080026013 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070026014 }
26015#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
26016
26017
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026018#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
26019 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8) {
26020 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026021 GemmMicrokernelTester()
26022 .mr(4)
26023 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026024 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026025 .sr(1)
26026 .m(4)
26027 .n(8)
26028 .k(8)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026029 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026030 }
26031
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026032 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cn) {
26033 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026034 GemmMicrokernelTester()
26035 .mr(4)
26036 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026037 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026038 .sr(1)
26039 .m(4)
26040 .n(8)
26041 .k(8)
26042 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026043 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026044 }
26045
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026046 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_strided_a) {
26047 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026048 GemmMicrokernelTester()
26049 .mr(4)
26050 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026051 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026052 .sr(1)
26053 .m(4)
26054 .n(8)
26055 .k(8)
26056 .a_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026057 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026058 }
26059
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026060 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile) {
26061 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080026062 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026063 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026064 GemmMicrokernelTester()
26065 .mr(4)
26066 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026067 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026068 .sr(1)
26069 .m(m)
26070 .n(n)
26071 .k(8)
26072 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026073 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026074 }
26075 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026076 }
26077
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026078 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_m) {
26079 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026080 for (uint32_t m = 1; m <= 4; m++) {
26081 GemmMicrokernelTester()
26082 .mr(4)
26083 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026084 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026085 .sr(1)
26086 .m(m)
26087 .n(8)
26088 .k(8)
26089 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026090 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026091 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026092 }
26093
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026094 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_n) {
26095 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026096 for (uint32_t n = 1; n <= 8; n++) {
26097 GemmMicrokernelTester()
26098 .mr(4)
26099 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026100 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026101 .sr(1)
26102 .m(4)
26103 .n(n)
26104 .k(8)
26105 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026106 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026107 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026108 }
26109
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026110 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8) {
26111 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026112 for (size_t k = 1; k < 8; k++) {
26113 GemmMicrokernelTester()
26114 .mr(4)
26115 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026116 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026117 .sr(1)
26118 .m(4)
26119 .n(8)
26120 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026121 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026122 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026123 }
26124
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026125 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_strided_a) {
26126 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026127 for (size_t k = 1; k < 8; k++) {
26128 GemmMicrokernelTester()
26129 .mr(4)
26130 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026131 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026132 .sr(1)
26133 .m(4)
26134 .n(8)
26135 .k(k)
26136 .a_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026137 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026138 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026139 }
26140
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026141 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_subtile) {
26142 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026143 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026144 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026145 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026146 GemmMicrokernelTester()
26147 .mr(4)
26148 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026149 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026150 .sr(1)
26151 .m(m)
26152 .n(n)
26153 .k(k)
26154 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026155 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026156 }
26157 }
26158 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026159 }
26160
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026161 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8) {
26162 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026163 for (size_t k = 9; k < 16; k++) {
26164 GemmMicrokernelTester()
26165 .mr(4)
26166 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026167 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026168 .sr(1)
26169 .m(4)
26170 .n(8)
26171 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026172 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026173 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026174 }
26175
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026176 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_strided_a) {
26177 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026178 for (size_t k = 9; k < 16; k++) {
26179 GemmMicrokernelTester()
26180 .mr(4)
26181 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026182 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026183 .sr(1)
26184 .m(4)
26185 .n(8)
26186 .k(k)
26187 .a_stride(19)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026188 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026189 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026190 }
26191
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026192 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_subtile) {
26193 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026194 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026195 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026196 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026197 GemmMicrokernelTester()
26198 .mr(4)
26199 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026200 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026201 .sr(1)
26202 .m(m)
26203 .n(n)
26204 .k(k)
26205 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026206 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026207 }
26208 }
26209 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026210 }
26211
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026212 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_div_8) {
26213 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026214 for (size_t k = 16; k <= 80; k += 8) {
26215 GemmMicrokernelTester()
26216 .mr(4)
26217 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026218 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026219 .sr(1)
26220 .m(4)
26221 .n(8)
26222 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026223 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026224 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026225 }
26226
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026227 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_strided_a) {
26228 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026229 for (size_t k = 16; k <= 80; k += 8) {
26230 GemmMicrokernelTester()
26231 .mr(4)
26232 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026233 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026234 .sr(1)
26235 .m(4)
26236 .n(8)
26237 .k(k)
26238 .a_stride(83)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026239 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026240 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026241 }
26242
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026243 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_subtile) {
26244 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026245 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026246 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026247 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026248 GemmMicrokernelTester()
26249 .mr(4)
26250 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026251 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026252 .sr(1)
26253 .m(m)
26254 .n(n)
26255 .k(k)
26256 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026257 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026258 }
26259 }
26260 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026261 }
26262
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026263 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8) {
26264 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026265 for (uint32_t n = 9; n < 16; n++) {
26266 for (size_t k = 1; k <= 40; k += 9) {
26267 GemmMicrokernelTester()
26268 .mr(4)
26269 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026270 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026271 .sr(1)
26272 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026273 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026274 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026275 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026276 }
26277 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026278 }
26279
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026280 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_cn) {
26281 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026282 for (uint32_t n = 9; n < 16; n++) {
26283 for (size_t k = 1; k <= 40; k += 9) {
26284 GemmMicrokernelTester()
26285 .mr(4)
26286 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026287 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026288 .sr(1)
26289 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026290 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026291 .k(k)
26292 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026293 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026294 }
26295 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026296 }
26297
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026298 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_a) {
26299 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026300 for (uint32_t n = 9; n < 16; n++) {
26301 for (size_t k = 1; k <= 40; k += 9) {
26302 GemmMicrokernelTester()
26303 .mr(4)
26304 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026305 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026306 .sr(1)
26307 .m(4)
26308 .n(n)
26309 .k(k)
26310 .a_stride(43)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026311 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026312 }
26313 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026314 }
26315
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026316 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_subtile) {
26317 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026318 for (uint32_t n = 9; n < 16; n++) {
26319 for (size_t k = 1; k <= 40; k += 9) {
26320 for (uint32_t m = 1; m <= 4; m++) {
26321 GemmMicrokernelTester()
26322 .mr(4)
26323 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026324 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026325 .sr(1)
26326 .m(m)
26327 .n(n)
26328 .k(k)
26329 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026330 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026331 }
26332 }
26333 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026334 }
26335
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026336 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8) {
26337 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026338 for (uint32_t n = 16; n <= 24; n += 8) {
26339 for (size_t k = 1; k <= 40; k += 9) {
26340 GemmMicrokernelTester()
26341 .mr(4)
26342 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026343 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026344 .sr(1)
26345 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026346 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026347 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026348 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026349 }
26350 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026351 }
26352
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026353 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_cn) {
26354 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026355 for (uint32_t n = 16; n <= 24; n += 8) {
26356 for (size_t k = 1; k <= 40; k += 9) {
26357 GemmMicrokernelTester()
26358 .mr(4)
26359 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026360 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026361 .sr(1)
26362 .m(4)
26363 .n(n)
26364 .k(k)
26365 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026366 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026367 }
26368 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026369 }
26370
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026371 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_a) {
26372 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026373 for (uint32_t n = 16; n <= 24; n += 8) {
26374 for (size_t k = 1; k <= 40; k += 9) {
26375 GemmMicrokernelTester()
26376 .mr(4)
26377 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026378 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026379 .sr(1)
26380 .m(4)
26381 .n(n)
26382 .k(k)
26383 .a_stride(43)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026384 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026385 }
26386 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026387 }
26388
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026389 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_subtile) {
26390 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026391 for (uint32_t n = 16; n <= 24; n += 8) {
26392 for (size_t k = 1; k <= 40; k += 9) {
26393 for (uint32_t m = 1; m <= 4; m++) {
26394 GemmMicrokernelTester()
26395 .mr(4)
26396 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026397 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026398 .sr(1)
26399 .m(m)
26400 .n(n)
26401 .k(k)
26402 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026403 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026404 }
26405 }
26406 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026407 }
26408
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026409 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cm_subtile) {
26410 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026411 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026412 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026413 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026414 GemmMicrokernelTester()
26415 .mr(4)
26416 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026417 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026418 .sr(1)
26419 .m(m)
26420 .n(n)
26421 .k(k)
26422 .cm_stride(11)
26423 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026424 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026425 }
26426 }
26427 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026428 }
26429
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026430 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, qmin) {
26431 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026432 GemmMicrokernelTester()
26433 .mr(4)
26434 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026435 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026436 .sr(1)
26437 .m(4)
26438 .n(8)
26439 .k(8)
26440 .qmin(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026441 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026442 }
26443
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026444 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, qmax) {
26445 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026446 GemmMicrokernelTester()
26447 .mr(4)
26448 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026449 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026450 .sr(1)
26451 .m(4)
26452 .n(8)
26453 .k(8)
26454 .qmax(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026455 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026456 }
26457
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026458 TEST(GENERATE_QS8_GEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cm) {
26459 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026460 GemmMicrokernelTester()
26461 .mr(4)
26462 .nr(8)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026463 .kr(4)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026464 .sr(1)
26465 .m(4)
26466 .n(8)
26467 .k(8)
26468 .cm_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080026469 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080026470 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026471#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT