blob: 05e7cf045888ac49438ac9a335b570ec0d218443 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-gemminc-minmax.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
Zhi An Ngb43b47a2021-12-23 16:27:22 -080016#include <xnnpack/allocator.h>
Marat Dukhan1c587112020-04-08 20:04:28 -070017#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
26#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -070027 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028 TEST_REQUIRES_ARM_NEON_FMA;
29 GemmMicrokernelTester()
30 .mr(1)
31 .nr(8)
32 .kr(1)
33 .sr(1)
34 .m(1)
35 .n(8)
36 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070037 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070038 }
39
Marat Dukhande06f492020-04-09 00:19:31 -070040 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070041 TEST_REQUIRES_ARM_NEON_FMA;
42 GemmMicrokernelTester()
43 .mr(1)
44 .nr(8)
45 .kr(1)
46 .sr(1)
47 .m(1)
48 .n(8)
49 .k(8)
50 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070051 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070052 }
53
Marat Dukhande06f492020-04-09 00:19:31 -070054 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070055 TEST_REQUIRES_ARM_NEON_FMA;
56 GemmMicrokernelTester()
57 .mr(1)
58 .nr(8)
59 .kr(1)
60 .sr(1)
61 .m(1)
62 .n(8)
63 .k(8)
64 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070065 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070066 }
67
Marat Dukhande06f492020-04-09 00:19:31 -070068 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070069 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080070 for (uint32_t n = 1; n <= 8; n++) {
71 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070072 GemmMicrokernelTester()
73 .mr(1)
74 .nr(8)
75 .kr(1)
76 .sr(1)
77 .m(m)
78 .n(n)
79 .k(8)
80 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070081 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070082 }
83 }
84 }
85
Marat Dukhande06f492020-04-09 00:19:31 -070086 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070087 TEST_REQUIRES_ARM_NEON_FMA;
88 for (uint32_t m = 1; m <= 1; m++) {
89 GemmMicrokernelTester()
90 .mr(1)
91 .nr(8)
92 .kr(1)
93 .sr(1)
94 .m(m)
95 .n(8)
96 .k(8)
97 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070098 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070099 }
100 }
101
Marat Dukhande06f492020-04-09 00:19:31 -0700102 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700103 TEST_REQUIRES_ARM_NEON_FMA;
104 for (uint32_t n = 1; n <= 8; n++) {
105 GemmMicrokernelTester()
106 .mr(1)
107 .nr(8)
108 .kr(1)
109 .sr(1)
110 .m(1)
111 .n(n)
112 .k(8)
113 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700114 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700115 }
116 }
117
Marat Dukhande06f492020-04-09 00:19:31 -0700118 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700119 TEST_REQUIRES_ARM_NEON_FMA;
120 GemmMicrokernelTester()
121 .mr(1)
122 .nr(8)
123 .kr(1)
124 .sr(1)
125 .m(1)
126 .n(8)
127 .k(16)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700128 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700129 }
130
Marat Dukhande06f492020-04-09 00:19:31 -0700131 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700132 TEST_REQUIRES_ARM_NEON_FMA;
133 GemmMicrokernelTester()
134 .mr(1)
135 .nr(8)
136 .kr(1)
137 .sr(1)
138 .m(1)
139 .n(8)
140 .k(16)
141 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700142 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700143 }
144
Marat Dukhande06f492020-04-09 00:19:31 -0700145 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700146 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800147 for (uint32_t n = 1; n <= 8; n++) {
148 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700149 GemmMicrokernelTester()
150 .mr(1)
151 .nr(8)
152 .kr(1)
153 .sr(1)
154 .m(m)
155 .n(n)
156 .k(16)
157 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700158 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700159 }
160 }
161 }
162
Marat Dukhande06f492020-04-09 00:19:31 -0700163 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700164 TEST_REQUIRES_ARM_NEON_FMA;
165 for (size_t k = 1; k < 16; k++) {
166 GemmMicrokernelTester()
167 .mr(1)
168 .nr(8)
169 .kr(1)
170 .sr(1)
171 .m(1)
172 .n(8)
173 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700174 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700175 }
176 }
177
Marat Dukhande06f492020-04-09 00:19:31 -0700178 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700179 TEST_REQUIRES_ARM_NEON_FMA;
180 for (size_t k = 1; k < 16; k++) {
181 GemmMicrokernelTester()
182 .mr(1)
183 .nr(8)
184 .kr(1)
185 .sr(1)
186 .m(1)
187 .n(8)
188 .k(k)
189 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700190 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700191 }
192 }
193
Marat Dukhande06f492020-04-09 00:19:31 -0700194 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700195 TEST_REQUIRES_ARM_NEON_FMA;
196 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800197 for (uint32_t n = 1; n <= 8; n++) {
198 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700199 GemmMicrokernelTester()
200 .mr(1)
201 .nr(8)
202 .kr(1)
203 .sr(1)
204 .m(m)
205 .n(n)
206 .k(k)
207 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700208 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700209 }
210 }
211 }
212 }
213
Marat Dukhande06f492020-04-09 00:19:31 -0700214 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700215 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800216 for (size_t k = 17; k < 32; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700217 GemmMicrokernelTester()
218 .mr(1)
219 .nr(8)
220 .kr(1)
221 .sr(1)
222 .m(1)
223 .n(8)
224 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700225 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700226 }
227 }
228
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800229 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700230 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800231 for (size_t k = 17; k < 32; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700232 GemmMicrokernelTester()
233 .mr(1)
234 .nr(8)
235 .kr(1)
236 .sr(1)
237 .m(1)
238 .n(8)
239 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800240 .a_stride(37)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700241 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700242 }
243 }
244
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800245 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700246 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800247 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800248 for (uint32_t n = 1; n <= 8; n++) {
249 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700250 GemmMicrokernelTester()
251 .mr(1)
252 .nr(8)
253 .kr(1)
254 .sr(1)
255 .m(m)
256 .n(n)
257 .k(k)
258 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700259 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700260 }
261 }
262 }
263 }
264
Marat Dukhande06f492020-04-09 00:19:31 -0700265 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700266 TEST_REQUIRES_ARM_NEON_FMA;
267 for (size_t k = 24; k <= 80; k += 8) {
268 GemmMicrokernelTester()
269 .mr(1)
270 .nr(8)
271 .kr(1)
272 .sr(1)
273 .m(1)
274 .n(8)
275 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700276 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700277 }
278 }
279
Marat Dukhande06f492020-04-09 00:19:31 -0700280 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700281 TEST_REQUIRES_ARM_NEON_FMA;
282 for (size_t k = 24; k <= 80; k += 8) {
283 GemmMicrokernelTester()
284 .mr(1)
285 .nr(8)
286 .kr(1)
287 .sr(1)
288 .m(1)
289 .n(8)
290 .k(k)
291 .a_stride(83)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700292 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700293 }
294 }
295
Marat Dukhande06f492020-04-09 00:19:31 -0700296 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700297 TEST_REQUIRES_ARM_NEON_FMA;
298 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800299 for (uint32_t n = 1; n <= 8; n++) {
300 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700301 GemmMicrokernelTester()
302 .mr(1)
303 .nr(8)
304 .kr(1)
305 .sr(1)
306 .m(m)
307 .n(n)
308 .k(k)
309 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700310 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700311 }
312 }
313 }
314 }
315
Marat Dukhande06f492020-04-09 00:19:31 -0700316 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700317 TEST_REQUIRES_ARM_NEON_FMA;
318 for (uint32_t n = 9; n < 16; n++) {
319 for (size_t k = 1; k <= 40; k += 9) {
320 GemmMicrokernelTester()
321 .mr(1)
322 .nr(8)
323 .kr(1)
324 .sr(1)
325 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800326 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -0700327 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700328 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700329 }
330 }
331 }
332
Marat Dukhande06f492020-04-09 00:19:31 -0700333 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700334 TEST_REQUIRES_ARM_NEON_FMA;
335 for (uint32_t n = 9; n < 16; n++) {
336 for (size_t k = 1; k <= 40; k += 9) {
337 GemmMicrokernelTester()
338 .mr(1)
339 .nr(8)
340 .kr(1)
341 .sr(1)
342 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800343 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -0700344 .k(k)
345 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700346 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700347 }
348 }
349 }
350
Marat Dukhande06f492020-04-09 00:19:31 -0700351 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700352 TEST_REQUIRES_ARM_NEON_FMA;
353 for (uint32_t n = 9; n < 16; n++) {
354 for (size_t k = 1; k <= 40; k += 9) {
355 GemmMicrokernelTester()
356 .mr(1)
357 .nr(8)
358 .kr(1)
359 .sr(1)
360 .m(1)
361 .n(n)
362 .k(k)
363 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700364 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700365 }
366 }
367 }
368
Marat Dukhande06f492020-04-09 00:19:31 -0700369 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700370 TEST_REQUIRES_ARM_NEON_FMA;
371 for (uint32_t n = 9; n < 16; n++) {
372 for (size_t k = 1; k <= 40; k += 9) {
373 for (uint32_t m = 1; m <= 1; m++) {
374 GemmMicrokernelTester()
375 .mr(1)
376 .nr(8)
377 .kr(1)
378 .sr(1)
379 .m(m)
380 .n(n)
381 .k(k)
382 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700383 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700384 }
385 }
386 }
387 }
388
Marat Dukhande06f492020-04-09 00:19:31 -0700389 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700390 TEST_REQUIRES_ARM_NEON_FMA;
391 for (uint32_t n = 16; n <= 24; n += 8) {
392 for (size_t k = 1; k <= 40; k += 9) {
393 GemmMicrokernelTester()
394 .mr(1)
395 .nr(8)
396 .kr(1)
397 .sr(1)
398 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800399 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -0700400 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700401 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700402 }
403 }
404 }
405
Marat Dukhande06f492020-04-09 00:19:31 -0700406 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700407 TEST_REQUIRES_ARM_NEON_FMA;
408 for (uint32_t n = 16; n <= 24; n += 8) {
409 for (size_t k = 1; k <= 40; k += 9) {
410 GemmMicrokernelTester()
411 .mr(1)
412 .nr(8)
413 .kr(1)
414 .sr(1)
415 .m(1)
416 .n(n)
417 .k(k)
418 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700419 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700420 }
421 }
422 }
423
Marat Dukhande06f492020-04-09 00:19:31 -0700424 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700425 TEST_REQUIRES_ARM_NEON_FMA;
426 for (uint32_t n = 16; n <= 24; n += 8) {
427 for (size_t k = 1; k <= 40; k += 9) {
428 GemmMicrokernelTester()
429 .mr(1)
430 .nr(8)
431 .kr(1)
432 .sr(1)
433 .m(1)
434 .n(n)
435 .k(k)
436 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700437 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700438 }
439 }
440 }
441
Marat Dukhande06f492020-04-09 00:19:31 -0700442 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700443 TEST_REQUIRES_ARM_NEON_FMA;
444 for (uint32_t n = 16; n <= 24; n += 8) {
445 for (size_t k = 1; k <= 40; k += 9) {
446 for (uint32_t m = 1; m <= 1; m++) {
447 GemmMicrokernelTester()
448 .mr(1)
449 .nr(8)
450 .kr(1)
451 .sr(1)
452 .m(m)
453 .n(n)
454 .k(k)
455 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700456 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700457 }
458 }
459 }
460 }
461
Marat Dukhande06f492020-04-09 00:19:31 -0700462 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700463 TEST_REQUIRES_ARM_NEON_FMA;
464 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800465 for (uint32_t n = 1; n <= 8; n++) {
466 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700467 GemmMicrokernelTester()
468 .mr(1)
469 .nr(8)
470 .kr(1)
471 .sr(1)
472 .m(m)
473 .n(n)
474 .k(k)
475 .cm_stride(11)
476 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700477 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700478 }
479 }
480 }
481 }
482
Marat Dukhande06f492020-04-09 00:19:31 -0700483 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700484 TEST_REQUIRES_ARM_NEON_FMA;
485 GemmMicrokernelTester()
486 .mr(1)
487 .nr(8)
488 .kr(1)
489 .sr(1)
490 .m(1)
491 .n(8)
492 .k(8)
493 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700494 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700495 }
496
Marat Dukhande06f492020-04-09 00:19:31 -0700497 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700498 TEST_REQUIRES_ARM_NEON_FMA;
499 GemmMicrokernelTester()
500 .mr(1)
501 .nr(8)
502 .kr(1)
503 .sr(1)
504 .m(1)
505 .n(8)
506 .k(8)
507 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700508 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700509 }
510
Marat Dukhande06f492020-04-09 00:19:31 -0700511 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700512 TEST_REQUIRES_ARM_NEON_FMA;
513 GemmMicrokernelTester()
514 .mr(1)
515 .nr(8)
516 .kr(1)
517 .sr(1)
518 .m(1)
519 .n(8)
520 .k(8)
521 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700522 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700523 }
524#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
525
526
527#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard143a1102021-06-15 09:15:34 -0700528 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
529 TEST_REQUIRES_ARM_NEON_FMA;
530 GemmMicrokernelTester()
531 .mr(1)
532 .nr(8)
533 .kr(1)
534 .sr(1)
535 .m(1)
536 .n(8)
537 .k(8)
538 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
539 }
540
541 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
542 TEST_REQUIRES_ARM_NEON_FMA;
543 GemmMicrokernelTester()
544 .mr(1)
545 .nr(8)
546 .kr(1)
547 .sr(1)
548 .m(1)
549 .n(8)
550 .k(8)
551 .cn_stride(11)
552 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
553 }
554
555 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_strided_a) {
556 TEST_REQUIRES_ARM_NEON_FMA;
557 GemmMicrokernelTester()
558 .mr(1)
559 .nr(8)
560 .kr(1)
561 .sr(1)
562 .m(1)
563 .n(8)
564 .k(8)
565 .a_stride(11)
566 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
567 }
568
569 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
570 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800571 for (uint32_t n = 1; n <= 8; n++) {
572 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -0700573 GemmMicrokernelTester()
574 .mr(1)
575 .nr(8)
576 .kr(1)
577 .sr(1)
578 .m(m)
579 .n(n)
580 .k(8)
581 .iterations(1)
582 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
583 }
584 }
585 }
586
587 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
588 TEST_REQUIRES_ARM_NEON_FMA;
589 for (uint32_t m = 1; m <= 1; m++) {
590 GemmMicrokernelTester()
591 .mr(1)
592 .nr(8)
593 .kr(1)
594 .sr(1)
595 .m(m)
596 .n(8)
597 .k(8)
598 .iterations(1)
599 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
600 }
601 }
602
603 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
604 TEST_REQUIRES_ARM_NEON_FMA;
605 for (uint32_t n = 1; n <= 8; n++) {
606 GemmMicrokernelTester()
607 .mr(1)
608 .nr(8)
609 .kr(1)
610 .sr(1)
611 .m(1)
612 .n(n)
613 .k(8)
614 .iterations(1)
615 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
616 }
617 }
618
619 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
620 TEST_REQUIRES_ARM_NEON_FMA;
621 GemmMicrokernelTester()
622 .mr(1)
623 .nr(8)
624 .kr(1)
625 .sr(1)
626 .m(1)
627 .n(8)
628 .k(16)
629 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
630 }
631
632 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_strided_a) {
633 TEST_REQUIRES_ARM_NEON_FMA;
634 GemmMicrokernelTester()
635 .mr(1)
636 .nr(8)
637 .kr(1)
638 .sr(1)
639 .m(1)
640 .n(8)
641 .k(16)
642 .a_stride(19)
643 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
644 }
645
646 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
647 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800648 for (uint32_t n = 1; n <= 8; n++) {
649 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -0700650 GemmMicrokernelTester()
651 .mr(1)
652 .nr(8)
653 .kr(1)
654 .sr(1)
655 .m(m)
656 .n(n)
657 .k(16)
658 .iterations(1)
659 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
660 }
661 }
662 }
663
664 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
665 TEST_REQUIRES_ARM_NEON_FMA;
666 for (size_t k = 1; k < 16; k++) {
667 GemmMicrokernelTester()
668 .mr(1)
669 .nr(8)
670 .kr(1)
671 .sr(1)
672 .m(1)
673 .n(8)
674 .k(k)
675 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
676 }
677 }
678
679 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_strided_a) {
680 TEST_REQUIRES_ARM_NEON_FMA;
681 for (size_t k = 1; k < 16; k++) {
682 GemmMicrokernelTester()
683 .mr(1)
684 .nr(8)
685 .kr(1)
686 .sr(1)
687 .m(1)
688 .n(8)
689 .k(k)
690 .a_stride(19)
691 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
692 }
693 }
694
695 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
696 TEST_REQUIRES_ARM_NEON_FMA;
697 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800698 for (uint32_t n = 1; n <= 8; n++) {
699 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -0700700 GemmMicrokernelTester()
701 .mr(1)
702 .nr(8)
703 .kr(1)
704 .sr(1)
705 .m(m)
706 .n(n)
707 .k(k)
708 .iterations(1)
709 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
710 }
711 }
712 }
713 }
714
715 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
716 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800717 for (size_t k = 17; k < 32; k++) {
Frank Barchard143a1102021-06-15 09:15:34 -0700718 GemmMicrokernelTester()
719 .mr(1)
720 .nr(8)
721 .kr(1)
722 .sr(1)
723 .m(1)
724 .n(8)
725 .k(k)
726 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
727 }
728 }
729
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800730 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_strided_a) {
Frank Barchard143a1102021-06-15 09:15:34 -0700731 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800732 for (size_t k = 17; k < 32; k++) {
Frank Barchard143a1102021-06-15 09:15:34 -0700733 GemmMicrokernelTester()
734 .mr(1)
735 .nr(8)
736 .kr(1)
737 .sr(1)
738 .m(1)
739 .n(8)
740 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800741 .a_stride(37)
Frank Barchard143a1102021-06-15 09:15:34 -0700742 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
743 }
744 }
745
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800746 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
Frank Barchard143a1102021-06-15 09:15:34 -0700747 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -0800748 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800749 for (uint32_t n = 1; n <= 8; n++) {
750 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -0700751 GemmMicrokernelTester()
752 .mr(1)
753 .nr(8)
754 .kr(1)
755 .sr(1)
756 .m(m)
757 .n(n)
758 .k(k)
759 .iterations(1)
760 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
761 }
762 }
763 }
764 }
765
766 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
767 TEST_REQUIRES_ARM_NEON_FMA;
768 for (size_t k = 24; k <= 80; k += 8) {
769 GemmMicrokernelTester()
770 .mr(1)
771 .nr(8)
772 .kr(1)
773 .sr(1)
774 .m(1)
775 .n(8)
776 .k(k)
777 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
778 }
779 }
780
781 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_strided_a) {
782 TEST_REQUIRES_ARM_NEON_FMA;
783 for (size_t k = 24; k <= 80; k += 8) {
784 GemmMicrokernelTester()
785 .mr(1)
786 .nr(8)
787 .kr(1)
788 .sr(1)
789 .m(1)
790 .n(8)
791 .k(k)
792 .a_stride(83)
793 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
794 }
795 }
796
797 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
798 TEST_REQUIRES_ARM_NEON_FMA;
799 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800800 for (uint32_t n = 1; n <= 8; n++) {
801 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -0700802 GemmMicrokernelTester()
803 .mr(1)
804 .nr(8)
805 .kr(1)
806 .sr(1)
807 .m(m)
808 .n(n)
809 .k(k)
810 .iterations(1)
811 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
812 }
813 }
814 }
815 }
816
817 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
818 TEST_REQUIRES_ARM_NEON_FMA;
819 for (uint32_t n = 9; n < 16; n++) {
820 for (size_t k = 1; k <= 40; k += 9) {
821 GemmMicrokernelTester()
822 .mr(1)
823 .nr(8)
824 .kr(1)
825 .sr(1)
826 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800827 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -0700828 .k(k)
829 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
830 }
831 }
832 }
833
834 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
835 TEST_REQUIRES_ARM_NEON_FMA;
836 for (uint32_t n = 9; n < 16; n++) {
837 for (size_t k = 1; k <= 40; k += 9) {
838 GemmMicrokernelTester()
839 .mr(1)
840 .nr(8)
841 .kr(1)
842 .sr(1)
843 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800844 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -0700845 .k(k)
846 .cn_stride(11)
847 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
848 }
849 }
850 }
851
852 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_a) {
853 TEST_REQUIRES_ARM_NEON_FMA;
854 for (uint32_t n = 9; n < 16; n++) {
855 for (size_t k = 1; k <= 40; k += 9) {
856 GemmMicrokernelTester()
857 .mr(1)
858 .nr(8)
859 .kr(1)
860 .sr(1)
861 .m(1)
862 .n(n)
863 .k(k)
864 .a_stride(43)
865 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
866 }
867 }
868 }
869
870 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
871 TEST_REQUIRES_ARM_NEON_FMA;
872 for (uint32_t n = 9; n < 16; n++) {
873 for (size_t k = 1; k <= 40; k += 9) {
874 for (uint32_t m = 1; m <= 1; m++) {
875 GemmMicrokernelTester()
876 .mr(1)
877 .nr(8)
878 .kr(1)
879 .sr(1)
880 .m(m)
881 .n(n)
882 .k(k)
883 .iterations(1)
884 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
885 }
886 }
887 }
888 }
889
890 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
891 TEST_REQUIRES_ARM_NEON_FMA;
892 for (uint32_t n = 16; n <= 24; n += 8) {
893 for (size_t k = 1; k <= 40; k += 9) {
894 GemmMicrokernelTester()
895 .mr(1)
896 .nr(8)
897 .kr(1)
898 .sr(1)
899 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800900 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -0700901 .k(k)
902 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
903 }
904 }
905 }
906
907 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
908 TEST_REQUIRES_ARM_NEON_FMA;
909 for (uint32_t n = 16; n <= 24; n += 8) {
910 for (size_t k = 1; k <= 40; k += 9) {
911 GemmMicrokernelTester()
912 .mr(1)
913 .nr(8)
914 .kr(1)
915 .sr(1)
916 .m(1)
917 .n(n)
918 .k(k)
919 .cn_stride(11)
920 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
921 }
922 }
923 }
924
925 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_a) {
926 TEST_REQUIRES_ARM_NEON_FMA;
927 for (uint32_t n = 16; n <= 24; n += 8) {
928 for (size_t k = 1; k <= 40; k += 9) {
929 GemmMicrokernelTester()
930 .mr(1)
931 .nr(8)
932 .kr(1)
933 .sr(1)
934 .m(1)
935 .n(n)
936 .k(k)
937 .a_stride(43)
938 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
939 }
940 }
941 }
942
943 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
944 TEST_REQUIRES_ARM_NEON_FMA;
945 for (uint32_t n = 16; n <= 24; n += 8) {
946 for (size_t k = 1; k <= 40; k += 9) {
947 for (uint32_t m = 1; m <= 1; m++) {
948 GemmMicrokernelTester()
949 .mr(1)
950 .nr(8)
951 .kr(1)
952 .sr(1)
953 .m(m)
954 .n(n)
955 .k(k)
956 .iterations(1)
957 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
958 }
959 }
960 }
961 }
962
963 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
964 TEST_REQUIRES_ARM_NEON_FMA;
965 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800966 for (uint32_t n = 1; n <= 8; n++) {
967 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -0700968 GemmMicrokernelTester()
969 .mr(1)
970 .nr(8)
971 .kr(1)
972 .sr(1)
973 .m(m)
974 .n(n)
975 .k(k)
976 .cm_stride(11)
977 .iterations(1)
978 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
979 }
980 }
981 }
982 }
983
984 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
985 TEST_REQUIRES_ARM_NEON_FMA;
986 GemmMicrokernelTester()
987 .mr(1)
988 .nr(8)
989 .kr(1)
990 .sr(1)
991 .m(1)
992 .n(8)
993 .k(8)
994 .qmin(128)
995 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
996 }
997
998 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
999 TEST_REQUIRES_ARM_NEON_FMA;
1000 GemmMicrokernelTester()
1001 .mr(1)
1002 .nr(8)
1003 .kr(1)
1004 .sr(1)
1005 .m(1)
1006 .n(8)
1007 .k(8)
1008 .qmax(128)
1009 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1010 }
1011
1012 TEST(F32_GEMMINC_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
1013 TEST_REQUIRES_ARM_NEON_FMA;
1014 GemmMicrokernelTester()
1015 .mr(1)
1016 .nr(8)
1017 .kr(1)
1018 .sr(1)
1019 .m(1)
1020 .n(8)
1021 .k(8)
1022 .cm_stride(11)
1023 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1024 }
1025#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1026
1027
1028#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07001029 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001030 TEST_REQUIRES_ARM_NEON_FMA;
1031 GemmMicrokernelTester()
1032 .mr(4)
1033 .nr(8)
1034 .kr(1)
1035 .sr(1)
1036 .m(4)
1037 .n(8)
1038 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001039 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001040 }
1041
Marat Dukhande06f492020-04-09 00:19:31 -07001042 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001043 TEST_REQUIRES_ARM_NEON_FMA;
1044 GemmMicrokernelTester()
1045 .mr(4)
1046 .nr(8)
1047 .kr(1)
1048 .sr(1)
1049 .m(4)
1050 .n(8)
1051 .k(4)
1052 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001053 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001054 }
1055
Marat Dukhande06f492020-04-09 00:19:31 -07001056 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001057 TEST_REQUIRES_ARM_NEON_FMA;
1058 GemmMicrokernelTester()
1059 .mr(4)
1060 .nr(8)
1061 .kr(1)
1062 .sr(1)
1063 .m(4)
1064 .n(8)
1065 .k(4)
1066 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001067 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001068 }
1069
Marat Dukhande06f492020-04-09 00:19:31 -07001070 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001071 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001072 for (uint32_t n = 1; n <= 8; n++) {
1073 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001074 GemmMicrokernelTester()
1075 .mr(4)
1076 .nr(8)
1077 .kr(1)
1078 .sr(1)
1079 .m(m)
1080 .n(n)
1081 .k(4)
1082 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001083 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001084 }
1085 }
1086 }
1087
Marat Dukhande06f492020-04-09 00:19:31 -07001088 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001089 TEST_REQUIRES_ARM_NEON_FMA;
1090 for (uint32_t m = 1; m <= 4; m++) {
1091 GemmMicrokernelTester()
1092 .mr(4)
1093 .nr(8)
1094 .kr(1)
1095 .sr(1)
1096 .m(m)
1097 .n(8)
1098 .k(4)
1099 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001100 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001101 }
1102 }
1103
Marat Dukhande06f492020-04-09 00:19:31 -07001104 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001105 TEST_REQUIRES_ARM_NEON_FMA;
1106 for (uint32_t n = 1; n <= 8; n++) {
1107 GemmMicrokernelTester()
1108 .mr(4)
1109 .nr(8)
1110 .kr(1)
1111 .sr(1)
1112 .m(4)
1113 .n(n)
1114 .k(4)
1115 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001116 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001117 }
1118 }
1119
Marat Dukhande06f492020-04-09 00:19:31 -07001120 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001121 TEST_REQUIRES_ARM_NEON_FMA;
1122 GemmMicrokernelTester()
1123 .mr(4)
1124 .nr(8)
1125 .kr(1)
1126 .sr(1)
1127 .m(4)
1128 .n(8)
1129 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001130 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001131 }
1132
Marat Dukhande06f492020-04-09 00:19:31 -07001133 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001134 TEST_REQUIRES_ARM_NEON_FMA;
1135 GemmMicrokernelTester()
1136 .mr(4)
1137 .nr(8)
1138 .kr(1)
1139 .sr(1)
1140 .m(4)
1141 .n(8)
1142 .k(8)
1143 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001144 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001145 }
1146
Marat Dukhande06f492020-04-09 00:19:31 -07001147 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001148 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001149 for (uint32_t n = 1; n <= 8; n++) {
1150 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001151 GemmMicrokernelTester()
1152 .mr(4)
1153 .nr(8)
1154 .kr(1)
1155 .sr(1)
1156 .m(m)
1157 .n(n)
1158 .k(8)
1159 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001160 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001161 }
1162 }
1163 }
1164
Marat Dukhande06f492020-04-09 00:19:31 -07001165 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001166 TEST_REQUIRES_ARM_NEON_FMA;
1167 for (size_t k = 1; k < 8; k++) {
1168 GemmMicrokernelTester()
1169 .mr(4)
1170 .nr(8)
1171 .kr(1)
1172 .sr(1)
1173 .m(4)
1174 .n(8)
1175 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001176 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001177 }
1178 }
1179
Marat Dukhande06f492020-04-09 00:19:31 -07001180 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001181 TEST_REQUIRES_ARM_NEON_FMA;
1182 for (size_t k = 1; k < 8; k++) {
1183 GemmMicrokernelTester()
1184 .mr(4)
1185 .nr(8)
1186 .kr(1)
1187 .sr(1)
1188 .m(4)
1189 .n(8)
1190 .k(k)
1191 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001192 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001193 }
1194 }
1195
Marat Dukhande06f492020-04-09 00:19:31 -07001196 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001197 TEST_REQUIRES_ARM_NEON_FMA;
1198 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001199 for (uint32_t n = 1; n <= 8; n++) {
1200 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001201 GemmMicrokernelTester()
1202 .mr(4)
1203 .nr(8)
1204 .kr(1)
1205 .sr(1)
1206 .m(m)
1207 .n(n)
1208 .k(k)
1209 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001210 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001211 }
1212 }
1213 }
1214 }
1215
Marat Dukhande06f492020-04-09 00:19:31 -07001216 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001217 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001218 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001219 GemmMicrokernelTester()
1220 .mr(4)
1221 .nr(8)
1222 .kr(1)
1223 .sr(1)
1224 .m(4)
1225 .n(8)
1226 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001227 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001228 }
1229 }
1230
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001231 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001232 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001233 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001234 GemmMicrokernelTester()
1235 .mr(4)
1236 .nr(8)
1237 .kr(1)
1238 .sr(1)
1239 .m(4)
1240 .n(8)
1241 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001242 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001243 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001244 }
1245 }
1246
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001247 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001248 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001249 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001250 for (uint32_t n = 1; n <= 8; n++) {
1251 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001252 GemmMicrokernelTester()
1253 .mr(4)
1254 .nr(8)
1255 .kr(1)
1256 .sr(1)
1257 .m(m)
1258 .n(n)
1259 .k(k)
1260 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001261 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001262 }
1263 }
1264 }
1265 }
1266
Marat Dukhande06f492020-04-09 00:19:31 -07001267 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001268 TEST_REQUIRES_ARM_NEON_FMA;
1269 for (size_t k = 12; k <= 40; k += 4) {
1270 GemmMicrokernelTester()
1271 .mr(4)
1272 .nr(8)
1273 .kr(1)
1274 .sr(1)
1275 .m(4)
1276 .n(8)
1277 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001278 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001279 }
1280 }
1281
Marat Dukhande06f492020-04-09 00:19:31 -07001282 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001283 TEST_REQUIRES_ARM_NEON_FMA;
1284 for (size_t k = 12; k <= 40; k += 4) {
1285 GemmMicrokernelTester()
1286 .mr(4)
1287 .nr(8)
1288 .kr(1)
1289 .sr(1)
1290 .m(4)
1291 .n(8)
1292 .k(k)
1293 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001294 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001295 }
1296 }
1297
Marat Dukhande06f492020-04-09 00:19:31 -07001298 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001299 TEST_REQUIRES_ARM_NEON_FMA;
1300 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001301 for (uint32_t n = 1; n <= 8; n++) {
1302 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001303 GemmMicrokernelTester()
1304 .mr(4)
1305 .nr(8)
1306 .kr(1)
1307 .sr(1)
1308 .m(m)
1309 .n(n)
1310 .k(k)
1311 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001312 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001313 }
1314 }
1315 }
1316 }
1317
Marat Dukhande06f492020-04-09 00:19:31 -07001318 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001319 TEST_REQUIRES_ARM_NEON_FMA;
1320 for (uint32_t n = 9; n < 16; n++) {
1321 for (size_t k = 1; k <= 20; k += 5) {
1322 GemmMicrokernelTester()
1323 .mr(4)
1324 .nr(8)
1325 .kr(1)
1326 .sr(1)
1327 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001328 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001329 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001330 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001331 }
1332 }
1333 }
1334
Marat Dukhande06f492020-04-09 00:19:31 -07001335 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001336 TEST_REQUIRES_ARM_NEON_FMA;
1337 for (uint32_t n = 9; n < 16; n++) {
1338 for (size_t k = 1; k <= 20; k += 5) {
1339 GemmMicrokernelTester()
1340 .mr(4)
1341 .nr(8)
1342 .kr(1)
1343 .sr(1)
1344 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001345 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001346 .k(k)
1347 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001348 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001349 }
1350 }
1351 }
1352
Marat Dukhande06f492020-04-09 00:19:31 -07001353 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001354 TEST_REQUIRES_ARM_NEON_FMA;
1355 for (uint32_t n = 9; n < 16; n++) {
1356 for (size_t k = 1; k <= 20; k += 5) {
1357 GemmMicrokernelTester()
1358 .mr(4)
1359 .nr(8)
1360 .kr(1)
1361 .sr(1)
1362 .m(4)
1363 .n(n)
1364 .k(k)
1365 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001366 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001367 }
1368 }
1369 }
1370
Marat Dukhande06f492020-04-09 00:19:31 -07001371 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001372 TEST_REQUIRES_ARM_NEON_FMA;
1373 for (uint32_t n = 9; n < 16; n++) {
1374 for (size_t k = 1; k <= 20; k += 5) {
1375 for (uint32_t m = 1; m <= 4; m++) {
1376 GemmMicrokernelTester()
1377 .mr(4)
1378 .nr(8)
1379 .kr(1)
1380 .sr(1)
1381 .m(m)
1382 .n(n)
1383 .k(k)
1384 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001385 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001386 }
1387 }
1388 }
1389 }
1390
Marat Dukhande06f492020-04-09 00:19:31 -07001391 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001392 TEST_REQUIRES_ARM_NEON_FMA;
1393 for (uint32_t n = 16; n <= 24; n += 8) {
1394 for (size_t k = 1; k <= 20; k += 5) {
1395 GemmMicrokernelTester()
1396 .mr(4)
1397 .nr(8)
1398 .kr(1)
1399 .sr(1)
1400 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001401 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001402 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001403 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001404 }
1405 }
1406 }
1407
Marat Dukhande06f492020-04-09 00:19:31 -07001408 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001409 TEST_REQUIRES_ARM_NEON_FMA;
1410 for (uint32_t n = 16; n <= 24; n += 8) {
1411 for (size_t k = 1; k <= 20; k += 5) {
1412 GemmMicrokernelTester()
1413 .mr(4)
1414 .nr(8)
1415 .kr(1)
1416 .sr(1)
1417 .m(4)
1418 .n(n)
1419 .k(k)
1420 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001421 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001422 }
1423 }
1424 }
1425
Marat Dukhande06f492020-04-09 00:19:31 -07001426 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001427 TEST_REQUIRES_ARM_NEON_FMA;
1428 for (uint32_t n = 16; n <= 24; n += 8) {
1429 for (size_t k = 1; k <= 20; k += 5) {
1430 GemmMicrokernelTester()
1431 .mr(4)
1432 .nr(8)
1433 .kr(1)
1434 .sr(1)
1435 .m(4)
1436 .n(n)
1437 .k(k)
1438 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001439 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001440 }
1441 }
1442 }
1443
Marat Dukhande06f492020-04-09 00:19:31 -07001444 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001445 TEST_REQUIRES_ARM_NEON_FMA;
1446 for (uint32_t n = 16; n <= 24; n += 8) {
1447 for (size_t k = 1; k <= 20; k += 5) {
1448 for (uint32_t m = 1; m <= 4; m++) {
1449 GemmMicrokernelTester()
1450 .mr(4)
1451 .nr(8)
1452 .kr(1)
1453 .sr(1)
1454 .m(m)
1455 .n(n)
1456 .k(k)
1457 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001458 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001459 }
1460 }
1461 }
1462 }
1463
Marat Dukhande06f492020-04-09 00:19:31 -07001464 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001465 TEST_REQUIRES_ARM_NEON_FMA;
1466 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001467 for (uint32_t n = 1; n <= 8; n++) {
1468 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001469 GemmMicrokernelTester()
1470 .mr(4)
1471 .nr(8)
1472 .kr(1)
1473 .sr(1)
1474 .m(m)
1475 .n(n)
1476 .k(k)
1477 .cm_stride(11)
1478 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001479 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001480 }
1481 }
1482 }
1483 }
1484
Marat Dukhande06f492020-04-09 00:19:31 -07001485 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001486 TEST_REQUIRES_ARM_NEON_FMA;
1487 GemmMicrokernelTester()
1488 .mr(4)
1489 .nr(8)
1490 .kr(1)
1491 .sr(1)
1492 .m(4)
1493 .n(8)
1494 .k(4)
1495 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001496 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001497 }
1498
Marat Dukhande06f492020-04-09 00:19:31 -07001499 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001500 TEST_REQUIRES_ARM_NEON_FMA;
1501 GemmMicrokernelTester()
1502 .mr(4)
1503 .nr(8)
1504 .kr(1)
1505 .sr(1)
1506 .m(4)
1507 .n(8)
1508 .k(4)
1509 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001510 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001511 }
1512
Marat Dukhande06f492020-04-09 00:19:31 -07001513 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001514 TEST_REQUIRES_ARM_NEON_FMA;
1515 GemmMicrokernelTester()
1516 .mr(4)
1517 .nr(8)
1518 .kr(1)
1519 .sr(1)
1520 .m(4)
1521 .n(8)
1522 .k(4)
1523 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001524 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001525 }
1526#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1527
1528
1529#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07001530 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001531 TEST_REQUIRES_ARM_NEON_FMA;
1532 GemmMicrokernelTester()
1533 .mr(4)
1534 .nr(8)
1535 .kr(1)
1536 .sr(1)
1537 .m(4)
1538 .n(8)
1539 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001540 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001541 }
1542
Marat Dukhande06f492020-04-09 00:19:31 -07001543 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001544 TEST_REQUIRES_ARM_NEON_FMA;
1545 GemmMicrokernelTester()
1546 .mr(4)
1547 .nr(8)
1548 .kr(1)
1549 .sr(1)
1550 .m(4)
1551 .n(8)
1552 .k(8)
1553 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001554 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001555 }
1556
Marat Dukhande06f492020-04-09 00:19:31 -07001557 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001558 TEST_REQUIRES_ARM_NEON_FMA;
1559 GemmMicrokernelTester()
1560 .mr(4)
1561 .nr(8)
1562 .kr(1)
1563 .sr(1)
1564 .m(4)
1565 .n(8)
1566 .k(8)
1567 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001568 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001569 }
1570
Marat Dukhande06f492020-04-09 00:19:31 -07001571 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001572 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001573 for (uint32_t n = 1; n <= 8; n++) {
1574 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001575 GemmMicrokernelTester()
1576 .mr(4)
1577 .nr(8)
1578 .kr(1)
1579 .sr(1)
1580 .m(m)
1581 .n(n)
1582 .k(8)
1583 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001584 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001585 }
1586 }
1587 }
1588
Marat Dukhande06f492020-04-09 00:19:31 -07001589 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001590 TEST_REQUIRES_ARM_NEON_FMA;
1591 for (uint32_t m = 1; m <= 4; m++) {
1592 GemmMicrokernelTester()
1593 .mr(4)
1594 .nr(8)
1595 .kr(1)
1596 .sr(1)
1597 .m(m)
1598 .n(8)
1599 .k(8)
1600 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001601 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001602 }
1603 }
1604
Marat Dukhande06f492020-04-09 00:19:31 -07001605 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001606 TEST_REQUIRES_ARM_NEON_FMA;
1607 for (uint32_t n = 1; n <= 8; n++) {
1608 GemmMicrokernelTester()
1609 .mr(4)
1610 .nr(8)
1611 .kr(1)
1612 .sr(1)
1613 .m(4)
1614 .n(n)
1615 .k(8)
1616 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001617 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001618 }
1619 }
1620
Marat Dukhande06f492020-04-09 00:19:31 -07001621 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001622 TEST_REQUIRES_ARM_NEON_FMA;
1623 GemmMicrokernelTester()
1624 .mr(4)
1625 .nr(8)
1626 .kr(1)
1627 .sr(1)
1628 .m(4)
1629 .n(8)
1630 .k(16)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001631 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001632 }
1633
Marat Dukhande06f492020-04-09 00:19:31 -07001634 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001635 TEST_REQUIRES_ARM_NEON_FMA;
1636 GemmMicrokernelTester()
1637 .mr(4)
1638 .nr(8)
1639 .kr(1)
1640 .sr(1)
1641 .m(4)
1642 .n(8)
1643 .k(16)
1644 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001645 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001646 }
1647
Marat Dukhande06f492020-04-09 00:19:31 -07001648 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001649 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001650 for (uint32_t n = 1; n <= 8; n++) {
1651 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001652 GemmMicrokernelTester()
1653 .mr(4)
1654 .nr(8)
1655 .kr(1)
1656 .sr(1)
1657 .m(m)
1658 .n(n)
1659 .k(16)
1660 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001661 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001662 }
1663 }
1664 }
1665
Marat Dukhande06f492020-04-09 00:19:31 -07001666 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001667 TEST_REQUIRES_ARM_NEON_FMA;
1668 for (size_t k = 1; k < 16; k++) {
1669 GemmMicrokernelTester()
1670 .mr(4)
1671 .nr(8)
1672 .kr(1)
1673 .sr(1)
1674 .m(4)
1675 .n(8)
1676 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001677 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001678 }
1679 }
1680
Marat Dukhande06f492020-04-09 00:19:31 -07001681 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001682 TEST_REQUIRES_ARM_NEON_FMA;
1683 for (size_t k = 1; k < 16; k++) {
1684 GemmMicrokernelTester()
1685 .mr(4)
1686 .nr(8)
1687 .kr(1)
1688 .sr(1)
1689 .m(4)
1690 .n(8)
1691 .k(k)
1692 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001693 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001694 }
1695 }
1696
Marat Dukhande06f492020-04-09 00:19:31 -07001697 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001698 TEST_REQUIRES_ARM_NEON_FMA;
1699 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001700 for (uint32_t n = 1; n <= 8; n++) {
1701 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001702 GemmMicrokernelTester()
1703 .mr(4)
1704 .nr(8)
1705 .kr(1)
1706 .sr(1)
1707 .m(m)
1708 .n(n)
1709 .k(k)
1710 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001711 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001712 }
1713 }
1714 }
1715 }
1716
Marat Dukhande06f492020-04-09 00:19:31 -07001717 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001718 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001719 for (size_t k = 17; k < 32; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001720 GemmMicrokernelTester()
1721 .mr(4)
1722 .nr(8)
1723 .kr(1)
1724 .sr(1)
1725 .m(4)
1726 .n(8)
1727 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001728 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001729 }
1730 }
1731
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001732 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001733 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001734 for (size_t k = 17; k < 32; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001735 GemmMicrokernelTester()
1736 .mr(4)
1737 .nr(8)
1738 .kr(1)
1739 .sr(1)
1740 .m(4)
1741 .n(8)
1742 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001743 .a_stride(37)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001744 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001745 }
1746 }
1747
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001748 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001749 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001750 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001751 for (uint32_t n = 1; n <= 8; n++) {
1752 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001753 GemmMicrokernelTester()
1754 .mr(4)
1755 .nr(8)
1756 .kr(1)
1757 .sr(1)
1758 .m(m)
1759 .n(n)
1760 .k(k)
1761 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001762 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001763 }
1764 }
1765 }
1766 }
1767
Marat Dukhande06f492020-04-09 00:19:31 -07001768 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001769 TEST_REQUIRES_ARM_NEON_FMA;
1770 for (size_t k = 24; k <= 80; k += 8) {
1771 GemmMicrokernelTester()
1772 .mr(4)
1773 .nr(8)
1774 .kr(1)
1775 .sr(1)
1776 .m(4)
1777 .n(8)
1778 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001779 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001780 }
1781 }
1782
Marat Dukhande06f492020-04-09 00:19:31 -07001783 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001784 TEST_REQUIRES_ARM_NEON_FMA;
1785 for (size_t k = 24; k <= 80; k += 8) {
1786 GemmMicrokernelTester()
1787 .mr(4)
1788 .nr(8)
1789 .kr(1)
1790 .sr(1)
1791 .m(4)
1792 .n(8)
1793 .k(k)
1794 .a_stride(83)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001795 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001796 }
1797 }
1798
Marat Dukhande06f492020-04-09 00:19:31 -07001799 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001800 TEST_REQUIRES_ARM_NEON_FMA;
1801 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001802 for (uint32_t n = 1; n <= 8; n++) {
1803 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001804 GemmMicrokernelTester()
1805 .mr(4)
1806 .nr(8)
1807 .kr(1)
1808 .sr(1)
1809 .m(m)
1810 .n(n)
1811 .k(k)
1812 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001813 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001814 }
1815 }
1816 }
1817 }
1818
Marat Dukhande06f492020-04-09 00:19:31 -07001819 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001820 TEST_REQUIRES_ARM_NEON_FMA;
1821 for (uint32_t n = 9; n < 16; n++) {
1822 for (size_t k = 1; k <= 40; k += 9) {
1823 GemmMicrokernelTester()
1824 .mr(4)
1825 .nr(8)
1826 .kr(1)
1827 .sr(1)
1828 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001829 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001830 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001831 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001832 }
1833 }
1834 }
1835
Marat Dukhande06f492020-04-09 00:19:31 -07001836 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001837 TEST_REQUIRES_ARM_NEON_FMA;
1838 for (uint32_t n = 9; n < 16; n++) {
1839 for (size_t k = 1; k <= 40; k += 9) {
1840 GemmMicrokernelTester()
1841 .mr(4)
1842 .nr(8)
1843 .kr(1)
1844 .sr(1)
1845 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001846 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001847 .k(k)
1848 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001849 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001850 }
1851 }
1852 }
1853
Marat Dukhande06f492020-04-09 00:19:31 -07001854 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001855 TEST_REQUIRES_ARM_NEON_FMA;
1856 for (uint32_t n = 9; n < 16; n++) {
1857 for (size_t k = 1; k <= 40; k += 9) {
1858 GemmMicrokernelTester()
1859 .mr(4)
1860 .nr(8)
1861 .kr(1)
1862 .sr(1)
1863 .m(4)
1864 .n(n)
1865 .k(k)
1866 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001867 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001868 }
1869 }
1870 }
1871
Marat Dukhande06f492020-04-09 00:19:31 -07001872 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001873 TEST_REQUIRES_ARM_NEON_FMA;
1874 for (uint32_t n = 9; n < 16; n++) {
1875 for (size_t k = 1; k <= 40; k += 9) {
1876 for (uint32_t m = 1; m <= 4; m++) {
1877 GemmMicrokernelTester()
1878 .mr(4)
1879 .nr(8)
1880 .kr(1)
1881 .sr(1)
1882 .m(m)
1883 .n(n)
1884 .k(k)
1885 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001886 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001887 }
1888 }
1889 }
1890 }
1891
Marat Dukhande06f492020-04-09 00:19:31 -07001892 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001893 TEST_REQUIRES_ARM_NEON_FMA;
1894 for (uint32_t n = 16; n <= 24; n += 8) {
1895 for (size_t k = 1; k <= 40; k += 9) {
1896 GemmMicrokernelTester()
1897 .mr(4)
1898 .nr(8)
1899 .kr(1)
1900 .sr(1)
1901 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001902 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001903 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001904 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001905 }
1906 }
1907 }
1908
Marat Dukhande06f492020-04-09 00:19:31 -07001909 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001910 TEST_REQUIRES_ARM_NEON_FMA;
1911 for (uint32_t n = 16; n <= 24; n += 8) {
1912 for (size_t k = 1; k <= 40; k += 9) {
1913 GemmMicrokernelTester()
1914 .mr(4)
1915 .nr(8)
1916 .kr(1)
1917 .sr(1)
1918 .m(4)
1919 .n(n)
1920 .k(k)
1921 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001922 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001923 }
1924 }
1925 }
1926
Marat Dukhande06f492020-04-09 00:19:31 -07001927 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001928 TEST_REQUIRES_ARM_NEON_FMA;
1929 for (uint32_t n = 16; n <= 24; n += 8) {
1930 for (size_t k = 1; k <= 40; k += 9) {
1931 GemmMicrokernelTester()
1932 .mr(4)
1933 .nr(8)
1934 .kr(1)
1935 .sr(1)
1936 .m(4)
1937 .n(n)
1938 .k(k)
1939 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001940 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001941 }
1942 }
1943 }
1944
Marat Dukhande06f492020-04-09 00:19:31 -07001945 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001946 TEST_REQUIRES_ARM_NEON_FMA;
1947 for (uint32_t n = 16; n <= 24; n += 8) {
1948 for (size_t k = 1; k <= 40; k += 9) {
1949 for (uint32_t m = 1; m <= 4; m++) {
1950 GemmMicrokernelTester()
1951 .mr(4)
1952 .nr(8)
1953 .kr(1)
1954 .sr(1)
1955 .m(m)
1956 .n(n)
1957 .k(k)
1958 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001959 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001960 }
1961 }
1962 }
1963 }
1964
Marat Dukhande06f492020-04-09 00:19:31 -07001965 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001966 TEST_REQUIRES_ARM_NEON_FMA;
1967 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001968 for (uint32_t n = 1; n <= 8; n++) {
1969 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001970 GemmMicrokernelTester()
1971 .mr(4)
1972 .nr(8)
1973 .kr(1)
1974 .sr(1)
1975 .m(m)
1976 .n(n)
1977 .k(k)
1978 .cm_stride(11)
1979 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001980 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001981 }
1982 }
1983 }
1984 }
1985
Marat Dukhande06f492020-04-09 00:19:31 -07001986 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001987 TEST_REQUIRES_ARM_NEON_FMA;
1988 GemmMicrokernelTester()
1989 .mr(4)
1990 .nr(8)
1991 .kr(1)
1992 .sr(1)
1993 .m(4)
1994 .n(8)
1995 .k(8)
1996 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001997 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001998 }
1999
Marat Dukhande06f492020-04-09 00:19:31 -07002000 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002001 TEST_REQUIRES_ARM_NEON_FMA;
2002 GemmMicrokernelTester()
2003 .mr(4)
2004 .nr(8)
2005 .kr(1)
2006 .sr(1)
2007 .m(4)
2008 .n(8)
2009 .k(8)
2010 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002011 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002012 }
2013
Marat Dukhande06f492020-04-09 00:19:31 -07002014 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002015 TEST_REQUIRES_ARM_NEON_FMA;
2016 GemmMicrokernelTester()
2017 .mr(4)
2018 .nr(8)
2019 .kr(1)
2020 .sr(1)
2021 .m(4)
2022 .n(8)
2023 .k(8)
2024 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002025 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002026 }
2027#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2028
2029
2030#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard143a1102021-06-15 09:15:34 -07002031 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
2032 TEST_REQUIRES_ARM_NEON_FMA;
2033 GemmMicrokernelTester()
2034 .mr(4)
2035 .nr(8)
2036 .kr(1)
2037 .sr(1)
2038 .m(4)
2039 .n(8)
2040 .k(8)
2041 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2042 }
2043
2044 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
2045 TEST_REQUIRES_ARM_NEON_FMA;
2046 GemmMicrokernelTester()
2047 .mr(4)
2048 .nr(8)
2049 .kr(1)
2050 .sr(1)
2051 .m(4)
2052 .n(8)
2053 .k(8)
2054 .cn_stride(11)
2055 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2056 }
2057
2058 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_strided_a) {
2059 TEST_REQUIRES_ARM_NEON_FMA;
2060 GemmMicrokernelTester()
2061 .mr(4)
2062 .nr(8)
2063 .kr(1)
2064 .sr(1)
2065 .m(4)
2066 .n(8)
2067 .k(8)
2068 .a_stride(11)
2069 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2070 }
2071
2072 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
2073 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002074 for (uint32_t n = 1; n <= 8; n++) {
2075 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002076 GemmMicrokernelTester()
2077 .mr(4)
2078 .nr(8)
2079 .kr(1)
2080 .sr(1)
2081 .m(m)
2082 .n(n)
2083 .k(8)
2084 .iterations(1)
2085 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2086 }
2087 }
2088 }
2089
2090 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
2091 TEST_REQUIRES_ARM_NEON_FMA;
2092 for (uint32_t m = 1; m <= 4; m++) {
2093 GemmMicrokernelTester()
2094 .mr(4)
2095 .nr(8)
2096 .kr(1)
2097 .sr(1)
2098 .m(m)
2099 .n(8)
2100 .k(8)
2101 .iterations(1)
2102 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2103 }
2104 }
2105
2106 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
2107 TEST_REQUIRES_ARM_NEON_FMA;
2108 for (uint32_t n = 1; n <= 8; n++) {
2109 GemmMicrokernelTester()
2110 .mr(4)
2111 .nr(8)
2112 .kr(1)
2113 .sr(1)
2114 .m(4)
2115 .n(n)
2116 .k(8)
2117 .iterations(1)
2118 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2119 }
2120 }
2121
2122 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
2123 TEST_REQUIRES_ARM_NEON_FMA;
2124 GemmMicrokernelTester()
2125 .mr(4)
2126 .nr(8)
2127 .kr(1)
2128 .sr(1)
2129 .m(4)
2130 .n(8)
2131 .k(16)
2132 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2133 }
2134
2135 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_strided_a) {
2136 TEST_REQUIRES_ARM_NEON_FMA;
2137 GemmMicrokernelTester()
2138 .mr(4)
2139 .nr(8)
2140 .kr(1)
2141 .sr(1)
2142 .m(4)
2143 .n(8)
2144 .k(16)
2145 .a_stride(19)
2146 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2147 }
2148
2149 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
2150 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002151 for (uint32_t n = 1; n <= 8; n++) {
2152 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002153 GemmMicrokernelTester()
2154 .mr(4)
2155 .nr(8)
2156 .kr(1)
2157 .sr(1)
2158 .m(m)
2159 .n(n)
2160 .k(16)
2161 .iterations(1)
2162 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2163 }
2164 }
2165 }
2166
2167 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
2168 TEST_REQUIRES_ARM_NEON_FMA;
2169 for (size_t k = 1; k < 16; k++) {
2170 GemmMicrokernelTester()
2171 .mr(4)
2172 .nr(8)
2173 .kr(1)
2174 .sr(1)
2175 .m(4)
2176 .n(8)
2177 .k(k)
2178 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2179 }
2180 }
2181
2182 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_strided_a) {
2183 TEST_REQUIRES_ARM_NEON_FMA;
2184 for (size_t k = 1; k < 16; k++) {
2185 GemmMicrokernelTester()
2186 .mr(4)
2187 .nr(8)
2188 .kr(1)
2189 .sr(1)
2190 .m(4)
2191 .n(8)
2192 .k(k)
2193 .a_stride(19)
2194 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2195 }
2196 }
2197
2198 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
2199 TEST_REQUIRES_ARM_NEON_FMA;
2200 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002201 for (uint32_t n = 1; n <= 8; n++) {
2202 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002203 GemmMicrokernelTester()
2204 .mr(4)
2205 .nr(8)
2206 .kr(1)
2207 .sr(1)
2208 .m(m)
2209 .n(n)
2210 .k(k)
2211 .iterations(1)
2212 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2213 }
2214 }
2215 }
2216 }
2217
2218 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
2219 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002220 for (size_t k = 17; k < 32; k++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002221 GemmMicrokernelTester()
2222 .mr(4)
2223 .nr(8)
2224 .kr(1)
2225 .sr(1)
2226 .m(4)
2227 .n(8)
2228 .k(k)
2229 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2230 }
2231 }
2232
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002233 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_strided_a) {
Frank Barchard143a1102021-06-15 09:15:34 -07002234 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002235 for (size_t k = 17; k < 32; k++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002236 GemmMicrokernelTester()
2237 .mr(4)
2238 .nr(8)
2239 .kr(1)
2240 .sr(1)
2241 .m(4)
2242 .n(8)
2243 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002244 .a_stride(37)
Frank Barchard143a1102021-06-15 09:15:34 -07002245 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2246 }
2247 }
2248
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002249 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
Frank Barchard143a1102021-06-15 09:15:34 -07002250 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002251 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002252 for (uint32_t n = 1; n <= 8; n++) {
2253 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002254 GemmMicrokernelTester()
2255 .mr(4)
2256 .nr(8)
2257 .kr(1)
2258 .sr(1)
2259 .m(m)
2260 .n(n)
2261 .k(k)
2262 .iterations(1)
2263 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2264 }
2265 }
2266 }
2267 }
2268
2269 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
2270 TEST_REQUIRES_ARM_NEON_FMA;
2271 for (size_t k = 24; k <= 80; k += 8) {
2272 GemmMicrokernelTester()
2273 .mr(4)
2274 .nr(8)
2275 .kr(1)
2276 .sr(1)
2277 .m(4)
2278 .n(8)
2279 .k(k)
2280 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2281 }
2282 }
2283
2284 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_strided_a) {
2285 TEST_REQUIRES_ARM_NEON_FMA;
2286 for (size_t k = 24; k <= 80; k += 8) {
2287 GemmMicrokernelTester()
2288 .mr(4)
2289 .nr(8)
2290 .kr(1)
2291 .sr(1)
2292 .m(4)
2293 .n(8)
2294 .k(k)
2295 .a_stride(83)
2296 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2297 }
2298 }
2299
2300 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
2301 TEST_REQUIRES_ARM_NEON_FMA;
2302 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002303 for (uint32_t n = 1; n <= 8; n++) {
2304 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002305 GemmMicrokernelTester()
2306 .mr(4)
2307 .nr(8)
2308 .kr(1)
2309 .sr(1)
2310 .m(m)
2311 .n(n)
2312 .k(k)
2313 .iterations(1)
2314 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2315 }
2316 }
2317 }
2318 }
2319
2320 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
2321 TEST_REQUIRES_ARM_NEON_FMA;
2322 for (uint32_t n = 9; n < 16; n++) {
2323 for (size_t k = 1; k <= 40; k += 9) {
2324 GemmMicrokernelTester()
2325 .mr(4)
2326 .nr(8)
2327 .kr(1)
2328 .sr(1)
2329 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002330 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07002331 .k(k)
2332 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2333 }
2334 }
2335 }
2336
2337 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
2338 TEST_REQUIRES_ARM_NEON_FMA;
2339 for (uint32_t n = 9; n < 16; n++) {
2340 for (size_t k = 1; k <= 40; k += 9) {
2341 GemmMicrokernelTester()
2342 .mr(4)
2343 .nr(8)
2344 .kr(1)
2345 .sr(1)
2346 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002347 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07002348 .k(k)
2349 .cn_stride(11)
2350 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2351 }
2352 }
2353 }
2354
2355 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_a) {
2356 TEST_REQUIRES_ARM_NEON_FMA;
2357 for (uint32_t n = 9; n < 16; n++) {
2358 for (size_t k = 1; k <= 40; k += 9) {
2359 GemmMicrokernelTester()
2360 .mr(4)
2361 .nr(8)
2362 .kr(1)
2363 .sr(1)
2364 .m(4)
2365 .n(n)
2366 .k(k)
2367 .a_stride(43)
2368 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2369 }
2370 }
2371 }
2372
2373 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
2374 TEST_REQUIRES_ARM_NEON_FMA;
2375 for (uint32_t n = 9; n < 16; n++) {
2376 for (size_t k = 1; k <= 40; k += 9) {
2377 for (uint32_t m = 1; m <= 4; m++) {
2378 GemmMicrokernelTester()
2379 .mr(4)
2380 .nr(8)
2381 .kr(1)
2382 .sr(1)
2383 .m(m)
2384 .n(n)
2385 .k(k)
2386 .iterations(1)
2387 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2388 }
2389 }
2390 }
2391 }
2392
2393 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
2394 TEST_REQUIRES_ARM_NEON_FMA;
2395 for (uint32_t n = 16; n <= 24; n += 8) {
2396 for (size_t k = 1; k <= 40; k += 9) {
2397 GemmMicrokernelTester()
2398 .mr(4)
2399 .nr(8)
2400 .kr(1)
2401 .sr(1)
2402 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002403 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07002404 .k(k)
2405 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2406 }
2407 }
2408 }
2409
2410 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
2411 TEST_REQUIRES_ARM_NEON_FMA;
2412 for (uint32_t n = 16; n <= 24; n += 8) {
2413 for (size_t k = 1; k <= 40; k += 9) {
2414 GemmMicrokernelTester()
2415 .mr(4)
2416 .nr(8)
2417 .kr(1)
2418 .sr(1)
2419 .m(4)
2420 .n(n)
2421 .k(k)
2422 .cn_stride(11)
2423 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2424 }
2425 }
2426 }
2427
2428 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_a) {
2429 TEST_REQUIRES_ARM_NEON_FMA;
2430 for (uint32_t n = 16; n <= 24; n += 8) {
2431 for (size_t k = 1; k <= 40; k += 9) {
2432 GemmMicrokernelTester()
2433 .mr(4)
2434 .nr(8)
2435 .kr(1)
2436 .sr(1)
2437 .m(4)
2438 .n(n)
2439 .k(k)
2440 .a_stride(43)
2441 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2442 }
2443 }
2444 }
2445
2446 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
2447 TEST_REQUIRES_ARM_NEON_FMA;
2448 for (uint32_t n = 16; n <= 24; n += 8) {
2449 for (size_t k = 1; k <= 40; k += 9) {
2450 for (uint32_t m = 1; m <= 4; m++) {
2451 GemmMicrokernelTester()
2452 .mr(4)
2453 .nr(8)
2454 .kr(1)
2455 .sr(1)
2456 .m(m)
2457 .n(n)
2458 .k(k)
2459 .iterations(1)
2460 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2461 }
2462 }
2463 }
2464 }
2465
2466 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
2467 TEST_REQUIRES_ARM_NEON_FMA;
2468 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002469 for (uint32_t n = 1; n <= 8; n++) {
2470 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07002471 GemmMicrokernelTester()
2472 .mr(4)
2473 .nr(8)
2474 .kr(1)
2475 .sr(1)
2476 .m(m)
2477 .n(n)
2478 .k(k)
2479 .cm_stride(11)
2480 .iterations(1)
2481 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2482 }
2483 }
2484 }
2485 }
2486
2487 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
2488 TEST_REQUIRES_ARM_NEON_FMA;
2489 GemmMicrokernelTester()
2490 .mr(4)
2491 .nr(8)
2492 .kr(1)
2493 .sr(1)
2494 .m(4)
2495 .n(8)
2496 .k(8)
2497 .qmin(128)
2498 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2499 }
2500
2501 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
2502 TEST_REQUIRES_ARM_NEON_FMA;
2503 GemmMicrokernelTester()
2504 .mr(4)
2505 .nr(8)
2506 .kr(1)
2507 .sr(1)
2508 .m(4)
2509 .n(8)
2510 .k(8)
2511 .qmax(128)
2512 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2513 }
2514
2515 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
2516 TEST_REQUIRES_ARM_NEON_FMA;
2517 GemmMicrokernelTester()
2518 .mr(4)
2519 .nr(8)
2520 .kr(1)
2521 .sr(1)
2522 .m(4)
2523 .n(8)
2524 .k(8)
2525 .cm_stride(11)
2526 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2527 }
2528#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2529
2530
2531#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07002532 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002533 TEST_REQUIRES_ARM_NEON_FMA;
2534 GemmMicrokernelTester()
2535 .mr(6)
2536 .nr(8)
2537 .kr(1)
2538 .sr(1)
2539 .m(6)
2540 .n(8)
2541 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002542 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002543 }
2544
Marat Dukhande06f492020-04-09 00:19:31 -07002545 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002546 TEST_REQUIRES_ARM_NEON_FMA;
2547 GemmMicrokernelTester()
2548 .mr(6)
2549 .nr(8)
2550 .kr(1)
2551 .sr(1)
2552 .m(6)
2553 .n(8)
2554 .k(4)
2555 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002556 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002557 }
2558
Marat Dukhande06f492020-04-09 00:19:31 -07002559 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002560 TEST_REQUIRES_ARM_NEON_FMA;
2561 GemmMicrokernelTester()
2562 .mr(6)
2563 .nr(8)
2564 .kr(1)
2565 .sr(1)
2566 .m(6)
2567 .n(8)
2568 .k(4)
2569 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002570 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002571 }
2572
Marat Dukhande06f492020-04-09 00:19:31 -07002573 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002574 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002575 for (uint32_t n = 1; n <= 8; n++) {
2576 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002577 GemmMicrokernelTester()
2578 .mr(6)
2579 .nr(8)
2580 .kr(1)
2581 .sr(1)
2582 .m(m)
2583 .n(n)
2584 .k(4)
2585 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002586 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002587 }
2588 }
2589 }
2590
Marat Dukhande06f492020-04-09 00:19:31 -07002591 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002592 TEST_REQUIRES_ARM_NEON_FMA;
2593 for (uint32_t m = 1; m <= 6; m++) {
2594 GemmMicrokernelTester()
2595 .mr(6)
2596 .nr(8)
2597 .kr(1)
2598 .sr(1)
2599 .m(m)
2600 .n(8)
2601 .k(4)
2602 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002603 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002604 }
2605 }
2606
Marat Dukhande06f492020-04-09 00:19:31 -07002607 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002608 TEST_REQUIRES_ARM_NEON_FMA;
2609 for (uint32_t n = 1; n <= 8; n++) {
2610 GemmMicrokernelTester()
2611 .mr(6)
2612 .nr(8)
2613 .kr(1)
2614 .sr(1)
2615 .m(6)
2616 .n(n)
2617 .k(4)
2618 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002619 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002620 }
2621 }
2622
Marat Dukhande06f492020-04-09 00:19:31 -07002623 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002624 TEST_REQUIRES_ARM_NEON_FMA;
2625 GemmMicrokernelTester()
2626 .mr(6)
2627 .nr(8)
2628 .kr(1)
2629 .sr(1)
2630 .m(6)
2631 .n(8)
2632 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002633 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002634 }
2635
Marat Dukhande06f492020-04-09 00:19:31 -07002636 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002637 TEST_REQUIRES_ARM_NEON_FMA;
2638 GemmMicrokernelTester()
2639 .mr(6)
2640 .nr(8)
2641 .kr(1)
2642 .sr(1)
2643 .m(6)
2644 .n(8)
2645 .k(8)
2646 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002647 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002648 }
2649
Marat Dukhande06f492020-04-09 00:19:31 -07002650 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002651 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002652 for (uint32_t n = 1; n <= 8; n++) {
2653 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002654 GemmMicrokernelTester()
2655 .mr(6)
2656 .nr(8)
2657 .kr(1)
2658 .sr(1)
2659 .m(m)
2660 .n(n)
2661 .k(8)
2662 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002663 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002664 }
2665 }
2666 }
2667
Marat Dukhande06f492020-04-09 00:19:31 -07002668 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002669 TEST_REQUIRES_ARM_NEON_FMA;
2670 for (size_t k = 1; k < 8; k++) {
2671 GemmMicrokernelTester()
2672 .mr(6)
2673 .nr(8)
2674 .kr(1)
2675 .sr(1)
2676 .m(6)
2677 .n(8)
2678 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002679 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002680 }
2681 }
2682
Marat Dukhande06f492020-04-09 00:19:31 -07002683 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002684 TEST_REQUIRES_ARM_NEON_FMA;
2685 for (size_t k = 1; k < 8; k++) {
2686 GemmMicrokernelTester()
2687 .mr(6)
2688 .nr(8)
2689 .kr(1)
2690 .sr(1)
2691 .m(6)
2692 .n(8)
2693 .k(k)
2694 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002695 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002696 }
2697 }
2698
Marat Dukhande06f492020-04-09 00:19:31 -07002699 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002700 TEST_REQUIRES_ARM_NEON_FMA;
2701 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002702 for (uint32_t n = 1; n <= 8; n++) {
2703 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002704 GemmMicrokernelTester()
2705 .mr(6)
2706 .nr(8)
2707 .kr(1)
2708 .sr(1)
2709 .m(m)
2710 .n(n)
2711 .k(k)
2712 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002713 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002714 }
2715 }
2716 }
2717 }
2718
Marat Dukhande06f492020-04-09 00:19:31 -07002719 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002720 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002721 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002722 GemmMicrokernelTester()
2723 .mr(6)
2724 .nr(8)
2725 .kr(1)
2726 .sr(1)
2727 .m(6)
2728 .n(8)
2729 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002730 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002731 }
2732 }
2733
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002734 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002735 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002736 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002737 GemmMicrokernelTester()
2738 .mr(6)
2739 .nr(8)
2740 .kr(1)
2741 .sr(1)
2742 .m(6)
2743 .n(8)
2744 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002745 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002746 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002747 }
2748 }
2749
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002750 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002751 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002752 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002753 for (uint32_t n = 1; n <= 8; n++) {
2754 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002755 GemmMicrokernelTester()
2756 .mr(6)
2757 .nr(8)
2758 .kr(1)
2759 .sr(1)
2760 .m(m)
2761 .n(n)
2762 .k(k)
2763 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002764 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002765 }
2766 }
2767 }
2768 }
2769
Marat Dukhande06f492020-04-09 00:19:31 -07002770 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002771 TEST_REQUIRES_ARM_NEON_FMA;
2772 for (size_t k = 12; k <= 40; k += 4) {
2773 GemmMicrokernelTester()
2774 .mr(6)
2775 .nr(8)
2776 .kr(1)
2777 .sr(1)
2778 .m(6)
2779 .n(8)
2780 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002781 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002782 }
2783 }
2784
Marat Dukhande06f492020-04-09 00:19:31 -07002785 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002786 TEST_REQUIRES_ARM_NEON_FMA;
2787 for (size_t k = 12; k <= 40; k += 4) {
2788 GemmMicrokernelTester()
2789 .mr(6)
2790 .nr(8)
2791 .kr(1)
2792 .sr(1)
2793 .m(6)
2794 .n(8)
2795 .k(k)
2796 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002797 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002798 }
2799 }
2800
Marat Dukhande06f492020-04-09 00:19:31 -07002801 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002802 TEST_REQUIRES_ARM_NEON_FMA;
2803 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002804 for (uint32_t n = 1; n <= 8; n++) {
2805 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002806 GemmMicrokernelTester()
2807 .mr(6)
2808 .nr(8)
2809 .kr(1)
2810 .sr(1)
2811 .m(m)
2812 .n(n)
2813 .k(k)
2814 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002815 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002816 }
2817 }
2818 }
2819 }
2820
Marat Dukhande06f492020-04-09 00:19:31 -07002821 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002822 TEST_REQUIRES_ARM_NEON_FMA;
2823 for (uint32_t n = 9; n < 16; n++) {
2824 for (size_t k = 1; k <= 20; k += 5) {
2825 GemmMicrokernelTester()
2826 .mr(6)
2827 .nr(8)
2828 .kr(1)
2829 .sr(1)
2830 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002831 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002832 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002833 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002834 }
2835 }
2836 }
2837
Marat Dukhande06f492020-04-09 00:19:31 -07002838 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002839 TEST_REQUIRES_ARM_NEON_FMA;
2840 for (uint32_t n = 9; n < 16; n++) {
2841 for (size_t k = 1; k <= 20; k += 5) {
2842 GemmMicrokernelTester()
2843 .mr(6)
2844 .nr(8)
2845 .kr(1)
2846 .sr(1)
2847 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002848 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002849 .k(k)
2850 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002851 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002852 }
2853 }
2854 }
2855
Marat Dukhande06f492020-04-09 00:19:31 -07002856 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002857 TEST_REQUIRES_ARM_NEON_FMA;
2858 for (uint32_t n = 9; n < 16; n++) {
2859 for (size_t k = 1; k <= 20; k += 5) {
2860 GemmMicrokernelTester()
2861 .mr(6)
2862 .nr(8)
2863 .kr(1)
2864 .sr(1)
2865 .m(6)
2866 .n(n)
2867 .k(k)
2868 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002869 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002870 }
2871 }
2872 }
2873
Marat Dukhande06f492020-04-09 00:19:31 -07002874 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002875 TEST_REQUIRES_ARM_NEON_FMA;
2876 for (uint32_t n = 9; n < 16; n++) {
2877 for (size_t k = 1; k <= 20; k += 5) {
2878 for (uint32_t m = 1; m <= 6; m++) {
2879 GemmMicrokernelTester()
2880 .mr(6)
2881 .nr(8)
2882 .kr(1)
2883 .sr(1)
2884 .m(m)
2885 .n(n)
2886 .k(k)
2887 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002888 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002889 }
2890 }
2891 }
2892 }
2893
Marat Dukhande06f492020-04-09 00:19:31 -07002894 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002895 TEST_REQUIRES_ARM_NEON_FMA;
2896 for (uint32_t n = 16; n <= 24; n += 8) {
2897 for (size_t k = 1; k <= 20; k += 5) {
2898 GemmMicrokernelTester()
2899 .mr(6)
2900 .nr(8)
2901 .kr(1)
2902 .sr(1)
2903 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002904 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002905 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002906 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002907 }
2908 }
2909 }
2910
Marat Dukhande06f492020-04-09 00:19:31 -07002911 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002912 TEST_REQUIRES_ARM_NEON_FMA;
2913 for (uint32_t n = 16; n <= 24; n += 8) {
2914 for (size_t k = 1; k <= 20; k += 5) {
2915 GemmMicrokernelTester()
2916 .mr(6)
2917 .nr(8)
2918 .kr(1)
2919 .sr(1)
2920 .m(6)
2921 .n(n)
2922 .k(k)
2923 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002924 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002925 }
2926 }
2927 }
2928
Marat Dukhande06f492020-04-09 00:19:31 -07002929 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002930 TEST_REQUIRES_ARM_NEON_FMA;
2931 for (uint32_t n = 16; n <= 24; n += 8) {
2932 for (size_t k = 1; k <= 20; k += 5) {
2933 GemmMicrokernelTester()
2934 .mr(6)
2935 .nr(8)
2936 .kr(1)
2937 .sr(1)
2938 .m(6)
2939 .n(n)
2940 .k(k)
2941 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002942 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002943 }
2944 }
2945 }
2946
Marat Dukhande06f492020-04-09 00:19:31 -07002947 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002948 TEST_REQUIRES_ARM_NEON_FMA;
2949 for (uint32_t n = 16; n <= 24; n += 8) {
2950 for (size_t k = 1; k <= 20; k += 5) {
2951 for (uint32_t m = 1; m <= 6; m++) {
2952 GemmMicrokernelTester()
2953 .mr(6)
2954 .nr(8)
2955 .kr(1)
2956 .sr(1)
2957 .m(m)
2958 .n(n)
2959 .k(k)
2960 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002961 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002962 }
2963 }
2964 }
2965 }
2966
Marat Dukhande06f492020-04-09 00:19:31 -07002967 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002968 TEST_REQUIRES_ARM_NEON_FMA;
2969 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002970 for (uint32_t n = 1; n <= 8; n++) {
2971 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002972 GemmMicrokernelTester()
2973 .mr(6)
2974 .nr(8)
2975 .kr(1)
2976 .sr(1)
2977 .m(m)
2978 .n(n)
2979 .k(k)
2980 .cm_stride(11)
2981 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002982 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002983 }
2984 }
2985 }
2986 }
2987
Marat Dukhande06f492020-04-09 00:19:31 -07002988 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002989 TEST_REQUIRES_ARM_NEON_FMA;
2990 GemmMicrokernelTester()
2991 .mr(6)
2992 .nr(8)
2993 .kr(1)
2994 .sr(1)
2995 .m(6)
2996 .n(8)
2997 .k(4)
2998 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002999 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003000 }
3001
Marat Dukhande06f492020-04-09 00:19:31 -07003002 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003003 TEST_REQUIRES_ARM_NEON_FMA;
3004 GemmMicrokernelTester()
3005 .mr(6)
3006 .nr(8)
3007 .kr(1)
3008 .sr(1)
3009 .m(6)
3010 .n(8)
3011 .k(4)
3012 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003013 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003014 }
3015
Marat Dukhande06f492020-04-09 00:19:31 -07003016 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003017 TEST_REQUIRES_ARM_NEON_FMA;
3018 GemmMicrokernelTester()
3019 .mr(6)
3020 .nr(8)
3021 .kr(1)
3022 .sr(1)
3023 .m(6)
3024 .n(8)
3025 .k(4)
3026 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003027 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003028 }
3029#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3030
3031
3032#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard143a1102021-06-15 09:15:34 -07003033 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003034 TEST_REQUIRES_ARM_NEON_FMA;
3035 GemmMicrokernelTester()
3036 .mr(6)
3037 .nr(8)
3038 .kr(1)
3039 .sr(1)
3040 .m(6)
3041 .n(8)
3042 .k(4)
Frank Barchard143a1102021-06-15 09:15:34 -07003043 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003044 }
3045
Frank Barchard143a1102021-06-15 09:15:34 -07003046 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003047 TEST_REQUIRES_ARM_NEON_FMA;
3048 GemmMicrokernelTester()
3049 .mr(6)
3050 .nr(8)
3051 .kr(1)
3052 .sr(1)
3053 .m(6)
3054 .n(8)
3055 .k(4)
3056 .cn_stride(11)
Frank Barchard143a1102021-06-15 09:15:34 -07003057 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003058 }
3059
Frank Barchard143a1102021-06-15 09:15:34 -07003060 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003061 TEST_REQUIRES_ARM_NEON_FMA;
3062 GemmMicrokernelTester()
3063 .mr(6)
3064 .nr(8)
3065 .kr(1)
3066 .sr(1)
3067 .m(6)
3068 .n(8)
3069 .k(4)
3070 .a_stride(7)
Frank Barchard143a1102021-06-15 09:15:34 -07003071 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003072 }
3073
Frank Barchard143a1102021-06-15 09:15:34 -07003074 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003075 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003076 for (uint32_t n = 1; n <= 8; n++) {
3077 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003078 GemmMicrokernelTester()
3079 .mr(6)
3080 .nr(8)
3081 .kr(1)
3082 .sr(1)
3083 .m(m)
3084 .n(n)
3085 .k(4)
3086 .iterations(1)
Frank Barchard143a1102021-06-15 09:15:34 -07003087 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003088 }
3089 }
3090 }
3091
Frank Barchard143a1102021-06-15 09:15:34 -07003092 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003093 TEST_REQUIRES_ARM_NEON_FMA;
3094 for (uint32_t m = 1; m <= 6; m++) {
3095 GemmMicrokernelTester()
3096 .mr(6)
3097 .nr(8)
3098 .kr(1)
3099 .sr(1)
3100 .m(m)
3101 .n(8)
3102 .k(4)
3103 .iterations(1)
Frank Barchard143a1102021-06-15 09:15:34 -07003104 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003105 }
3106 }
3107
Frank Barchard143a1102021-06-15 09:15:34 -07003108 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003109 TEST_REQUIRES_ARM_NEON_FMA;
3110 for (uint32_t n = 1; n <= 8; n++) {
3111 GemmMicrokernelTester()
3112 .mr(6)
3113 .nr(8)
3114 .kr(1)
3115 .sr(1)
3116 .m(6)
3117 .n(n)
3118 .k(4)
3119 .iterations(1)
Frank Barchard143a1102021-06-15 09:15:34 -07003120 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003121 }
3122 }
3123
Frank Barchard143a1102021-06-15 09:15:34 -07003124 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003125 TEST_REQUIRES_ARM_NEON_FMA;
3126 for (size_t k = 1; k < 4; k++) {
3127 GemmMicrokernelTester()
3128 .mr(6)
3129 .nr(8)
3130 .kr(1)
3131 .sr(1)
3132 .m(6)
3133 .n(8)
3134 .k(k)
Frank Barchard143a1102021-06-15 09:15:34 -07003135 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003136 }
3137 }
3138
Frank Barchard143a1102021-06-15 09:15:34 -07003139 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003140 TEST_REQUIRES_ARM_NEON_FMA;
3141 for (size_t k = 1; k < 4; k++) {
3142 GemmMicrokernelTester()
3143 .mr(6)
3144 .nr(8)
3145 .kr(1)
3146 .sr(1)
3147 .m(6)
3148 .n(8)
3149 .k(k)
3150 .a_stride(7)
Frank Barchard143a1102021-06-15 09:15:34 -07003151 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003152 }
3153 }
3154
Frank Barchard143a1102021-06-15 09:15:34 -07003155 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003156 TEST_REQUIRES_ARM_NEON_FMA;
3157 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003158 for (uint32_t n = 1; n <= 8; n++) {
3159 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003160 GemmMicrokernelTester()
3161 .mr(6)
3162 .nr(8)
3163 .kr(1)
3164 .sr(1)
3165 .m(m)
3166 .n(n)
3167 .k(k)
3168 .iterations(1)
Frank Barchard143a1102021-06-15 09:15:34 -07003169 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003170 }
3171 }
3172 }
3173 }
3174
Frank Barchard143a1102021-06-15 09:15:34 -07003175 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003176 TEST_REQUIRES_ARM_NEON_FMA;
3177 for (size_t k = 5; k < 8; k++) {
3178 GemmMicrokernelTester()
3179 .mr(6)
3180 .nr(8)
3181 .kr(1)
3182 .sr(1)
3183 .m(6)
3184 .n(8)
3185 .k(k)
Frank Barchard143a1102021-06-15 09:15:34 -07003186 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003187 }
3188 }
3189
Frank Barchard143a1102021-06-15 09:15:34 -07003190 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003191 TEST_REQUIRES_ARM_NEON_FMA;
3192 for (size_t k = 5; k < 8; k++) {
3193 GemmMicrokernelTester()
3194 .mr(6)
3195 .nr(8)
3196 .kr(1)
3197 .sr(1)
3198 .m(6)
3199 .n(8)
3200 .k(k)
3201 .a_stride(11)
Frank Barchard143a1102021-06-15 09:15:34 -07003202 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003203 }
3204 }
3205
Frank Barchard143a1102021-06-15 09:15:34 -07003206 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003207 TEST_REQUIRES_ARM_NEON_FMA;
3208 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003209 for (uint32_t n = 1; n <= 8; n++) {
3210 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003211 GemmMicrokernelTester()
3212 .mr(6)
3213 .nr(8)
3214 .kr(1)
3215 .sr(1)
3216 .m(m)
3217 .n(n)
3218 .k(k)
3219 .iterations(1)
Frank Barchard143a1102021-06-15 09:15:34 -07003220 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003221 }
3222 }
3223 }
3224 }
3225
Frank Barchard143a1102021-06-15 09:15:34 -07003226 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003227 TEST_REQUIRES_ARM_NEON_FMA;
3228 for (size_t k = 8; k <= 40; k += 4) {
3229 GemmMicrokernelTester()
3230 .mr(6)
3231 .nr(8)
3232 .kr(1)
3233 .sr(1)
3234 .m(6)
3235 .n(8)
3236 .k(k)
Frank Barchard143a1102021-06-15 09:15:34 -07003237 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003238 }
3239 }
3240
Frank Barchard143a1102021-06-15 09:15:34 -07003241 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003242 TEST_REQUIRES_ARM_NEON_FMA;
3243 for (size_t k = 8; k <= 40; k += 4) {
3244 GemmMicrokernelTester()
3245 .mr(6)
3246 .nr(8)
3247 .kr(1)
3248 .sr(1)
3249 .m(6)
3250 .n(8)
3251 .k(k)
3252 .a_stride(43)
Frank Barchard143a1102021-06-15 09:15:34 -07003253 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003254 }
3255 }
3256
Frank Barchard143a1102021-06-15 09:15:34 -07003257 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003258 TEST_REQUIRES_ARM_NEON_FMA;
3259 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003260 for (uint32_t n = 1; n <= 8; n++) {
3261 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003262 GemmMicrokernelTester()
3263 .mr(6)
3264 .nr(8)
3265 .kr(1)
3266 .sr(1)
3267 .m(m)
3268 .n(n)
3269 .k(k)
3270 .iterations(1)
Frank Barchard143a1102021-06-15 09:15:34 -07003271 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003272 }
3273 }
3274 }
3275 }
3276
Frank Barchard143a1102021-06-15 09:15:34 -07003277 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003278 TEST_REQUIRES_ARM_NEON_FMA;
3279 for (uint32_t n = 9; n < 16; n++) {
3280 for (size_t k = 1; k <= 20; k += 5) {
3281 GemmMicrokernelTester()
3282 .mr(6)
3283 .nr(8)
3284 .kr(1)
3285 .sr(1)
3286 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003287 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003288 .k(k)
Frank Barchard143a1102021-06-15 09:15:34 -07003289 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003290 }
3291 }
3292 }
3293
Frank Barchard143a1102021-06-15 09:15:34 -07003294 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003295 TEST_REQUIRES_ARM_NEON_FMA;
3296 for (uint32_t n = 9; n < 16; n++) {
3297 for (size_t k = 1; k <= 20; k += 5) {
3298 GemmMicrokernelTester()
3299 .mr(6)
3300 .nr(8)
3301 .kr(1)
3302 .sr(1)
3303 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003304 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003305 .k(k)
3306 .cn_stride(11)
Frank Barchard143a1102021-06-15 09:15:34 -07003307 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003308 }
3309 }
3310 }
3311
Frank Barchard143a1102021-06-15 09:15:34 -07003312 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003313 TEST_REQUIRES_ARM_NEON_FMA;
3314 for (uint32_t n = 9; n < 16; n++) {
3315 for (size_t k = 1; k <= 20; k += 5) {
3316 GemmMicrokernelTester()
3317 .mr(6)
3318 .nr(8)
3319 .kr(1)
3320 .sr(1)
3321 .m(6)
3322 .n(n)
3323 .k(k)
3324 .a_stride(23)
Frank Barchard143a1102021-06-15 09:15:34 -07003325 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003326 }
3327 }
3328 }
3329
Frank Barchard143a1102021-06-15 09:15:34 -07003330 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003331 TEST_REQUIRES_ARM_NEON_FMA;
3332 for (uint32_t n = 9; n < 16; n++) {
3333 for (size_t k = 1; k <= 20; k += 5) {
3334 for (uint32_t m = 1; m <= 6; m++) {
3335 GemmMicrokernelTester()
3336 .mr(6)
3337 .nr(8)
3338 .kr(1)
3339 .sr(1)
3340 .m(m)
3341 .n(n)
3342 .k(k)
3343 .iterations(1)
Frank Barchard143a1102021-06-15 09:15:34 -07003344 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003345 }
3346 }
3347 }
3348 }
3349
Frank Barchard143a1102021-06-15 09:15:34 -07003350 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003351 TEST_REQUIRES_ARM_NEON_FMA;
3352 for (uint32_t n = 16; n <= 24; n += 8) {
3353 for (size_t k = 1; k <= 20; k += 5) {
3354 GemmMicrokernelTester()
3355 .mr(6)
3356 .nr(8)
3357 .kr(1)
3358 .sr(1)
3359 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003360 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003361 .k(k)
Frank Barchard143a1102021-06-15 09:15:34 -07003362 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003363 }
3364 }
3365 }
3366
Frank Barchard143a1102021-06-15 09:15:34 -07003367 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003368 TEST_REQUIRES_ARM_NEON_FMA;
3369 for (uint32_t n = 16; n <= 24; n += 8) {
3370 for (size_t k = 1; k <= 20; k += 5) {
3371 GemmMicrokernelTester()
3372 .mr(6)
3373 .nr(8)
3374 .kr(1)
3375 .sr(1)
3376 .m(6)
3377 .n(n)
3378 .k(k)
3379 .cn_stride(11)
Frank Barchard143a1102021-06-15 09:15:34 -07003380 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003381 }
3382 }
3383 }
3384
Frank Barchard143a1102021-06-15 09:15:34 -07003385 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003386 TEST_REQUIRES_ARM_NEON_FMA;
3387 for (uint32_t n = 16; n <= 24; n += 8) {
3388 for (size_t k = 1; k <= 20; k += 5) {
3389 GemmMicrokernelTester()
3390 .mr(6)
3391 .nr(8)
3392 .kr(1)
3393 .sr(1)
3394 .m(6)
3395 .n(n)
3396 .k(k)
3397 .a_stride(23)
Frank Barchard143a1102021-06-15 09:15:34 -07003398 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003399 }
3400 }
3401 }
3402
Frank Barchard143a1102021-06-15 09:15:34 -07003403 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003404 TEST_REQUIRES_ARM_NEON_FMA;
3405 for (uint32_t n = 16; n <= 24; n += 8) {
3406 for (size_t k = 1; k <= 20; k += 5) {
3407 for (uint32_t m = 1; m <= 6; m++) {
3408 GemmMicrokernelTester()
3409 .mr(6)
3410 .nr(8)
3411 .kr(1)
3412 .sr(1)
3413 .m(m)
3414 .n(n)
3415 .k(k)
3416 .iterations(1)
Frank Barchard143a1102021-06-15 09:15:34 -07003417 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003418 }
3419 }
3420 }
3421 }
3422
Frank Barchard143a1102021-06-15 09:15:34 -07003423 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003424 TEST_REQUIRES_ARM_NEON_FMA;
3425 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003426 for (uint32_t n = 1; n <= 8; n++) {
3427 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003428 GemmMicrokernelTester()
3429 .mr(6)
3430 .nr(8)
3431 .kr(1)
3432 .sr(1)
3433 .m(m)
3434 .n(n)
3435 .k(k)
3436 .cm_stride(11)
3437 .iterations(1)
Frank Barchard143a1102021-06-15 09:15:34 -07003438 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003439 }
3440 }
3441 }
3442 }
3443
Frank Barchard143a1102021-06-15 09:15:34 -07003444 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003445 TEST_REQUIRES_ARM_NEON_FMA;
3446 GemmMicrokernelTester()
3447 .mr(6)
3448 .nr(8)
3449 .kr(1)
3450 .sr(1)
3451 .m(6)
3452 .n(8)
3453 .k(4)
3454 .qmin(128)
Frank Barchard143a1102021-06-15 09:15:34 -07003455 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003456 }
3457
Frank Barchard143a1102021-06-15 09:15:34 -07003458 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003459 TEST_REQUIRES_ARM_NEON_FMA;
3460 GemmMicrokernelTester()
3461 .mr(6)
3462 .nr(8)
3463 .kr(1)
3464 .sr(1)
3465 .m(6)
3466 .n(8)
3467 .k(4)
3468 .qmax(128)
Frank Barchard143a1102021-06-15 09:15:34 -07003469 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003470 }
3471
Frank Barchard143a1102021-06-15 09:15:34 -07003472 TEST(F32_GEMMINC_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003473 TEST_REQUIRES_ARM_NEON_FMA;
3474 GemmMicrokernelTester()
3475 .mr(6)
3476 .nr(8)
3477 .kr(1)
3478 .sr(1)
3479 .m(6)
3480 .n(8)
3481 .k(4)
3482 .cm_stride(11)
Frank Barchard143a1102021-06-15 09:15:34 -07003483 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003484 }
3485#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3486
3487
3488#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07003489 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003490 TEST_REQUIRES_ARM_NEON_FMA;
3491 GemmMicrokernelTester()
3492 .mr(1)
3493 .nr(12)
3494 .kr(1)
3495 .sr(1)
3496 .m(1)
3497 .n(12)
3498 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003499 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003500 }
3501
Marat Dukhande06f492020-04-09 00:19:31 -07003502 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003503 TEST_REQUIRES_ARM_NEON_FMA;
3504 GemmMicrokernelTester()
3505 .mr(1)
3506 .nr(12)
3507 .kr(1)
3508 .sr(1)
3509 .m(1)
3510 .n(12)
3511 .k(4)
3512 .cn_stride(17)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003513 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003514 }
3515
Marat Dukhande06f492020-04-09 00:19:31 -07003516 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003517 TEST_REQUIRES_ARM_NEON_FMA;
3518 GemmMicrokernelTester()
3519 .mr(1)
3520 .nr(12)
3521 .kr(1)
3522 .sr(1)
3523 .m(1)
3524 .n(12)
3525 .k(4)
3526 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003527 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003528 }
3529
Marat Dukhande06f492020-04-09 00:19:31 -07003530 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003531 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003532 for (uint32_t n = 1; n <= 12; n++) {
3533 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003534 GemmMicrokernelTester()
3535 .mr(1)
3536 .nr(12)
3537 .kr(1)
3538 .sr(1)
3539 .m(m)
3540 .n(n)
3541 .k(4)
3542 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003543 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003544 }
3545 }
3546 }
3547
Marat Dukhande06f492020-04-09 00:19:31 -07003548 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003549 TEST_REQUIRES_ARM_NEON_FMA;
3550 for (uint32_t m = 1; m <= 1; m++) {
3551 GemmMicrokernelTester()
3552 .mr(1)
3553 .nr(12)
3554 .kr(1)
3555 .sr(1)
3556 .m(m)
3557 .n(12)
3558 .k(4)
3559 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003560 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003561 }
3562 }
3563
Marat Dukhande06f492020-04-09 00:19:31 -07003564 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003565 TEST_REQUIRES_ARM_NEON_FMA;
3566 for (uint32_t n = 1; n <= 12; n++) {
3567 GemmMicrokernelTester()
3568 .mr(1)
3569 .nr(12)
3570 .kr(1)
3571 .sr(1)
3572 .m(1)
3573 .n(n)
3574 .k(4)
3575 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003576 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003577 }
3578 }
3579
Marat Dukhande06f492020-04-09 00:19:31 -07003580 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003581 TEST_REQUIRES_ARM_NEON_FMA;
3582 GemmMicrokernelTester()
3583 .mr(1)
3584 .nr(12)
3585 .kr(1)
3586 .sr(1)
3587 .m(1)
3588 .n(12)
3589 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003590 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003591 }
3592
Marat Dukhande06f492020-04-09 00:19:31 -07003593 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003594 TEST_REQUIRES_ARM_NEON_FMA;
3595 GemmMicrokernelTester()
3596 .mr(1)
3597 .nr(12)
3598 .kr(1)
3599 .sr(1)
3600 .m(1)
3601 .n(12)
3602 .k(8)
3603 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003604 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003605 }
3606
Marat Dukhande06f492020-04-09 00:19:31 -07003607 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003608 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003609 for (uint32_t n = 1; n <= 12; n++) {
3610 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003611 GemmMicrokernelTester()
3612 .mr(1)
3613 .nr(12)
3614 .kr(1)
3615 .sr(1)
3616 .m(m)
3617 .n(n)
3618 .k(8)
3619 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003620 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003621 }
3622 }
3623 }
3624
Marat Dukhande06f492020-04-09 00:19:31 -07003625 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003626 TEST_REQUIRES_ARM_NEON_FMA;
3627 for (size_t k = 1; k < 8; k++) {
3628 GemmMicrokernelTester()
3629 .mr(1)
3630 .nr(12)
3631 .kr(1)
3632 .sr(1)
3633 .m(1)
3634 .n(12)
3635 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003636 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003637 }
3638 }
3639
Marat Dukhande06f492020-04-09 00:19:31 -07003640 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003641 TEST_REQUIRES_ARM_NEON_FMA;
3642 for (size_t k = 1; k < 8; k++) {
3643 GemmMicrokernelTester()
3644 .mr(1)
3645 .nr(12)
3646 .kr(1)
3647 .sr(1)
3648 .m(1)
3649 .n(12)
3650 .k(k)
3651 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003652 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003653 }
3654 }
3655
Marat Dukhande06f492020-04-09 00:19:31 -07003656 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003657 TEST_REQUIRES_ARM_NEON_FMA;
3658 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003659 for (uint32_t n = 1; n <= 12; n++) {
3660 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003661 GemmMicrokernelTester()
3662 .mr(1)
3663 .nr(12)
3664 .kr(1)
3665 .sr(1)
3666 .m(m)
3667 .n(n)
3668 .k(k)
3669 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003670 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003671 }
3672 }
3673 }
3674 }
3675
Marat Dukhande06f492020-04-09 00:19:31 -07003676 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003677 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003678 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003679 GemmMicrokernelTester()
3680 .mr(1)
3681 .nr(12)
3682 .kr(1)
3683 .sr(1)
3684 .m(1)
3685 .n(12)
3686 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003687 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003688 }
3689 }
3690
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003691 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003692 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003693 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003694 GemmMicrokernelTester()
3695 .mr(1)
3696 .nr(12)
3697 .kr(1)
3698 .sr(1)
3699 .m(1)
3700 .n(12)
3701 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003702 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003703 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003704 }
3705 }
3706
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003707 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003708 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003709 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003710 for (uint32_t n = 1; n <= 12; n++) {
3711 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003712 GemmMicrokernelTester()
3713 .mr(1)
3714 .nr(12)
3715 .kr(1)
3716 .sr(1)
3717 .m(m)
3718 .n(n)
3719 .k(k)
3720 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003721 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003722 }
3723 }
3724 }
3725 }
3726
Marat Dukhande06f492020-04-09 00:19:31 -07003727 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003728 TEST_REQUIRES_ARM_NEON_FMA;
3729 for (size_t k = 12; k <= 40; k += 4) {
3730 GemmMicrokernelTester()
3731 .mr(1)
3732 .nr(12)
3733 .kr(1)
3734 .sr(1)
3735 .m(1)
3736 .n(12)
3737 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003738 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003739 }
3740 }
3741
Marat Dukhande06f492020-04-09 00:19:31 -07003742 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003743 TEST_REQUIRES_ARM_NEON_FMA;
3744 for (size_t k = 12; k <= 40; k += 4) {
3745 GemmMicrokernelTester()
3746 .mr(1)
3747 .nr(12)
3748 .kr(1)
3749 .sr(1)
3750 .m(1)
3751 .n(12)
3752 .k(k)
3753 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003754 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003755 }
3756 }
3757
Marat Dukhande06f492020-04-09 00:19:31 -07003758 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003759 TEST_REQUIRES_ARM_NEON_FMA;
3760 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003761 for (uint32_t n = 1; n <= 12; n++) {
3762 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003763 GemmMicrokernelTester()
3764 .mr(1)
3765 .nr(12)
3766 .kr(1)
3767 .sr(1)
3768 .m(m)
3769 .n(n)
3770 .k(k)
3771 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003772 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003773 }
3774 }
3775 }
3776 }
3777
Marat Dukhande06f492020-04-09 00:19:31 -07003778 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003779 TEST_REQUIRES_ARM_NEON_FMA;
3780 for (uint32_t n = 13; n < 24; n++) {
3781 for (size_t k = 1; k <= 20; k += 5) {
3782 GemmMicrokernelTester()
3783 .mr(1)
3784 .nr(12)
3785 .kr(1)
3786 .sr(1)
3787 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003788 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003789 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003790 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003791 }
3792 }
3793 }
3794
Marat Dukhande06f492020-04-09 00:19:31 -07003795 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003796 TEST_REQUIRES_ARM_NEON_FMA;
3797 for (uint32_t n = 13; n < 24; n++) {
3798 for (size_t k = 1; k <= 20; k += 5) {
3799 GemmMicrokernelTester()
3800 .mr(1)
3801 .nr(12)
3802 .kr(1)
3803 .sr(1)
3804 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003805 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003806 .k(k)
3807 .cn_stride(17)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003808 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003809 }
3810 }
3811 }
3812
Marat Dukhande06f492020-04-09 00:19:31 -07003813 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003814 TEST_REQUIRES_ARM_NEON_FMA;
3815 for (uint32_t n = 13; n < 24; n++) {
3816 for (size_t k = 1; k <= 20; k += 5) {
3817 GemmMicrokernelTester()
3818 .mr(1)
3819 .nr(12)
3820 .kr(1)
3821 .sr(1)
3822 .m(1)
3823 .n(n)
3824 .k(k)
3825 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003826 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003827 }
3828 }
3829 }
3830
Marat Dukhande06f492020-04-09 00:19:31 -07003831 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003832 TEST_REQUIRES_ARM_NEON_FMA;
3833 for (uint32_t n = 13; n < 24; n++) {
3834 for (size_t k = 1; k <= 20; k += 5) {
3835 for (uint32_t m = 1; m <= 1; m++) {
3836 GemmMicrokernelTester()
3837 .mr(1)
3838 .nr(12)
3839 .kr(1)
3840 .sr(1)
3841 .m(m)
3842 .n(n)
3843 .k(k)
3844 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003845 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003846 }
3847 }
3848 }
3849 }
3850
Marat Dukhande06f492020-04-09 00:19:31 -07003851 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003852 TEST_REQUIRES_ARM_NEON_FMA;
3853 for (uint32_t n = 24; n <= 36; n += 12) {
3854 for (size_t k = 1; k <= 20; k += 5) {
3855 GemmMicrokernelTester()
3856 .mr(1)
3857 .nr(12)
3858 .kr(1)
3859 .sr(1)
3860 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003861 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003862 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003863 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003864 }
3865 }
3866 }
3867
Marat Dukhande06f492020-04-09 00:19:31 -07003868 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003869 TEST_REQUIRES_ARM_NEON_FMA;
3870 for (uint32_t n = 24; n <= 36; n += 12) {
3871 for (size_t k = 1; k <= 20; k += 5) {
3872 GemmMicrokernelTester()
3873 .mr(1)
3874 .nr(12)
3875 .kr(1)
3876 .sr(1)
3877 .m(1)
3878 .n(n)
3879 .k(k)
3880 .cn_stride(17)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003881 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003882 }
3883 }
3884 }
3885
Marat Dukhande06f492020-04-09 00:19:31 -07003886 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003887 TEST_REQUIRES_ARM_NEON_FMA;
3888 for (uint32_t n = 24; n <= 36; n += 12) {
3889 for (size_t k = 1; k <= 20; k += 5) {
3890 GemmMicrokernelTester()
3891 .mr(1)
3892 .nr(12)
3893 .kr(1)
3894 .sr(1)
3895 .m(1)
3896 .n(n)
3897 .k(k)
3898 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003899 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003900 }
3901 }
3902 }
3903
Marat Dukhande06f492020-04-09 00:19:31 -07003904 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003905 TEST_REQUIRES_ARM_NEON_FMA;
3906 for (uint32_t n = 24; n <= 36; n += 12) {
3907 for (size_t k = 1; k <= 20; k += 5) {
3908 for (uint32_t m = 1; m <= 1; m++) {
3909 GemmMicrokernelTester()
3910 .mr(1)
3911 .nr(12)
3912 .kr(1)
3913 .sr(1)
3914 .m(m)
3915 .n(n)
3916 .k(k)
3917 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003918 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003919 }
3920 }
3921 }
3922 }
3923
Marat Dukhande06f492020-04-09 00:19:31 -07003924 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003925 TEST_REQUIRES_ARM_NEON_FMA;
3926 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003927 for (uint32_t n = 1; n <= 12; n++) {
3928 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003929 GemmMicrokernelTester()
3930 .mr(1)
3931 .nr(12)
3932 .kr(1)
3933 .sr(1)
3934 .m(m)
3935 .n(n)
3936 .k(k)
3937 .cm_stride(17)
3938 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003939 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003940 }
3941 }
3942 }
3943 }
3944
Marat Dukhande06f492020-04-09 00:19:31 -07003945 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003946 TEST_REQUIRES_ARM_NEON_FMA;
3947 GemmMicrokernelTester()
3948 .mr(1)
3949 .nr(12)
3950 .kr(1)
3951 .sr(1)
3952 .m(1)
3953 .n(12)
3954 .k(4)
3955 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003956 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003957 }
3958
Marat Dukhande06f492020-04-09 00:19:31 -07003959 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003960 TEST_REQUIRES_ARM_NEON_FMA;
3961 GemmMicrokernelTester()
3962 .mr(1)
3963 .nr(12)
3964 .kr(1)
3965 .sr(1)
3966 .m(1)
3967 .n(12)
3968 .k(4)
3969 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003970 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003971 }
3972
Marat Dukhande06f492020-04-09 00:19:31 -07003973 TEST(F32_GEMMINC_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003974 TEST_REQUIRES_ARM_NEON_FMA;
3975 GemmMicrokernelTester()
3976 .mr(1)
3977 .nr(12)
3978 .kr(1)
3979 .sr(1)
3980 .m(1)
3981 .n(12)
3982 .k(4)
3983 .cm_stride(17)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003984 .Test(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003985 }
3986#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3987
3988
3989#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07003990 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003991 TEST_REQUIRES_ARM_NEON_FMA;
3992 GemmMicrokernelTester()
3993 .mr(4)
3994 .nr(12)
3995 .kr(1)
3996 .sr(1)
3997 .m(4)
3998 .n(12)
3999 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004000 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004001 }
4002
Marat Dukhande06f492020-04-09 00:19:31 -07004003 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004004 TEST_REQUIRES_ARM_NEON_FMA;
4005 GemmMicrokernelTester()
4006 .mr(4)
4007 .nr(12)
4008 .kr(1)
4009 .sr(1)
4010 .m(4)
4011 .n(12)
4012 .k(4)
4013 .cn_stride(17)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004014 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004015 }
4016
Marat Dukhande06f492020-04-09 00:19:31 -07004017 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004018 TEST_REQUIRES_ARM_NEON_FMA;
4019 GemmMicrokernelTester()
4020 .mr(4)
4021 .nr(12)
4022 .kr(1)
4023 .sr(1)
4024 .m(4)
4025 .n(12)
4026 .k(4)
4027 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004028 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004029 }
4030
Marat Dukhande06f492020-04-09 00:19:31 -07004031 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004032 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004033 for (uint32_t n = 1; n <= 12; n++) {
4034 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004035 GemmMicrokernelTester()
4036 .mr(4)
4037 .nr(12)
4038 .kr(1)
4039 .sr(1)
4040 .m(m)
4041 .n(n)
4042 .k(4)
4043 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004044 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004045 }
4046 }
4047 }
4048
Marat Dukhande06f492020-04-09 00:19:31 -07004049 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004050 TEST_REQUIRES_ARM_NEON_FMA;
4051 for (uint32_t m = 1; m <= 4; m++) {
4052 GemmMicrokernelTester()
4053 .mr(4)
4054 .nr(12)
4055 .kr(1)
4056 .sr(1)
4057 .m(m)
4058 .n(12)
4059 .k(4)
4060 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004061 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004062 }
4063 }
4064
Marat Dukhande06f492020-04-09 00:19:31 -07004065 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004066 TEST_REQUIRES_ARM_NEON_FMA;
4067 for (uint32_t n = 1; n <= 12; n++) {
4068 GemmMicrokernelTester()
4069 .mr(4)
4070 .nr(12)
4071 .kr(1)
4072 .sr(1)
4073 .m(4)
4074 .n(n)
4075 .k(4)
4076 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004077 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004078 }
4079 }
4080
Marat Dukhande06f492020-04-09 00:19:31 -07004081 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004082 TEST_REQUIRES_ARM_NEON_FMA;
4083 GemmMicrokernelTester()
4084 .mr(4)
4085 .nr(12)
4086 .kr(1)
4087 .sr(1)
4088 .m(4)
4089 .n(12)
4090 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004091 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004092 }
4093
Marat Dukhande06f492020-04-09 00:19:31 -07004094 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004095 TEST_REQUIRES_ARM_NEON_FMA;
4096 GemmMicrokernelTester()
4097 .mr(4)
4098 .nr(12)
4099 .kr(1)
4100 .sr(1)
4101 .m(4)
4102 .n(12)
4103 .k(8)
4104 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004105 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004106 }
4107
Marat Dukhande06f492020-04-09 00:19:31 -07004108 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004109 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004110 for (uint32_t n = 1; n <= 12; n++) {
4111 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004112 GemmMicrokernelTester()
4113 .mr(4)
4114 .nr(12)
4115 .kr(1)
4116 .sr(1)
4117 .m(m)
4118 .n(n)
4119 .k(8)
4120 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004121 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004122 }
4123 }
4124 }
4125
Marat Dukhande06f492020-04-09 00:19:31 -07004126 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004127 TEST_REQUIRES_ARM_NEON_FMA;
4128 for (size_t k = 1; k < 8; k++) {
4129 GemmMicrokernelTester()
4130 .mr(4)
4131 .nr(12)
4132 .kr(1)
4133 .sr(1)
4134 .m(4)
4135 .n(12)
4136 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004137 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004138 }
4139 }
4140
Marat Dukhande06f492020-04-09 00:19:31 -07004141 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004142 TEST_REQUIRES_ARM_NEON_FMA;
4143 for (size_t k = 1; k < 8; k++) {
4144 GemmMicrokernelTester()
4145 .mr(4)
4146 .nr(12)
4147 .kr(1)
4148 .sr(1)
4149 .m(4)
4150 .n(12)
4151 .k(k)
4152 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004153 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004154 }
4155 }
4156
Marat Dukhande06f492020-04-09 00:19:31 -07004157 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004158 TEST_REQUIRES_ARM_NEON_FMA;
4159 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004160 for (uint32_t n = 1; n <= 12; n++) {
4161 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004162 GemmMicrokernelTester()
4163 .mr(4)
4164 .nr(12)
4165 .kr(1)
4166 .sr(1)
4167 .m(m)
4168 .n(n)
4169 .k(k)
4170 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004171 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004172 }
4173 }
4174 }
4175 }
4176
Marat Dukhande06f492020-04-09 00:19:31 -07004177 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004178 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004179 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004180 GemmMicrokernelTester()
4181 .mr(4)
4182 .nr(12)
4183 .kr(1)
4184 .sr(1)
4185 .m(4)
4186 .n(12)
4187 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004188 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004189 }
4190 }
4191
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004192 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004193 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004194 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004195 GemmMicrokernelTester()
4196 .mr(4)
4197 .nr(12)
4198 .kr(1)
4199 .sr(1)
4200 .m(4)
4201 .n(12)
4202 .k(k)
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004203 .a_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004204 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004205 }
4206 }
4207
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004208 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004209 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004210 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004211 for (uint32_t n = 1; n <= 12; n++) {
4212 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004213 GemmMicrokernelTester()
4214 .mr(4)
4215 .nr(12)
4216 .kr(1)
4217 .sr(1)
4218 .m(m)
4219 .n(n)
4220 .k(k)
4221 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004222 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004223 }
4224 }
4225 }
4226 }
4227
Marat Dukhande06f492020-04-09 00:19:31 -07004228 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004229 TEST_REQUIRES_ARM_NEON_FMA;
4230 for (size_t k = 12; k <= 40; k += 4) {
4231 GemmMicrokernelTester()
4232 .mr(4)
4233 .nr(12)
4234 .kr(1)
4235 .sr(1)
4236 .m(4)
4237 .n(12)
4238 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004239 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004240 }
4241 }
4242
Marat Dukhande06f492020-04-09 00:19:31 -07004243 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004244 TEST_REQUIRES_ARM_NEON_FMA;
4245 for (size_t k = 12; k <= 40; k += 4) {
4246 GemmMicrokernelTester()
4247 .mr(4)
4248 .nr(12)
4249 .kr(1)
4250 .sr(1)
4251 .m(4)
4252 .n(12)
4253 .k(k)
4254 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004255 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004256 }
4257 }
4258
Marat Dukhande06f492020-04-09 00:19:31 -07004259 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004260 TEST_REQUIRES_ARM_NEON_FMA;
4261 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004262 for (uint32_t n = 1; n <= 12; n++) {
4263 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004264 GemmMicrokernelTester()
4265 .mr(4)
4266 .nr(12)
4267 .kr(1)
4268 .sr(1)
4269 .m(m)
4270 .n(n)
4271 .k(k)
4272 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004273 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004274 }
4275 }
4276 }
4277 }
4278
Marat Dukhande06f492020-04-09 00:19:31 -07004279 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004280 TEST_REQUIRES_ARM_NEON_FMA;
4281 for (uint32_t n = 13; n < 24; n++) {
4282 for (size_t k = 1; k <= 20; k += 5) {
4283 GemmMicrokernelTester()
4284 .mr(4)
4285 .nr(12)
4286 .kr(1)
4287 .sr(1)
4288 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004289 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004290 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004291 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004292 }
4293 }
4294 }
4295
Marat Dukhande06f492020-04-09 00:19:31 -07004296 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004297 TEST_REQUIRES_ARM_NEON_FMA;
4298 for (uint32_t n = 13; n < 24; n++) {
4299 for (size_t k = 1; k <= 20; k += 5) {
4300 GemmMicrokernelTester()
4301 .mr(4)
4302 .nr(12)
4303 .kr(1)
4304 .sr(1)
4305 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004306 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004307 .k(k)
4308 .cn_stride(17)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004309 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004310 }
4311 }
4312 }
4313
Marat Dukhande06f492020-04-09 00:19:31 -07004314 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004315 TEST_REQUIRES_ARM_NEON_FMA;
4316 for (uint32_t n = 13; n < 24; n++) {
4317 for (size_t k = 1; k <= 20; k += 5) {
4318 GemmMicrokernelTester()
4319 .mr(4)
4320 .nr(12)
4321 .kr(1)
4322 .sr(1)
4323 .m(4)
4324 .n(n)
4325 .k(k)
4326 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004327 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004328 }
4329 }
4330 }
4331
Marat Dukhande06f492020-04-09 00:19:31 -07004332 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004333 TEST_REQUIRES_ARM_NEON_FMA;
4334 for (uint32_t n = 13; n < 24; n++) {
4335 for (size_t k = 1; k <= 20; k += 5) {
4336 for (uint32_t m = 1; m <= 4; m++) {
4337 GemmMicrokernelTester()
4338 .mr(4)
4339 .nr(12)
4340 .kr(1)
4341 .sr(1)
4342 .m(m)
4343 .n(n)
4344 .k(k)
4345 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004346 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004347 }
4348 }
4349 }
4350 }
4351
Marat Dukhande06f492020-04-09 00:19:31 -07004352 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004353 TEST_REQUIRES_ARM_NEON_FMA;
4354 for (uint32_t n = 24; n <= 36; n += 12) {
4355 for (size_t k = 1; k <= 20; k += 5) {
4356 GemmMicrokernelTester()
4357 .mr(4)
4358 .nr(12)
4359 .kr(1)
4360 .sr(1)
4361 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004362 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004363 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004364 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004365 }
4366 }
4367 }
4368
Marat Dukhande06f492020-04-09 00:19:31 -07004369 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004370 TEST_REQUIRES_ARM_NEON_FMA;
4371 for (uint32_t n = 24; n <= 36; n += 12) {
4372 for (size_t k = 1; k <= 20; k += 5) {
4373 GemmMicrokernelTester()
4374 .mr(4)
4375 .nr(12)
4376 .kr(1)
4377 .sr(1)
4378 .m(4)
4379 .n(n)
4380 .k(k)
4381 .cn_stride(17)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004382 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004383 }
4384 }
4385 }
4386
Marat Dukhande06f492020-04-09 00:19:31 -07004387 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004388 TEST_REQUIRES_ARM_NEON_FMA;
4389 for (uint32_t n = 24; n <= 36; n += 12) {
4390 for (size_t k = 1; k <= 20; k += 5) {
4391 GemmMicrokernelTester()
4392 .mr(4)
4393 .nr(12)
4394 .kr(1)
4395 .sr(1)
4396 .m(4)
4397 .n(n)
4398 .k(k)
4399 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004400 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004401 }
4402 }
4403 }
4404
Marat Dukhande06f492020-04-09 00:19:31 -07004405 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004406 TEST_REQUIRES_ARM_NEON_FMA;
4407 for (uint32_t n = 24; n <= 36; n += 12) {
4408 for (size_t k = 1; k <= 20; k += 5) {
4409 for (uint32_t m = 1; m <= 4; m++) {
4410 GemmMicrokernelTester()
4411 .mr(4)
4412 .nr(12)
4413 .kr(1)
4414 .sr(1)
4415 .m(m)
4416 .n(n)
4417 .k(k)
4418 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004419 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004420 }
4421 }
4422 }
4423 }
4424
// strided_cm_subtile: 4x12 Cortex-A53 kernel over k in {1, 6, 11, 16}, n in 1..12 and m in 1..4,
// with cm_stride=17 (greater than nr=12) and iterations(1) per configuration.
Marat Dukhande06f492020-04-09 00:19:31 -07004425 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004426 TEST_REQUIRES_ARM_NEON_FMA;
4427 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004428 for (uint32_t n = 1; n <= 12; n++) {
4429 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004430 GemmMicrokernelTester()
4431 .mr(4)
4432 .nr(12)
4433 .kr(1)
4434 .sr(1)
4435 .m(m)
4436 .n(n)
4437 .k(k)
4438 .cm_stride(17)
4439 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004440 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004441 }
4442 }
4443 }
4444 }
4445
// qmin: 4x12 Cortex-A53 kernel on one m=4, n=12 tile with k=4 and qmin set to 128.
// NOTE(review): qmin presumably raises the lower output clamp - confirm in gemm-microkernel-tester.h.
Marat Dukhande06f492020-04-09 00:19:31 -07004446 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004447 TEST_REQUIRES_ARM_NEON_FMA;
4448 GemmMicrokernelTester()
4449 .mr(4)
4450 .nr(12)
4451 .kr(1)
4452 .sr(1)
4453 .m(4)
4454 .n(12)
4455 .k(4)
4456 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004457 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004458 }
4459
// qmax: 4x12 Cortex-A53 kernel on one m=4, n=12 tile with k=4 and qmax set to 128.
// NOTE(review): qmax presumably lowers the upper output clamp - confirm in gemm-microkernel-tester.h.
Marat Dukhande06f492020-04-09 00:19:31 -07004460 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004461 TEST_REQUIRES_ARM_NEON_FMA;
4462 GemmMicrokernelTester()
4463 .mr(4)
4464 .nr(12)
4465 .kr(1)
4466 .sr(1)
4467 .m(4)
4468 .n(12)
4469 .k(4)
4470 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004471 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004472 }
4473
// strided_cm: 4x12 Cortex-A53 kernel on one m=4, n=12 tile with k=4 and cm_stride=17
// (greater than nr=12).
Marat Dukhande06f492020-04-09 00:19:31 -07004474 TEST(F32_GEMMINC_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004475 TEST_REQUIRES_ARM_NEON_FMA;
4476 GemmMicrokernelTester()
4477 .mr(4)
4478 .nr(12)
4479 .kr(1)
4480 .sr(1)
4481 .m(4)
4482 .n(12)
4483 .k(4)
4484 .cm_stride(17)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004485 .Test(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004486 }
4487#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4488
4489
4490#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_2: 4x8 ld64 kernel on one m=4, n=8 tile (m equals mr, n equals nr) with k=2.
Marat Dukhande06f492020-04-09 00:19:31 -07004491 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004492 TEST_REQUIRES_ARM_NEON_FMA;
4493 GemmMicrokernelTester()
4494 .mr(4)
4495 .nr(8)
4496 .kr(1)
4497 .sr(1)
4498 .m(4)
4499 .n(8)
4500 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004501 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004502 }
4503
// strided_cn: 4x8 ld64 kernel on one m=4, n=8 tile with k=2 and cn_stride=11 (greater than nr=8).
Marat Dukhande06f492020-04-09 00:19:31 -07004504 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004505 TEST_REQUIRES_ARM_NEON_FMA;
4506 GemmMicrokernelTester()
4507 .mr(4)
4508 .nr(8)
4509 .kr(1)
4510 .sr(1)
4511 .m(4)
4512 .n(8)
4513 .k(2)
4514 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004515 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004516 }
4517
// k_eq_2_strided_a: 4x8 ld64 kernel on one m=4, n=8 tile with k=2 and a_stride=5 (greater than k).
Marat Dukhande06f492020-04-09 00:19:31 -07004518 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004519 TEST_REQUIRES_ARM_NEON_FMA;
4520 GemmMicrokernelTester()
4521 .mr(4)
4522 .nr(8)
4523 .kr(1)
4524 .sr(1)
4525 .m(4)
4526 .n(8)
4527 .k(2)
4528 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004529 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004530 }
4531
// k_eq_2_subtile: 4x8 ld64 kernel at k=2 for every n in 1..8 and m in 1..4,
// with iterations(1) per configuration.
Marat Dukhande06f492020-04-09 00:19:31 -07004532 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004533 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004534 for (uint32_t n = 1; n <= 8; n++) {
4535 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004536 GemmMicrokernelTester()
4537 .mr(4)
4538 .nr(8)
4539 .kr(1)
4540 .sr(1)
4541 .m(m)
4542 .n(n)
4543 .k(2)
4544 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004545 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004546 }
4547 }
4548 }
4549
// k_eq_2_subtile_m: 4x8 ld64 kernel at k=2, n=8, sweeping m over 1..4 with iterations(1).
Marat Dukhande06f492020-04-09 00:19:31 -07004550 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004551 TEST_REQUIRES_ARM_NEON_FMA;
4552 for (uint32_t m = 1; m <= 4; m++) {
4553 GemmMicrokernelTester()
4554 .mr(4)
4555 .nr(8)
4556 .kr(1)
4557 .sr(1)
4558 .m(m)
4559 .n(8)
4560 .k(2)
4561 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004562 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004563 }
4564 }
4565
// k_eq_2_subtile_n: 4x8 ld64 kernel at k=2, m=4, sweeping n over 1..8 with iterations(1).
Marat Dukhande06f492020-04-09 00:19:31 -07004566 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004567 TEST_REQUIRES_ARM_NEON_FMA;
4568 for (uint32_t n = 1; n <= 8; n++) {
4569 GemmMicrokernelTester()
4570 .mr(4)
4571 .nr(8)
4572 .kr(1)
4573 .sr(1)
4574 .m(4)
4575 .n(n)
4576 .k(2)
4577 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004578 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004579 }
4580 }
4581
// k_lt_2: 4x8 ld64 kernel on one m=4, n=8 tile; the loop bounds (1 <= k < 2) cover only k=1.
Marat Dukhande06f492020-04-09 00:19:31 -07004582 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004583 TEST_REQUIRES_ARM_NEON_FMA;
4584 for (size_t k = 1; k < 2; k++) {
4585 GemmMicrokernelTester()
4586 .mr(4)
4587 .nr(8)
4588 .kr(1)
4589 .sr(1)
4590 .m(4)
4591 .n(8)
4592 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004593 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004594 }
4595 }
4596
// k_lt_2_strided_a: 4x8 ld64 kernel on one m=4, n=8 tile at k=1 (the only value the loop
// covers) with a_stride=5.
Marat Dukhande06f492020-04-09 00:19:31 -07004597 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004598 TEST_REQUIRES_ARM_NEON_FMA;
4599 for (size_t k = 1; k < 2; k++) {
4600 GemmMicrokernelTester()
4601 .mr(4)
4602 .nr(8)
4603 .kr(1)
4604 .sr(1)
4605 .m(4)
4606 .n(8)
4607 .k(k)
4608 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004609 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004610 }
4611 }
4612
// k_lt_2_subtile: 4x8 ld64 kernel at k=1 (the only value the loop covers) for every
// n in 1..8 and m in 1..4, with iterations(1) per configuration.
Marat Dukhande06f492020-04-09 00:19:31 -07004613 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004614 TEST_REQUIRES_ARM_NEON_FMA;
4615 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004616 for (uint32_t n = 1; n <= 8; n++) {
4617 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004618 GemmMicrokernelTester()
4619 .mr(4)
4620 .nr(8)
4621 .kr(1)
4622 .sr(1)
4623 .m(m)
4624 .n(n)
4625 .k(k)
4626 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004627 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004628 }
4629 }
4630 }
4631 }
4632
// k_gt_2: 4x8 ld64 kernel on one m=4, n=8 tile; the loop bounds (3 <= k < 4) cover only k=3.
Marat Dukhande06f492020-04-09 00:19:31 -07004633 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004634 TEST_REQUIRES_ARM_NEON_FMA;
4635 for (size_t k = 3; k < 4; k++) {
4636 GemmMicrokernelTester()
4637 .mr(4)
4638 .nr(8)
4639 .kr(1)
4640 .sr(1)
4641 .m(4)
4642 .n(8)
4643 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004644 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004645 }
4646 }
4647
// k_gt_2_strided_a: 4x8 ld64 kernel on one m=4, n=8 tile at k=3 (the only value the loop
// covers) with a_stride=7.
Marat Dukhande06f492020-04-09 00:19:31 -07004648 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004649 TEST_REQUIRES_ARM_NEON_FMA;
4650 for (size_t k = 3; k < 4; k++) {
4651 GemmMicrokernelTester()
4652 .mr(4)
4653 .nr(8)
4654 .kr(1)
4655 .sr(1)
4656 .m(4)
4657 .n(8)
4658 .k(k)
4659 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004660 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004661 }
4662 }
4663
// k_gt_2_subtile: 4x8 ld64 kernel at k=3 (the only value the loop covers) for every
// n in 1..8 and m in 1..4, with iterations(1) per configuration.
Marat Dukhande06f492020-04-09 00:19:31 -07004664 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004665 TEST_REQUIRES_ARM_NEON_FMA;
4666 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004667 for (uint32_t n = 1; n <= 8; n++) {
4668 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004669 GemmMicrokernelTester()
4670 .mr(4)
4671 .nr(8)
4672 .kr(1)
4673 .sr(1)
4674 .m(m)
4675 .n(n)
4676 .k(k)
4677 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004678 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004679 }
4680 }
4681 }
4682 }
4683
// k_div_2: 4x8 ld64 kernel on one m=4, n=8 tile for even k from 4 through 20.
Marat Dukhande06f492020-04-09 00:19:31 -07004684 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004685 TEST_REQUIRES_ARM_NEON_FMA;
4686 for (size_t k = 4; k <= 20; k += 2) {
4687 GemmMicrokernelTester()
4688 .mr(4)
4689 .nr(8)
4690 .kr(1)
4691 .sr(1)
4692 .m(4)
4693 .n(8)
4694 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004695 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004696 }
4697 }
4698
// k_div_2_strided_a: 4x8 ld64 kernel on one m=4, n=8 tile for even k from 4 through 20,
// with a_stride=23 (larger than every k tested).
Marat Dukhande06f492020-04-09 00:19:31 -07004699 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004700 TEST_REQUIRES_ARM_NEON_FMA;
4701 for (size_t k = 4; k <= 20; k += 2) {
4702 GemmMicrokernelTester()
4703 .mr(4)
4704 .nr(8)
4705 .kr(1)
4706 .sr(1)
4707 .m(4)
4708 .n(8)
4709 .k(k)
4710 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004711 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004712 }
4713 }
4714
// k_div_2_subtile: 4x8 ld64 kernel for even k from 4 through 20, every n in 1..8 and
// m in 1..4, with iterations(1) per configuration.
Marat Dukhande06f492020-04-09 00:19:31 -07004715 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004716 TEST_REQUIRES_ARM_NEON_FMA;
4717 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004718 for (uint32_t n = 1; n <= 8; n++) {
4719 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004720 GemmMicrokernelTester()
4721 .mr(4)
4722 .nr(8)
4723 .kr(1)
4724 .sr(1)
4725 .m(m)
4726 .n(n)
4727 .k(k)
4728 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004729 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004730 }
4731 }
4732 }
4733 }
4734
// n_gt_8: 4x8 ld64 kernel at m=4 with n in 9..15 (above nr=8, below 2*nr) and k in {1, 4, 7, 10}.
Marat Dukhande06f492020-04-09 00:19:31 -07004735 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004736 TEST_REQUIRES_ARM_NEON_FMA;
4737 for (uint32_t n = 9; n < 16; n++) {
4738 for (size_t k = 1; k <= 10; k += 3) {
4739 GemmMicrokernelTester()
4740 .mr(4)
4741 .nr(8)
4742 .kr(1)
4743 .sr(1)
4744 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004745 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004746 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004747 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004748 }
4749 }
4750 }
4751
// n_gt_8_strided_cn: 4x8 ld64 kernel at m=4 with n in 9..15, k in {1, 4, 7, 10} and cn_stride=11.
Marat Dukhande06f492020-04-09 00:19:31 -07004752 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004753 TEST_REQUIRES_ARM_NEON_FMA;
4754 for (uint32_t n = 9; n < 16; n++) {
4755 for (size_t k = 1; k <= 10; k += 3) {
4756 GemmMicrokernelTester()
4757 .mr(4)
4758 .nr(8)
4759 .kr(1)
4760 .sr(1)
4761 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004762 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004763 .k(k)
4764 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004765 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004766 }
4767 }
4768 }
4769
// n_gt_8_strided_a: 4x8 ld64 kernel at m=4 with n in 9..15, k in {1, 4, 7, 10} and
// a_stride=13 (larger than every k tested).
Marat Dukhande06f492020-04-09 00:19:31 -07004770 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004771 TEST_REQUIRES_ARM_NEON_FMA;
4772 for (uint32_t n = 9; n < 16; n++) {
4773 for (size_t k = 1; k <= 10; k += 3) {
4774 GemmMicrokernelTester()
4775 .mr(4)
4776 .nr(8)
4777 .kr(1)
4778 .sr(1)
4779 .m(4)
4780 .n(n)
4781 .k(k)
4782 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004783 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004784 }
4785 }
4786 }
4787
// n_gt_8_subtile: 4x8 ld64 kernel with n in 9..15, k in {1, 4, 7, 10} and every m in 1..4,
// with iterations(1) per configuration.
Marat Dukhande06f492020-04-09 00:19:31 -07004788 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004789 TEST_REQUIRES_ARM_NEON_FMA;
4790 for (uint32_t n = 9; n < 16; n++) {
4791 for (size_t k = 1; k <= 10; k += 3) {
4792 for (uint32_t m = 1; m <= 4; m++) {
4793 GemmMicrokernelTester()
4794 .mr(4)
4795 .nr(8)
4796 .kr(1)
4797 .sr(1)
4798 .m(m)
4799 .n(n)
4800 .k(k)
4801 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004802 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004803 }
4804 }
4805 }
4806 }
4807
// n_div_8: 4x8 ld64 kernel at m=4 with n in {16, 24} (multiples of nr=8) and k in {1, 4, 7, 10}.
Marat Dukhande06f492020-04-09 00:19:31 -07004808 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004809 TEST_REQUIRES_ARM_NEON_FMA;
4810 for (uint32_t n = 16; n <= 24; n += 8) {
4811 for (size_t k = 1; k <= 10; k += 3) {
4812 GemmMicrokernelTester()
4813 .mr(4)
4814 .nr(8)
4815 .kr(1)
4816 .sr(1)
4817 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004818 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004819 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004820 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004821 }
4822 }
4823 }
4824
// n_div_8_strided_cn: 4x8 ld64 kernel at m=4 with n in {16, 24}, k in {1, 4, 7, 10}
// and cn_stride=11.
Marat Dukhande06f492020-04-09 00:19:31 -07004825 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004826 TEST_REQUIRES_ARM_NEON_FMA;
4827 for (uint32_t n = 16; n <= 24; n += 8) {
4828 for (size_t k = 1; k <= 10; k += 3) {
4829 GemmMicrokernelTester()
4830 .mr(4)
4831 .nr(8)
4832 .kr(1)
4833 .sr(1)
4834 .m(4)
4835 .n(n)
4836 .k(k)
4837 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004838 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004839 }
4840 }
4841 }
4842
// n_div_8_strided_a: 4x8 ld64 kernel at m=4 with n in {16, 24}, k in {1, 4, 7, 10}
// and a_stride=13 (larger than every k tested).
Marat Dukhande06f492020-04-09 00:19:31 -07004843 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004844 TEST_REQUIRES_ARM_NEON_FMA;
4845 for (uint32_t n = 16; n <= 24; n += 8) {
4846 for (size_t k = 1; k <= 10; k += 3) {
4847 GemmMicrokernelTester()
4848 .mr(4)
4849 .nr(8)
4850 .kr(1)
4851 .sr(1)
4852 .m(4)
4853 .n(n)
4854 .k(k)
4855 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004856 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004857 }
4858 }
4859 }
4860
// n_div_8_subtile: 4x8 ld64 kernel with n in {16, 24}, k in {1, 4, 7, 10} and every m in 1..4,
// with iterations(1) per configuration.
Marat Dukhande06f492020-04-09 00:19:31 -07004861 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004862 TEST_REQUIRES_ARM_NEON_FMA;
4863 for (uint32_t n = 16; n <= 24; n += 8) {
4864 for (size_t k = 1; k <= 10; k += 3) {
4865 for (uint32_t m = 1; m <= 4; m++) {
4866 GemmMicrokernelTester()
4867 .mr(4)
4868 .nr(8)
4869 .kr(1)
4870 .sr(1)
4871 .m(m)
4872 .n(n)
4873 .k(k)
4874 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004875 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004876 }
4877 }
4878 }
4879 }
4880
// strided_cm_subtile: 4x8 ld64 kernel over k in {1, 4, 7, 10}, n in 1..8 and m in 1..4,
// with cm_stride=11 (greater than nr=8) and iterations(1) per configuration.
Marat Dukhande06f492020-04-09 00:19:31 -07004881 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004882 TEST_REQUIRES_ARM_NEON_FMA;
4883 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004884 for (uint32_t n = 1; n <= 8; n++) {
4885 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004886 GemmMicrokernelTester()
4887 .mr(4)
4888 .nr(8)
4889 .kr(1)
4890 .sr(1)
4891 .m(m)
4892 .n(n)
4893 .k(k)
4894 .cm_stride(11)
4895 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004896 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004897 }
4898 }
4899 }
4900 }
4901
// qmin: 4x8 ld64 kernel on one m=4, n=8 tile with k=2 and qmin set to 128.
// NOTE(review): qmin presumably raises the lower output clamp - confirm in gemm-microkernel-tester.h.
Marat Dukhande06f492020-04-09 00:19:31 -07004902 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004903 TEST_REQUIRES_ARM_NEON_FMA;
4904 GemmMicrokernelTester()
4905 .mr(4)
4906 .nr(8)
4907 .kr(1)
4908 .sr(1)
4909 .m(4)
4910 .n(8)
4911 .k(2)
4912 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004913 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004914 }
4915
// qmax: 4x8 ld64 kernel on one m=4, n=8 tile with k=2 and qmax set to 128.
// NOTE(review): qmax presumably lowers the upper output clamp - confirm in gemm-microkernel-tester.h.
Marat Dukhande06f492020-04-09 00:19:31 -07004916 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004917 TEST_REQUIRES_ARM_NEON_FMA;
4918 GemmMicrokernelTester()
4919 .mr(4)
4920 .nr(8)
4921 .kr(1)
4922 .sr(1)
4923 .m(4)
4924 .n(8)
4925 .k(2)
4926 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004927 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004928 }
4929
// strided_cm: 4x8 ld64 kernel on one m=4, n=8 tile with k=2 and cm_stride=11 (greater than nr=8).
Marat Dukhande06f492020-04-09 00:19:31 -07004930 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004931 TEST_REQUIRES_ARM_NEON_FMA;
4932 GemmMicrokernelTester()
4933 .mr(4)
4934 .nr(8)
4935 .kr(1)
4936 .sr(1)
4937 .m(4)
4938 .n(8)
4939 .k(2)
4940 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004941 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004942 }
4943#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4944
4945
4946#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_4: 4x8 ld128 kernel on one m=4, n=8 tile (m equals mr, n equals nr) with k=4.
Marat Dukhande06f492020-04-09 00:19:31 -07004947 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004948 TEST_REQUIRES_ARM_NEON_FMA;
4949 GemmMicrokernelTester()
4950 .mr(4)
4951 .nr(8)
4952 .kr(1)
4953 .sr(1)
4954 .m(4)
4955 .n(8)
4956 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004957 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004958 }
4959
// strided_cn: 4x8 ld128 kernel on one m=4, n=8 tile with k=4 and cn_stride=11 (greater than nr=8).
Marat Dukhande06f492020-04-09 00:19:31 -07004960 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004961 TEST_REQUIRES_ARM_NEON_FMA;
4962 GemmMicrokernelTester()
4963 .mr(4)
4964 .nr(8)
4965 .kr(1)
4966 .sr(1)
4967 .m(4)
4968 .n(8)
4969 .k(4)
4970 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004971 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004972 }
4973
// k_eq_4_strided_a: 4x8 ld128 kernel on one m=4, n=8 tile with k=4 and a_stride=7 (greater than k).
Marat Dukhande06f492020-04-09 00:19:31 -07004974 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004975 TEST_REQUIRES_ARM_NEON_FMA;
4976 GemmMicrokernelTester()
4977 .mr(4)
4978 .nr(8)
4979 .kr(1)
4980 .sr(1)
4981 .m(4)
4982 .n(8)
4983 .k(4)
4984 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004985 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004986 }
4987
// k_eq_4_subtile: 4x8 ld128 kernel at k=4 for every n in 1..8 and m in 1..4,
// with iterations(1) per configuration.
Marat Dukhande06f492020-04-09 00:19:31 -07004988 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004989 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004990 for (uint32_t n = 1; n <= 8; n++) {
4991 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004992 GemmMicrokernelTester()
4993 .mr(4)
4994 .nr(8)
4995 .kr(1)
4996 .sr(1)
4997 .m(m)
4998 .n(n)
4999 .k(4)
5000 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005001 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005002 }
5003 }
5004 }
5005
// k_eq_4_subtile_m: 4x8 ld128 kernel at k=4, n=8, sweeping m over 1..4 with iterations(1).
Marat Dukhande06f492020-04-09 00:19:31 -07005006 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005007 TEST_REQUIRES_ARM_NEON_FMA;
5008 for (uint32_t m = 1; m <= 4; m++) {
5009 GemmMicrokernelTester()
5010 .mr(4)
5011 .nr(8)
5012 .kr(1)
5013 .sr(1)
5014 .m(m)
5015 .n(8)
5016 .k(4)
5017 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005018 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005019 }
5020 }
5021
// k_eq_4_subtile_n: 4x8 ld128 kernel at k=4, m=4, sweeping n over 1..8 with iterations(1).
Marat Dukhande06f492020-04-09 00:19:31 -07005022 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005023 TEST_REQUIRES_ARM_NEON_FMA;
5024 for (uint32_t n = 1; n <= 8; n++) {
5025 GemmMicrokernelTester()
5026 .mr(4)
5027 .nr(8)
5028 .kr(1)
5029 .sr(1)
5030 .m(4)
5031 .n(n)
5032 .k(4)
5033 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005034 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005035 }
5036 }
5037
// k_lt_4: 4x8 ld128 kernel on one m=4, n=8 tile for k in 1..3.
Marat Dukhande06f492020-04-09 00:19:31 -07005038 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005039 TEST_REQUIRES_ARM_NEON_FMA;
5040 for (size_t k = 1; k < 4; k++) {
5041 GemmMicrokernelTester()
5042 .mr(4)
5043 .nr(8)
5044 .kr(1)
5045 .sr(1)
5046 .m(4)
5047 .n(8)
5048 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005049 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005050 }
5051 }
5052
// k_lt_4_strided_a: 4x8 ld128 kernel on one m=4, n=8 tile for k in 1..3 with a_stride=7.
Marat Dukhande06f492020-04-09 00:19:31 -07005053 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005054 TEST_REQUIRES_ARM_NEON_FMA;
5055 for (size_t k = 1; k < 4; k++) {
5056 GemmMicrokernelTester()
5057 .mr(4)
5058 .nr(8)
5059 .kr(1)
5060 .sr(1)
5061 .m(4)
5062 .n(8)
5063 .k(k)
5064 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005065 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005066 }
5067 }
5068
// k_lt_4_subtile: 4x8 ld128 kernel for k in 1..3, every n in 1..8 and m in 1..4,
// with iterations(1) per configuration.
Marat Dukhande06f492020-04-09 00:19:31 -07005069 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005070 TEST_REQUIRES_ARM_NEON_FMA;
5071 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005072 for (uint32_t n = 1; n <= 8; n++) {
5073 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005074 GemmMicrokernelTester()
5075 .mr(4)
5076 .nr(8)
5077 .kr(1)
5078 .sr(1)
5079 .m(m)
5080 .n(n)
5081 .k(k)
5082 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005083 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005084 }
5085 }
5086 }
5087 }
5088
// k_gt_4: 4x8 ld128 kernel on one m=4, n=8 tile for k in 5..7.
Marat Dukhande06f492020-04-09 00:19:31 -07005089 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005090 TEST_REQUIRES_ARM_NEON_FMA;
5091 for (size_t k = 5; k < 8; k++) {
5092 GemmMicrokernelTester()
5093 .mr(4)
5094 .nr(8)
5095 .kr(1)
5096 .sr(1)
5097 .m(4)
5098 .n(8)
5099 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005100 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005101 }
5102 }
5103
// k_gt_4_strided_a: 4x8 ld128 kernel on one m=4, n=8 tile for k in 5..7 with a_stride=11.
Marat Dukhande06f492020-04-09 00:19:31 -07005104 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005105 TEST_REQUIRES_ARM_NEON_FMA;
5106 for (size_t k = 5; k < 8; k++) {
5107 GemmMicrokernelTester()
5108 .mr(4)
5109 .nr(8)
5110 .kr(1)
5111 .sr(1)
5112 .m(4)
5113 .n(8)
5114 .k(k)
5115 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005116 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005117 }
5118 }
5119
// k_gt_4_subtile: 4x8 ld128 kernel for k in 5..7, every n in 1..8 and m in 1..4,
// with iterations(1) per configuration.
Marat Dukhande06f492020-04-09 00:19:31 -07005120 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005121 TEST_REQUIRES_ARM_NEON_FMA;
5122 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005123 for (uint32_t n = 1; n <= 8; n++) {
5124 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005125 GemmMicrokernelTester()
5126 .mr(4)
5127 .nr(8)
5128 .kr(1)
5129 .sr(1)
5130 .m(m)
5131 .n(n)
5132 .k(k)
5133 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005134 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005135 }
5136 }
5137 }
5138 }
5139
// k_div_4: 4x8 ld128 kernel on one m=4, n=8 tile for k = 8, 12, ..., 40 (multiples of 4).
Marat Dukhande06f492020-04-09 00:19:31 -07005140 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005141 TEST_REQUIRES_ARM_NEON_FMA;
5142 for (size_t k = 8; k <= 40; k += 4) {
5143 GemmMicrokernelTester()
5144 .mr(4)
5145 .nr(8)
5146 .kr(1)
5147 .sr(1)
5148 .m(4)
5149 .n(8)
5150 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005151 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005152 }
5153 }
5154
// k_div_4_strided_a: 4x8 ld128 kernel on one m=4, n=8 tile for k = 8, 12, ..., 40,
// with a_stride=43 (larger than every k tested).
Marat Dukhande06f492020-04-09 00:19:31 -07005155 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005156 TEST_REQUIRES_ARM_NEON_FMA;
5157 for (size_t k = 8; k <= 40; k += 4) {
5158 GemmMicrokernelTester()
5159 .mr(4)
5160 .nr(8)
5161 .kr(1)
5162 .sr(1)
5163 .m(4)
5164 .n(8)
5165 .k(k)
5166 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005167 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005168 }
5169 }
5170
// k_div_4_subtile: 4x8 ld128 kernel for k = 8, 12, ..., 40, every n in 1..8 and m in 1..4,
// with iterations(1) per configuration.
Marat Dukhande06f492020-04-09 00:19:31 -07005171 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005172 TEST_REQUIRES_ARM_NEON_FMA;
5173 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005174 for (uint32_t n = 1; n <= 8; n++) {
5175 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005176 GemmMicrokernelTester()
5177 .mr(4)
5178 .nr(8)
5179 .kr(1)
5180 .sr(1)
5181 .m(m)
5182 .n(n)
5183 .k(k)
5184 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005185 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005186 }
5187 }
5188 }
5189 }
5190
// n_gt_8: 4x8 ld128 kernel at m=4 with n in 9..15 (above nr=8, below 2*nr) and k in {1, 6, 11, 16}.
Marat Dukhande06f492020-04-09 00:19:31 -07005191 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005192 TEST_REQUIRES_ARM_NEON_FMA;
5193 for (uint32_t n = 9; n < 16; n++) {
5194 for (size_t k = 1; k <= 20; k += 5) {
5195 GemmMicrokernelTester()
5196 .mr(4)
5197 .nr(8)
5198 .kr(1)
5199 .sr(1)
5200 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005201 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005202 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005203 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005204 }
5205 }
5206 }
5207
// n_gt_8_strided_cn: 4x8 ld128 kernel at m=4 with n in 9..15, k in {1, 6, 11, 16} and cn_stride=11.
Marat Dukhande06f492020-04-09 00:19:31 -07005208 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005209 TEST_REQUIRES_ARM_NEON_FMA;
5210 for (uint32_t n = 9; n < 16; n++) {
5211 for (size_t k = 1; k <= 20; k += 5) {
5212 GemmMicrokernelTester()
5213 .mr(4)
5214 .nr(8)
5215 .kr(1)
5216 .sr(1)
5217 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005218 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005219 .k(k)
5220 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005221 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005222 }
5223 }
5224 }
5225
// n_gt_8_strided_a: 4x8 ld128 kernel at m=4 with n in 9..15, k in {1, 6, 11, 16} and
// a_stride=23 (larger than every k tested).
Marat Dukhande06f492020-04-09 00:19:31 -07005226 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005227 TEST_REQUIRES_ARM_NEON_FMA;
5228 for (uint32_t n = 9; n < 16; n++) {
5229 for (size_t k = 1; k <= 20; k += 5) {
5230 GemmMicrokernelTester()
5231 .mr(4)
5232 .nr(8)
5233 .kr(1)
5234 .sr(1)
5235 .m(4)
5236 .n(n)
5237 .k(k)
5238 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005239 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005240 }
5241 }
5242 }
5243
// n_gt_8_subtile: 4x8 ld128 kernel with n in 9..15, k in {1, 6, 11, 16} and every m in 1..4,
// with iterations(1) per configuration.
Marat Dukhande06f492020-04-09 00:19:31 -07005244 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005245 TEST_REQUIRES_ARM_NEON_FMA;
5246 for (uint32_t n = 9; n < 16; n++) {
5247 for (size_t k = 1; k <= 20; k += 5) {
5248 for (uint32_t m = 1; m <= 4; m++) {
5249 GemmMicrokernelTester()
5250 .mr(4)
5251 .nr(8)
5252 .kr(1)
5253 .sr(1)
5254 .m(m)
5255 .n(n)
5256 .k(k)
5257 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005258 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005259 }
5260 }
5261 }
5262 }
5263
// n_div_8: 4x8 ld128 kernel at m=4 with n in {16, 24} (multiples of nr=8) and k in {1, 6, 11, 16}.
Marat Dukhande06f492020-04-09 00:19:31 -07005264 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005265 TEST_REQUIRES_ARM_NEON_FMA;
5266 for (uint32_t n = 16; n <= 24; n += 8) {
5267 for (size_t k = 1; k <= 20; k += 5) {
5268 GemmMicrokernelTester()
5269 .mr(4)
5270 .nr(8)
5271 .kr(1)
5272 .sr(1)
5273 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005274 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005275 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005276 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005277 }
5278 }
5279 }
5280
// n_div_8_strided_cn: 4x8 ld128 kernel at m=4 with n in {16, 24}, k in {1, 6, 11, 16}
// and cn_stride=11.
Marat Dukhande06f492020-04-09 00:19:31 -07005281 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005282 TEST_REQUIRES_ARM_NEON_FMA;
5283 for (uint32_t n = 16; n <= 24; n += 8) {
5284 for (size_t k = 1; k <= 20; k += 5) {
5285 GemmMicrokernelTester()
5286 .mr(4)
5287 .nr(8)
5288 .kr(1)
5289 .sr(1)
5290 .m(4)
5291 .n(n)
5292 .k(k)
5293 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005294 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005295 }
5296 }
5297 }
5298
// n_div_8_strided_a: 4x8 ld128 kernel at m=4 with n in {16, 24}, k in {1, 6, 11, 16}
// and a_stride=23 (larger than every k tested).
Marat Dukhande06f492020-04-09 00:19:31 -07005299 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005300 TEST_REQUIRES_ARM_NEON_FMA;
5301 for (uint32_t n = 16; n <= 24; n += 8) {
5302 for (size_t k = 1; k <= 20; k += 5) {
5303 GemmMicrokernelTester()
5304 .mr(4)
5305 .nr(8)
5306 .kr(1)
5307 .sr(1)
5308 .m(4)
5309 .n(n)
5310 .k(k)
5311 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005312 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005313 }
5314 }
5315 }
5316
Marat Dukhande06f492020-04-09 00:19:31 -07005317 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005318 TEST_REQUIRES_ARM_NEON_FMA;
5319 for (uint32_t n = 16; n <= 24; n += 8) {
5320 for (size_t k = 1; k <= 20; k += 5) {
5321 for (uint32_t m = 1; m <= 4; m++) {
5322 GemmMicrokernelTester()
5323 .mr(4)
5324 .nr(8)
5325 .kr(1)
5326 .sr(1)
5327 .m(m)
5328 .n(n)
5329 .k(k)
5330 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005331 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005332 }
5333 }
5334 }
5335 }
5336
Marat Dukhande06f492020-04-09 00:19:31 -07005337 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005338 TEST_REQUIRES_ARM_NEON_FMA;
5339 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005340 for (uint32_t n = 1; n <= 8; n++) {
5341 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005342 GemmMicrokernelTester()
5343 .mr(4)
5344 .nr(8)
5345 .kr(1)
5346 .sr(1)
5347 .m(m)
5348 .n(n)
5349 .k(k)
5350 .cm_stride(11)
5351 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005352 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005353 }
5354 }
5355 }
5356 }
5357
Marat Dukhande06f492020-04-09 00:19:31 -07005358 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005359 TEST_REQUIRES_ARM_NEON_FMA;
5360 GemmMicrokernelTester()
5361 .mr(4)
5362 .nr(8)
5363 .kr(1)
5364 .sr(1)
5365 .m(4)
5366 .n(8)
5367 .k(4)
5368 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005369 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005370 }
5371
Marat Dukhande06f492020-04-09 00:19:31 -07005372 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005373 TEST_REQUIRES_ARM_NEON_FMA;
5374 GemmMicrokernelTester()
5375 .mr(4)
5376 .nr(8)
5377 .kr(1)
5378 .sr(1)
5379 .m(4)
5380 .n(8)
5381 .k(4)
5382 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005383 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005384 }
5385
Marat Dukhande06f492020-04-09 00:19:31 -07005386 TEST(F32_GEMMINC_MINMAX_4X8__AARCH64_NEONFMA_LD128, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005387 TEST_REQUIRES_ARM_NEON_FMA;
5388 GemmMicrokernelTester()
5389 .mr(4)
5390 .nr(8)
5391 .kr(1)
5392 .sr(1)
5393 .m(4)
5394 .n(8)
5395 .k(4)
5396 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005397 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005398 }
5399#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5400
5401
Marat Dukhan1c587112020-04-08 20:04:28 -07005402#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07005403 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005404 TEST_REQUIRES_ARM_NEON;
5405 GemmMicrokernelTester()
5406 .mr(6)
5407 .nr(8)
5408 .kr(1)
5409 .sr(1)
5410 .m(6)
5411 .n(8)
5412 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005413 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005414 }
5415
Marat Dukhande06f492020-04-09 00:19:31 -07005416 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005417 TEST_REQUIRES_ARM_NEON;
5418 GemmMicrokernelTester()
5419 .mr(6)
5420 .nr(8)
5421 .kr(1)
5422 .sr(1)
5423 .m(6)
5424 .n(8)
5425 .k(2)
5426 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005427 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005428 }
5429
Marat Dukhande06f492020-04-09 00:19:31 -07005430 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005431 TEST_REQUIRES_ARM_NEON;
5432 GemmMicrokernelTester()
5433 .mr(6)
5434 .nr(8)
5435 .kr(1)
5436 .sr(1)
5437 .m(6)
5438 .n(8)
5439 .k(2)
5440 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005441 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005442 }
5443
Marat Dukhande06f492020-04-09 00:19:31 -07005444 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005445 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005446 for (uint32_t n = 1; n <= 8; n++) {
5447 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005448 GemmMicrokernelTester()
5449 .mr(6)
5450 .nr(8)
5451 .kr(1)
5452 .sr(1)
5453 .m(m)
5454 .n(n)
5455 .k(2)
5456 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005457 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005458 }
5459 }
5460 }
5461
Marat Dukhande06f492020-04-09 00:19:31 -07005462 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005463 TEST_REQUIRES_ARM_NEON;
5464 for (uint32_t m = 1; m <= 6; m++) {
5465 GemmMicrokernelTester()
5466 .mr(6)
5467 .nr(8)
5468 .kr(1)
5469 .sr(1)
5470 .m(m)
5471 .n(8)
5472 .k(2)
5473 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005474 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005475 }
5476 }
5477
Marat Dukhande06f492020-04-09 00:19:31 -07005478 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005479 TEST_REQUIRES_ARM_NEON;
5480 for (uint32_t n = 1; n <= 8; n++) {
5481 GemmMicrokernelTester()
5482 .mr(6)
5483 .nr(8)
5484 .kr(1)
5485 .sr(1)
5486 .m(6)
5487 .n(n)
5488 .k(2)
5489 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005490 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005491 }
5492 }
5493
Marat Dukhande06f492020-04-09 00:19:31 -07005494 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005495 TEST_REQUIRES_ARM_NEON;
5496 for (size_t k = 1; k < 2; k++) {
5497 GemmMicrokernelTester()
5498 .mr(6)
5499 .nr(8)
5500 .kr(1)
5501 .sr(1)
5502 .m(6)
5503 .n(8)
5504 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005505 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005506 }
5507 }
5508
Marat Dukhande06f492020-04-09 00:19:31 -07005509 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005510 TEST_REQUIRES_ARM_NEON;
5511 for (size_t k = 1; k < 2; k++) {
5512 GemmMicrokernelTester()
5513 .mr(6)
5514 .nr(8)
5515 .kr(1)
5516 .sr(1)
5517 .m(6)
5518 .n(8)
5519 .k(k)
5520 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005521 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005522 }
5523 }
5524
Marat Dukhande06f492020-04-09 00:19:31 -07005525 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005526 TEST_REQUIRES_ARM_NEON;
5527 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005528 for (uint32_t n = 1; n <= 8; n++) {
5529 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005530 GemmMicrokernelTester()
5531 .mr(6)
5532 .nr(8)
5533 .kr(1)
5534 .sr(1)
5535 .m(m)
5536 .n(n)
5537 .k(k)
5538 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005539 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005540 }
5541 }
5542 }
5543 }
5544
Marat Dukhande06f492020-04-09 00:19:31 -07005545 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005546 TEST_REQUIRES_ARM_NEON;
5547 for (size_t k = 3; k < 4; k++) {
5548 GemmMicrokernelTester()
5549 .mr(6)
5550 .nr(8)
5551 .kr(1)
5552 .sr(1)
5553 .m(6)
5554 .n(8)
5555 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005556 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005557 }
5558 }
5559
Marat Dukhande06f492020-04-09 00:19:31 -07005560 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005561 TEST_REQUIRES_ARM_NEON;
5562 for (size_t k = 3; k < 4; k++) {
5563 GemmMicrokernelTester()
5564 .mr(6)
5565 .nr(8)
5566 .kr(1)
5567 .sr(1)
5568 .m(6)
5569 .n(8)
5570 .k(k)
5571 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005572 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005573 }
5574 }
5575
Marat Dukhande06f492020-04-09 00:19:31 -07005576 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005577 TEST_REQUIRES_ARM_NEON;
5578 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005579 for (uint32_t n = 1; n <= 8; n++) {
5580 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005581 GemmMicrokernelTester()
5582 .mr(6)
5583 .nr(8)
5584 .kr(1)
5585 .sr(1)
5586 .m(m)
5587 .n(n)
5588 .k(k)
5589 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005590 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005591 }
5592 }
5593 }
5594 }
5595
Marat Dukhande06f492020-04-09 00:19:31 -07005596 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005597 TEST_REQUIRES_ARM_NEON;
5598 for (size_t k = 4; k <= 20; k += 2) {
5599 GemmMicrokernelTester()
5600 .mr(6)
5601 .nr(8)
5602 .kr(1)
5603 .sr(1)
5604 .m(6)
5605 .n(8)
5606 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005607 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005608 }
5609 }
5610
Marat Dukhande06f492020-04-09 00:19:31 -07005611 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005612 TEST_REQUIRES_ARM_NEON;
5613 for (size_t k = 4; k <= 20; k += 2) {
5614 GemmMicrokernelTester()
5615 .mr(6)
5616 .nr(8)
5617 .kr(1)
5618 .sr(1)
5619 .m(6)
5620 .n(8)
5621 .k(k)
5622 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005623 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005624 }
5625 }
5626
Marat Dukhande06f492020-04-09 00:19:31 -07005627 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005628 TEST_REQUIRES_ARM_NEON;
5629 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005630 for (uint32_t n = 1; n <= 8; n++) {
5631 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005632 GemmMicrokernelTester()
5633 .mr(6)
5634 .nr(8)
5635 .kr(1)
5636 .sr(1)
5637 .m(m)
5638 .n(n)
5639 .k(k)
5640 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005641 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005642 }
5643 }
5644 }
5645 }
5646
Marat Dukhande06f492020-04-09 00:19:31 -07005647 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005648 TEST_REQUIRES_ARM_NEON;
5649 for (uint32_t n = 9; n < 16; n++) {
5650 for (size_t k = 1; k <= 10; k += 3) {
5651 GemmMicrokernelTester()
5652 .mr(6)
5653 .nr(8)
5654 .kr(1)
5655 .sr(1)
5656 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005657 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005658 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005659 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005660 }
5661 }
5662 }
5663
Marat Dukhande06f492020-04-09 00:19:31 -07005664 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005665 TEST_REQUIRES_ARM_NEON;
5666 for (uint32_t n = 9; n < 16; n++) {
5667 for (size_t k = 1; k <= 10; k += 3) {
5668 GemmMicrokernelTester()
5669 .mr(6)
5670 .nr(8)
5671 .kr(1)
5672 .sr(1)
5673 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005674 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005675 .k(k)
5676 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005677 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005678 }
5679 }
5680 }
5681
Marat Dukhande06f492020-04-09 00:19:31 -07005682 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005683 TEST_REQUIRES_ARM_NEON;
5684 for (uint32_t n = 9; n < 16; n++) {
5685 for (size_t k = 1; k <= 10; k += 3) {
5686 GemmMicrokernelTester()
5687 .mr(6)
5688 .nr(8)
5689 .kr(1)
5690 .sr(1)
5691 .m(6)
5692 .n(n)
5693 .k(k)
5694 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005695 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005696 }
5697 }
5698 }
5699
Marat Dukhande06f492020-04-09 00:19:31 -07005700 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005701 TEST_REQUIRES_ARM_NEON;
5702 for (uint32_t n = 9; n < 16; n++) {
5703 for (size_t k = 1; k <= 10; k += 3) {
5704 for (uint32_t m = 1; m <= 6; m++) {
5705 GemmMicrokernelTester()
5706 .mr(6)
5707 .nr(8)
5708 .kr(1)
5709 .sr(1)
5710 .m(m)
5711 .n(n)
5712 .k(k)
5713 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005714 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005715 }
5716 }
5717 }
5718 }
5719
Marat Dukhande06f492020-04-09 00:19:31 -07005720 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005721 TEST_REQUIRES_ARM_NEON;
5722 for (uint32_t n = 16; n <= 24; n += 8) {
5723 for (size_t k = 1; k <= 10; k += 3) {
5724 GemmMicrokernelTester()
5725 .mr(6)
5726 .nr(8)
5727 .kr(1)
5728 .sr(1)
5729 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005730 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005731 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005732 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005733 }
5734 }
5735 }
5736
Marat Dukhande06f492020-04-09 00:19:31 -07005737 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005738 TEST_REQUIRES_ARM_NEON;
5739 for (uint32_t n = 16; n <= 24; n += 8) {
5740 for (size_t k = 1; k <= 10; k += 3) {
5741 GemmMicrokernelTester()
5742 .mr(6)
5743 .nr(8)
5744 .kr(1)
5745 .sr(1)
5746 .m(6)
5747 .n(n)
5748 .k(k)
5749 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005750 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005751 }
5752 }
5753 }
5754
Marat Dukhande06f492020-04-09 00:19:31 -07005755 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005756 TEST_REQUIRES_ARM_NEON;
5757 for (uint32_t n = 16; n <= 24; n += 8) {
5758 for (size_t k = 1; k <= 10; k += 3) {
5759 GemmMicrokernelTester()
5760 .mr(6)
5761 .nr(8)
5762 .kr(1)
5763 .sr(1)
5764 .m(6)
5765 .n(n)
5766 .k(k)
5767 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005768 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005769 }
5770 }
5771 }
5772
Marat Dukhande06f492020-04-09 00:19:31 -07005773 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005774 TEST_REQUIRES_ARM_NEON;
5775 for (uint32_t n = 16; n <= 24; n += 8) {
5776 for (size_t k = 1; k <= 10; k += 3) {
5777 for (uint32_t m = 1; m <= 6; m++) {
5778 GemmMicrokernelTester()
5779 .mr(6)
5780 .nr(8)
5781 .kr(1)
5782 .sr(1)
5783 .m(m)
5784 .n(n)
5785 .k(k)
5786 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005787 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005788 }
5789 }
5790 }
5791 }
5792
Marat Dukhande06f492020-04-09 00:19:31 -07005793 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005794 TEST_REQUIRES_ARM_NEON;
5795 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005796 for (uint32_t n = 1; n <= 8; n++) {
5797 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005798 GemmMicrokernelTester()
5799 .mr(6)
5800 .nr(8)
5801 .kr(1)
5802 .sr(1)
5803 .m(m)
5804 .n(n)
5805 .k(k)
5806 .cm_stride(11)
5807 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005808 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005809 }
5810 }
5811 }
5812 }
5813
Marat Dukhande06f492020-04-09 00:19:31 -07005814 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005815 TEST_REQUIRES_ARM_NEON;
5816 GemmMicrokernelTester()
5817 .mr(6)
5818 .nr(8)
5819 .kr(1)
5820 .sr(1)
5821 .m(6)
5822 .n(8)
5823 .k(2)
5824 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005825 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005826 }
5827
Marat Dukhande06f492020-04-09 00:19:31 -07005828 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005829 TEST_REQUIRES_ARM_NEON;
5830 GemmMicrokernelTester()
5831 .mr(6)
5832 .nr(8)
5833 .kr(1)
5834 .sr(1)
5835 .m(6)
5836 .n(8)
5837 .k(2)
5838 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005839 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005840 }
5841
Marat Dukhande06f492020-04-09 00:19:31 -07005842 TEST(F32_GEMMINC_MINMAX_6X8__NEON_LANE_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005843 TEST_REQUIRES_ARM_NEON;
5844 GemmMicrokernelTester()
5845 .mr(6)
5846 .nr(8)
5847 .kr(1)
5848 .sr(1)
5849 .m(6)
5850 .n(8)
5851 .k(2)
5852 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005853 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005854 }
5855#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5856
5857
Marat Dukhan1c587112020-04-08 20:04:28 -07005858#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07005859 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005860 TEST_REQUIRES_ARM_NEON_FMA;
5861 GemmMicrokernelTester()
5862 .mr(1)
5863 .nr(8)
5864 .kr(1)
5865 .sr(1)
5866 .m(1)
5867 .n(8)
5868 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005869 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005870 }
5871
Marat Dukhande06f492020-04-09 00:19:31 -07005872 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005873 TEST_REQUIRES_ARM_NEON_FMA;
5874 GemmMicrokernelTester()
5875 .mr(1)
5876 .nr(8)
5877 .kr(1)
5878 .sr(1)
5879 .m(1)
5880 .n(8)
5881 .k(2)
5882 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005883 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005884 }
5885
Marat Dukhande06f492020-04-09 00:19:31 -07005886 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005887 TEST_REQUIRES_ARM_NEON_FMA;
5888 GemmMicrokernelTester()
5889 .mr(1)
5890 .nr(8)
5891 .kr(1)
5892 .sr(1)
5893 .m(1)
5894 .n(8)
5895 .k(2)
5896 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005897 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005898 }
5899
Marat Dukhande06f492020-04-09 00:19:31 -07005900 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005901 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005902 for (uint32_t n = 1; n <= 8; n++) {
5903 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005904 GemmMicrokernelTester()
5905 .mr(1)
5906 .nr(8)
5907 .kr(1)
5908 .sr(1)
5909 .m(m)
5910 .n(n)
5911 .k(2)
5912 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005913 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005914 }
5915 }
5916 }
5917
Marat Dukhande06f492020-04-09 00:19:31 -07005918 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005919 TEST_REQUIRES_ARM_NEON_FMA;
5920 for (uint32_t m = 1; m <= 1; m++) {
5921 GemmMicrokernelTester()
5922 .mr(1)
5923 .nr(8)
5924 .kr(1)
5925 .sr(1)
5926 .m(m)
5927 .n(8)
5928 .k(2)
5929 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005930 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005931 }
5932 }
5933
Marat Dukhande06f492020-04-09 00:19:31 -07005934 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005935 TEST_REQUIRES_ARM_NEON_FMA;
5936 for (uint32_t n = 1; n <= 8; n++) {
5937 GemmMicrokernelTester()
5938 .mr(1)
5939 .nr(8)
5940 .kr(1)
5941 .sr(1)
5942 .m(1)
5943 .n(n)
5944 .k(2)
5945 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005946 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005947 }
5948 }
5949
Marat Dukhande06f492020-04-09 00:19:31 -07005950 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005951 TEST_REQUIRES_ARM_NEON_FMA;
5952 for (size_t k = 1; k < 2; k++) {
5953 GemmMicrokernelTester()
5954 .mr(1)
5955 .nr(8)
5956 .kr(1)
5957 .sr(1)
5958 .m(1)
5959 .n(8)
5960 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005961 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005962 }
5963 }
5964
Marat Dukhande06f492020-04-09 00:19:31 -07005965 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005966 TEST_REQUIRES_ARM_NEON_FMA;
5967 for (size_t k = 1; k < 2; k++) {
5968 GemmMicrokernelTester()
5969 .mr(1)
5970 .nr(8)
5971 .kr(1)
5972 .sr(1)
5973 .m(1)
5974 .n(8)
5975 .k(k)
5976 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005977 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005978 }
5979 }
5980
Marat Dukhande06f492020-04-09 00:19:31 -07005981 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005982 TEST_REQUIRES_ARM_NEON_FMA;
5983 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005984 for (uint32_t n = 1; n <= 8; n++) {
5985 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005986 GemmMicrokernelTester()
5987 .mr(1)
5988 .nr(8)
5989 .kr(1)
5990 .sr(1)
5991 .m(m)
5992 .n(n)
5993 .k(k)
5994 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005995 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005996 }
5997 }
5998 }
5999 }
6000
Marat Dukhande06f492020-04-09 00:19:31 -07006001 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006002 TEST_REQUIRES_ARM_NEON_FMA;
6003 for (size_t k = 3; k < 4; k++) {
6004 GemmMicrokernelTester()
6005 .mr(1)
6006 .nr(8)
6007 .kr(1)
6008 .sr(1)
6009 .m(1)
6010 .n(8)
6011 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006012 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006013 }
6014 }
6015
Marat Dukhande06f492020-04-09 00:19:31 -07006016 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006017 TEST_REQUIRES_ARM_NEON_FMA;
6018 for (size_t k = 3; k < 4; k++) {
6019 GemmMicrokernelTester()
6020 .mr(1)
6021 .nr(8)
6022 .kr(1)
6023 .sr(1)
6024 .m(1)
6025 .n(8)
6026 .k(k)
6027 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006028 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006029 }
6030 }
6031
Marat Dukhande06f492020-04-09 00:19:31 -07006032 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006033 TEST_REQUIRES_ARM_NEON_FMA;
6034 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006035 for (uint32_t n = 1; n <= 8; n++) {
6036 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006037 GemmMicrokernelTester()
6038 .mr(1)
6039 .nr(8)
6040 .kr(1)
6041 .sr(1)
6042 .m(m)
6043 .n(n)
6044 .k(k)
6045 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006046 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006047 }
6048 }
6049 }
6050 }
6051
Marat Dukhande06f492020-04-09 00:19:31 -07006052 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006053 TEST_REQUIRES_ARM_NEON_FMA;
6054 for (size_t k = 4; k <= 20; k += 2) {
6055 GemmMicrokernelTester()
6056 .mr(1)
6057 .nr(8)
6058 .kr(1)
6059 .sr(1)
6060 .m(1)
6061 .n(8)
6062 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006063 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006064 }
6065 }
6066
Marat Dukhande06f492020-04-09 00:19:31 -07006067 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006068 TEST_REQUIRES_ARM_NEON_FMA;
6069 for (size_t k = 4; k <= 20; k += 2) {
6070 GemmMicrokernelTester()
6071 .mr(1)
6072 .nr(8)
6073 .kr(1)
6074 .sr(1)
6075 .m(1)
6076 .n(8)
6077 .k(k)
6078 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006079 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006080 }
6081 }
6082
Marat Dukhande06f492020-04-09 00:19:31 -07006083 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006084 TEST_REQUIRES_ARM_NEON_FMA;
6085 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006086 for (uint32_t n = 1; n <= 8; n++) {
6087 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006088 GemmMicrokernelTester()
6089 .mr(1)
6090 .nr(8)
6091 .kr(1)
6092 .sr(1)
6093 .m(m)
6094 .n(n)
6095 .k(k)
6096 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006097 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006098 }
6099 }
6100 }
6101 }
6102
Marat Dukhande06f492020-04-09 00:19:31 -07006103 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006104 TEST_REQUIRES_ARM_NEON_FMA;
6105 for (uint32_t n = 9; n < 16; n++) {
6106 for (size_t k = 1; k <= 10; k += 3) {
6107 GemmMicrokernelTester()
6108 .mr(1)
6109 .nr(8)
6110 .kr(1)
6111 .sr(1)
6112 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006113 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006114 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006115 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006116 }
6117 }
6118 }
6119
Marat Dukhande06f492020-04-09 00:19:31 -07006120 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006121 TEST_REQUIRES_ARM_NEON_FMA;
6122 for (uint32_t n = 9; n < 16; n++) {
6123 for (size_t k = 1; k <= 10; k += 3) {
6124 GemmMicrokernelTester()
6125 .mr(1)
6126 .nr(8)
6127 .kr(1)
6128 .sr(1)
6129 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006130 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006131 .k(k)
6132 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006133 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006134 }
6135 }
6136 }
6137
Marat Dukhande06f492020-04-09 00:19:31 -07006138 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006139 TEST_REQUIRES_ARM_NEON_FMA;
6140 for (uint32_t n = 9; n < 16; n++) {
6141 for (size_t k = 1; k <= 10; k += 3) {
6142 GemmMicrokernelTester()
6143 .mr(1)
6144 .nr(8)
6145 .kr(1)
6146 .sr(1)
6147 .m(1)
6148 .n(n)
6149 .k(k)
6150 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006151 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006152 }
6153 }
6154 }
6155
Marat Dukhande06f492020-04-09 00:19:31 -07006156 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006157 TEST_REQUIRES_ARM_NEON_FMA;
6158 for (uint32_t n = 9; n < 16; n++) {
6159 for (size_t k = 1; k <= 10; k += 3) {
6160 for (uint32_t m = 1; m <= 1; m++) {
6161 GemmMicrokernelTester()
6162 .mr(1)
6163 .nr(8)
6164 .kr(1)
6165 .sr(1)
6166 .m(m)
6167 .n(n)
6168 .k(k)
6169 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006170 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006171 }
6172 }
6173 }
6174 }
6175
Marat Dukhande06f492020-04-09 00:19:31 -07006176 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006177 TEST_REQUIRES_ARM_NEON_FMA;
6178 for (uint32_t n = 16; n <= 24; n += 8) {
6179 for (size_t k = 1; k <= 10; k += 3) {
6180 GemmMicrokernelTester()
6181 .mr(1)
6182 .nr(8)
6183 .kr(1)
6184 .sr(1)
6185 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006186 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006187 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006188 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006189 }
6190 }
6191 }
6192
Marat Dukhande06f492020-04-09 00:19:31 -07006193 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006194 TEST_REQUIRES_ARM_NEON_FMA;
6195 for (uint32_t n = 16; n <= 24; n += 8) {
6196 for (size_t k = 1; k <= 10; k += 3) {
6197 GemmMicrokernelTester()
6198 .mr(1)
6199 .nr(8)
6200 .kr(1)
6201 .sr(1)
6202 .m(1)
6203 .n(n)
6204 .k(k)
6205 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006206 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006207 }
6208 }
6209 }
6210
Marat Dukhande06f492020-04-09 00:19:31 -07006211 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006212 TEST_REQUIRES_ARM_NEON_FMA;
6213 for (uint32_t n = 16; n <= 24; n += 8) {
6214 for (size_t k = 1; k <= 10; k += 3) {
6215 GemmMicrokernelTester()
6216 .mr(1)
6217 .nr(8)
6218 .kr(1)
6219 .sr(1)
6220 .m(1)
6221 .n(n)
6222 .k(k)
6223 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006224 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006225 }
6226 }
6227 }
6228
Marat Dukhande06f492020-04-09 00:19:31 -07006229 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006230 TEST_REQUIRES_ARM_NEON_FMA;
6231 for (uint32_t n = 16; n <= 24; n += 8) {
6232 for (size_t k = 1; k <= 10; k += 3) {
6233 for (uint32_t m = 1; m <= 1; m++) {
6234 GemmMicrokernelTester()
6235 .mr(1)
6236 .nr(8)
6237 .kr(1)
6238 .sr(1)
6239 .m(m)
6240 .n(n)
6241 .k(k)
6242 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006243 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006244 }
6245 }
6246 }
6247 }
6248
Marat Dukhande06f492020-04-09 00:19:31 -07006249 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006250 TEST_REQUIRES_ARM_NEON_FMA;
6251 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006252 for (uint32_t n = 1; n <= 8; n++) {
6253 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006254 GemmMicrokernelTester()
6255 .mr(1)
6256 .nr(8)
6257 .kr(1)
6258 .sr(1)
6259 .m(m)
6260 .n(n)
6261 .k(k)
6262 .cm_stride(11)
6263 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006264 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006265 }
6266 }
6267 }
6268 }
6269
Marat Dukhande06f492020-04-09 00:19:31 -07006270 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006271 TEST_REQUIRES_ARM_NEON_FMA;
6272 GemmMicrokernelTester()
6273 .mr(1)
6274 .nr(8)
6275 .kr(1)
6276 .sr(1)
6277 .m(1)
6278 .n(8)
6279 .k(2)
6280 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006281 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006282 }
6283
Marat Dukhande06f492020-04-09 00:19:31 -07006284 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006285 TEST_REQUIRES_ARM_NEON_FMA;
6286 GemmMicrokernelTester()
6287 .mr(1)
6288 .nr(8)
6289 .kr(1)
6290 .sr(1)
6291 .m(1)
6292 .n(8)
6293 .k(2)
6294 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006295 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006296 }
6297
Marat Dukhande06f492020-04-09 00:19:31 -07006298 TEST(F32_GEMMINC_MINMAX_1X8__NEONFMA_LANE_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006299 TEST_REQUIRES_ARM_NEON_FMA;
6300 GemmMicrokernelTester()
6301 .mr(1)
6302 .nr(8)
6303 .kr(1)
6304 .sr(1)
6305 .m(1)
6306 .n(8)
6307 .k(2)
6308 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006309 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006310 }
6311#endif // XNN_ARCH_ARM64
6312
6313
6314#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07006315 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006316 TEST_REQUIRES_ARM_NEON_FMA;
6317 GemmMicrokernelTester()
6318 .mr(4)
6319 .nr(8)
6320 .kr(1)
6321 .sr(1)
6322 .m(4)
6323 .n(8)
6324 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006325 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006326 }
6327
Marat Dukhande06f492020-04-09 00:19:31 -07006328 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006329 TEST_REQUIRES_ARM_NEON_FMA;
6330 GemmMicrokernelTester()
6331 .mr(4)
6332 .nr(8)
6333 .kr(1)
6334 .sr(1)
6335 .m(4)
6336 .n(8)
6337 .k(2)
6338 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006339 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006340 }
6341
Marat Dukhande06f492020-04-09 00:19:31 -07006342 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006343 TEST_REQUIRES_ARM_NEON_FMA;
6344 GemmMicrokernelTester()
6345 .mr(4)
6346 .nr(8)
6347 .kr(1)
6348 .sr(1)
6349 .m(4)
6350 .n(8)
6351 .k(2)
6352 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006353 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006354 }
6355
Marat Dukhande06f492020-04-09 00:19:31 -07006356 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006357 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006358 for (uint32_t n = 1; n <= 8; n++) {
6359 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006360 GemmMicrokernelTester()
6361 .mr(4)
6362 .nr(8)
6363 .kr(1)
6364 .sr(1)
6365 .m(m)
6366 .n(n)
6367 .k(2)
6368 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006369 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006370 }
6371 }
6372 }
6373
Marat Dukhande06f492020-04-09 00:19:31 -07006374 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006375 TEST_REQUIRES_ARM_NEON_FMA;
6376 for (uint32_t m = 1; m <= 4; m++) {
6377 GemmMicrokernelTester()
6378 .mr(4)
6379 .nr(8)
6380 .kr(1)
6381 .sr(1)
6382 .m(m)
6383 .n(8)
6384 .k(2)
6385 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006386 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006387 }
6388 }
6389
Marat Dukhande06f492020-04-09 00:19:31 -07006390 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006391 TEST_REQUIRES_ARM_NEON_FMA;
6392 for (uint32_t n = 1; n <= 8; n++) {
6393 GemmMicrokernelTester()
6394 .mr(4)
6395 .nr(8)
6396 .kr(1)
6397 .sr(1)
6398 .m(4)
6399 .n(n)
6400 .k(2)
6401 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006402 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006403 }
6404 }
6405
Marat Dukhande06f492020-04-09 00:19:31 -07006406 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006407 TEST_REQUIRES_ARM_NEON_FMA;
6408 for (size_t k = 1; k < 2; k++) {
6409 GemmMicrokernelTester()
6410 .mr(4)
6411 .nr(8)
6412 .kr(1)
6413 .sr(1)
6414 .m(4)
6415 .n(8)
6416 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006417 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006418 }
6419 }
6420
Marat Dukhande06f492020-04-09 00:19:31 -07006421 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006422 TEST_REQUIRES_ARM_NEON_FMA;
6423 for (size_t k = 1; k < 2; k++) {
6424 GemmMicrokernelTester()
6425 .mr(4)
6426 .nr(8)
6427 .kr(1)
6428 .sr(1)
6429 .m(4)
6430 .n(8)
6431 .k(k)
6432 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006433 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006434 }
6435 }
6436
Marat Dukhande06f492020-04-09 00:19:31 -07006437 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006438 TEST_REQUIRES_ARM_NEON_FMA;
6439 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006440 for (uint32_t n = 1; n <= 8; n++) {
6441 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006442 GemmMicrokernelTester()
6443 .mr(4)
6444 .nr(8)
6445 .kr(1)
6446 .sr(1)
6447 .m(m)
6448 .n(n)
6449 .k(k)
6450 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006451 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006452 }
6453 }
6454 }
6455 }
6456
Marat Dukhande06f492020-04-09 00:19:31 -07006457 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006458 TEST_REQUIRES_ARM_NEON_FMA;
6459 for (size_t k = 3; k < 4; k++) {
6460 GemmMicrokernelTester()
6461 .mr(4)
6462 .nr(8)
6463 .kr(1)
6464 .sr(1)
6465 .m(4)
6466 .n(8)
6467 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006468 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006469 }
6470 }
6471
Marat Dukhande06f492020-04-09 00:19:31 -07006472 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006473 TEST_REQUIRES_ARM_NEON_FMA;
6474 for (size_t k = 3; k < 4; k++) {
6475 GemmMicrokernelTester()
6476 .mr(4)
6477 .nr(8)
6478 .kr(1)
6479 .sr(1)
6480 .m(4)
6481 .n(8)
6482 .k(k)
6483 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006484 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006485 }
6486 }
6487
Marat Dukhande06f492020-04-09 00:19:31 -07006488 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006489 TEST_REQUIRES_ARM_NEON_FMA;
6490 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006491 for (uint32_t n = 1; n <= 8; n++) {
6492 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006493 GemmMicrokernelTester()
6494 .mr(4)
6495 .nr(8)
6496 .kr(1)
6497 .sr(1)
6498 .m(m)
6499 .n(n)
6500 .k(k)
6501 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006502 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006503 }
6504 }
6505 }
6506 }
6507
Marat Dukhande06f492020-04-09 00:19:31 -07006508 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006509 TEST_REQUIRES_ARM_NEON_FMA;
6510 for (size_t k = 4; k <= 20; k += 2) {
6511 GemmMicrokernelTester()
6512 .mr(4)
6513 .nr(8)
6514 .kr(1)
6515 .sr(1)
6516 .m(4)
6517 .n(8)
6518 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006519 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006520 }
6521 }
6522
Marat Dukhande06f492020-04-09 00:19:31 -07006523 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006524 TEST_REQUIRES_ARM_NEON_FMA;
6525 for (size_t k = 4; k <= 20; k += 2) {
6526 GemmMicrokernelTester()
6527 .mr(4)
6528 .nr(8)
6529 .kr(1)
6530 .sr(1)
6531 .m(4)
6532 .n(8)
6533 .k(k)
6534 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006535 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006536 }
6537 }
6538
Marat Dukhande06f492020-04-09 00:19:31 -07006539 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006540 TEST_REQUIRES_ARM_NEON_FMA;
6541 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006542 for (uint32_t n = 1; n <= 8; n++) {
6543 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006544 GemmMicrokernelTester()
6545 .mr(4)
6546 .nr(8)
6547 .kr(1)
6548 .sr(1)
6549 .m(m)
6550 .n(n)
6551 .k(k)
6552 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006553 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006554 }
6555 }
6556 }
6557 }
6558
Marat Dukhande06f492020-04-09 00:19:31 -07006559 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006560 TEST_REQUIRES_ARM_NEON_FMA;
6561 for (uint32_t n = 9; n < 16; n++) {
6562 for (size_t k = 1; k <= 10; k += 3) {
6563 GemmMicrokernelTester()
6564 .mr(4)
6565 .nr(8)
6566 .kr(1)
6567 .sr(1)
6568 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006569 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006570 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006571 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006572 }
6573 }
6574 }
6575
Marat Dukhande06f492020-04-09 00:19:31 -07006576 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006577 TEST_REQUIRES_ARM_NEON_FMA;
6578 for (uint32_t n = 9; n < 16; n++) {
6579 for (size_t k = 1; k <= 10; k += 3) {
6580 GemmMicrokernelTester()
6581 .mr(4)
6582 .nr(8)
6583 .kr(1)
6584 .sr(1)
6585 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006586 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006587 .k(k)
6588 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006589 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006590 }
6591 }
6592 }
6593
Marat Dukhande06f492020-04-09 00:19:31 -07006594 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006595 TEST_REQUIRES_ARM_NEON_FMA;
6596 for (uint32_t n = 9; n < 16; n++) {
6597 for (size_t k = 1; k <= 10; k += 3) {
6598 GemmMicrokernelTester()
6599 .mr(4)
6600 .nr(8)
6601 .kr(1)
6602 .sr(1)
6603 .m(4)
6604 .n(n)
6605 .k(k)
6606 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006607 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006608 }
6609 }
6610 }
6611
Marat Dukhande06f492020-04-09 00:19:31 -07006612 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006613 TEST_REQUIRES_ARM_NEON_FMA;
6614 for (uint32_t n = 9; n < 16; n++) {
6615 for (size_t k = 1; k <= 10; k += 3) {
6616 for (uint32_t m = 1; m <= 4; m++) {
6617 GemmMicrokernelTester()
6618 .mr(4)
6619 .nr(8)
6620 .kr(1)
6621 .sr(1)
6622 .m(m)
6623 .n(n)
6624 .k(k)
6625 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006626 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006627 }
6628 }
6629 }
6630 }
6631
Marat Dukhande06f492020-04-09 00:19:31 -07006632 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006633 TEST_REQUIRES_ARM_NEON_FMA;
6634 for (uint32_t n = 16; n <= 24; n += 8) {
6635 for (size_t k = 1; k <= 10; k += 3) {
6636 GemmMicrokernelTester()
6637 .mr(4)
6638 .nr(8)
6639 .kr(1)
6640 .sr(1)
6641 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006642 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006643 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006644 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006645 }
6646 }
6647 }
6648
Marat Dukhande06f492020-04-09 00:19:31 -07006649 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006650 TEST_REQUIRES_ARM_NEON_FMA;
6651 for (uint32_t n = 16; n <= 24; n += 8) {
6652 for (size_t k = 1; k <= 10; k += 3) {
6653 GemmMicrokernelTester()
6654 .mr(4)
6655 .nr(8)
6656 .kr(1)
6657 .sr(1)
6658 .m(4)
6659 .n(n)
6660 .k(k)
6661 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006662 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006663 }
6664 }
6665 }
6666
Marat Dukhande06f492020-04-09 00:19:31 -07006667 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006668 TEST_REQUIRES_ARM_NEON_FMA;
6669 for (uint32_t n = 16; n <= 24; n += 8) {
6670 for (size_t k = 1; k <= 10; k += 3) {
6671 GemmMicrokernelTester()
6672 .mr(4)
6673 .nr(8)
6674 .kr(1)
6675 .sr(1)
6676 .m(4)
6677 .n(n)
6678 .k(k)
6679 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006680 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006681 }
6682 }
6683 }
6684
Marat Dukhande06f492020-04-09 00:19:31 -07006685 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006686 TEST_REQUIRES_ARM_NEON_FMA;
6687 for (uint32_t n = 16; n <= 24; n += 8) {
6688 for (size_t k = 1; k <= 10; k += 3) {
6689 for (uint32_t m = 1; m <= 4; m++) {
6690 GemmMicrokernelTester()
6691 .mr(4)
6692 .nr(8)
6693 .kr(1)
6694 .sr(1)
6695 .m(m)
6696 .n(n)
6697 .k(k)
6698 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006699 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006700 }
6701 }
6702 }
6703 }
6704
Marat Dukhande06f492020-04-09 00:19:31 -07006705 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006706 TEST_REQUIRES_ARM_NEON_FMA;
6707 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006708 for (uint32_t n = 1; n <= 8; n++) {
6709 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006710 GemmMicrokernelTester()
6711 .mr(4)
6712 .nr(8)
6713 .kr(1)
6714 .sr(1)
6715 .m(m)
6716 .n(n)
6717 .k(k)
6718 .cm_stride(11)
6719 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006720 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006721 }
6722 }
6723 }
6724 }
6725
Marat Dukhande06f492020-04-09 00:19:31 -07006726 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006727 TEST_REQUIRES_ARM_NEON_FMA;
6728 GemmMicrokernelTester()
6729 .mr(4)
6730 .nr(8)
6731 .kr(1)
6732 .sr(1)
6733 .m(4)
6734 .n(8)
6735 .k(2)
6736 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006737 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006738 }
6739
Marat Dukhande06f492020-04-09 00:19:31 -07006740 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006741 TEST_REQUIRES_ARM_NEON_FMA;
6742 GemmMicrokernelTester()
6743 .mr(4)
6744 .nr(8)
6745 .kr(1)
6746 .sr(1)
6747 .m(4)
6748 .n(8)
6749 .k(2)
6750 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006751 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006752 }
6753
Marat Dukhande06f492020-04-09 00:19:31 -07006754 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_LANE_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006755 TEST_REQUIRES_ARM_NEON_FMA;
6756 GemmMicrokernelTester()
6757 .mr(4)
6758 .nr(8)
6759 .kr(1)
6760 .sr(1)
6761 .m(4)
6762 .n(8)
6763 .k(2)
6764 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006765 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006766 }
6767#endif // XNN_ARCH_ARM64
6768
6769
6770#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07006771 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006772 TEST_REQUIRES_ARM_NEON_FMA;
6773 GemmMicrokernelTester()
6774 .mr(5)
6775 .nr(8)
6776 .kr(1)
6777 .sr(1)
6778 .m(5)
6779 .n(8)
6780 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006781 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006782 }
6783
Marat Dukhande06f492020-04-09 00:19:31 -07006784 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006785 TEST_REQUIRES_ARM_NEON_FMA;
6786 GemmMicrokernelTester()
6787 .mr(5)
6788 .nr(8)
6789 .kr(1)
6790 .sr(1)
6791 .m(5)
6792 .n(8)
6793 .k(2)
6794 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006795 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006796 }
6797
Marat Dukhande06f492020-04-09 00:19:31 -07006798 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006799 TEST_REQUIRES_ARM_NEON_FMA;
6800 GemmMicrokernelTester()
6801 .mr(5)
6802 .nr(8)
6803 .kr(1)
6804 .sr(1)
6805 .m(5)
6806 .n(8)
6807 .k(2)
6808 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006809 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006810 }
6811
Marat Dukhande06f492020-04-09 00:19:31 -07006812 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006813 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006814 for (uint32_t n = 1; n <= 8; n++) {
6815 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006816 GemmMicrokernelTester()
6817 .mr(5)
6818 .nr(8)
6819 .kr(1)
6820 .sr(1)
6821 .m(m)
6822 .n(n)
6823 .k(2)
6824 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006825 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006826 }
6827 }
6828 }
6829
Marat Dukhande06f492020-04-09 00:19:31 -07006830 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006831 TEST_REQUIRES_ARM_NEON_FMA;
6832 for (uint32_t m = 1; m <= 5; m++) {
6833 GemmMicrokernelTester()
6834 .mr(5)
6835 .nr(8)
6836 .kr(1)
6837 .sr(1)
6838 .m(m)
6839 .n(8)
6840 .k(2)
6841 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006842 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006843 }
6844 }
6845
Marat Dukhande06f492020-04-09 00:19:31 -07006846 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006847 TEST_REQUIRES_ARM_NEON_FMA;
6848 for (uint32_t n = 1; n <= 8; n++) {
6849 GemmMicrokernelTester()
6850 .mr(5)
6851 .nr(8)
6852 .kr(1)
6853 .sr(1)
6854 .m(5)
6855 .n(n)
6856 .k(2)
6857 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006858 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006859 }
6860 }
6861
Marat Dukhande06f492020-04-09 00:19:31 -07006862 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006863 TEST_REQUIRES_ARM_NEON_FMA;
6864 for (size_t k = 1; k < 2; k++) {
6865 GemmMicrokernelTester()
6866 .mr(5)
6867 .nr(8)
6868 .kr(1)
6869 .sr(1)
6870 .m(5)
6871 .n(8)
6872 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006873 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006874 }
6875 }
6876
Marat Dukhande06f492020-04-09 00:19:31 -07006877 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006878 TEST_REQUIRES_ARM_NEON_FMA;
6879 for (size_t k = 1; k < 2; k++) {
6880 GemmMicrokernelTester()
6881 .mr(5)
6882 .nr(8)
6883 .kr(1)
6884 .sr(1)
6885 .m(5)
6886 .n(8)
6887 .k(k)
6888 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006889 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006890 }
6891 }
6892
Marat Dukhande06f492020-04-09 00:19:31 -07006893 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006894 TEST_REQUIRES_ARM_NEON_FMA;
6895 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006896 for (uint32_t n = 1; n <= 8; n++) {
6897 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006898 GemmMicrokernelTester()
6899 .mr(5)
6900 .nr(8)
6901 .kr(1)
6902 .sr(1)
6903 .m(m)
6904 .n(n)
6905 .k(k)
6906 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006907 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006908 }
6909 }
6910 }
6911 }
6912
Marat Dukhande06f492020-04-09 00:19:31 -07006913 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006914 TEST_REQUIRES_ARM_NEON_FMA;
6915 for (size_t k = 3; k < 4; k++) {
6916 GemmMicrokernelTester()
6917 .mr(5)
6918 .nr(8)
6919 .kr(1)
6920 .sr(1)
6921 .m(5)
6922 .n(8)
6923 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006924 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006925 }
6926 }
6927
Marat Dukhande06f492020-04-09 00:19:31 -07006928 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006929 TEST_REQUIRES_ARM_NEON_FMA;
6930 for (size_t k = 3; k < 4; k++) {
6931 GemmMicrokernelTester()
6932 .mr(5)
6933 .nr(8)
6934 .kr(1)
6935 .sr(1)
6936 .m(5)
6937 .n(8)
6938 .k(k)
6939 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006940 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006941 }
6942 }
6943
Marat Dukhande06f492020-04-09 00:19:31 -07006944 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006945 TEST_REQUIRES_ARM_NEON_FMA;
6946 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006947 for (uint32_t n = 1; n <= 8; n++) {
6948 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006949 GemmMicrokernelTester()
6950 .mr(5)
6951 .nr(8)
6952 .kr(1)
6953 .sr(1)
6954 .m(m)
6955 .n(n)
6956 .k(k)
6957 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006958 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006959 }
6960 }
6961 }
6962 }
6963
Marat Dukhande06f492020-04-09 00:19:31 -07006964 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006965 TEST_REQUIRES_ARM_NEON_FMA;
6966 for (size_t k = 4; k <= 20; k += 2) {
6967 GemmMicrokernelTester()
6968 .mr(5)
6969 .nr(8)
6970 .kr(1)
6971 .sr(1)
6972 .m(5)
6973 .n(8)
6974 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006975 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006976 }
6977 }
6978
Marat Dukhande06f492020-04-09 00:19:31 -07006979 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006980 TEST_REQUIRES_ARM_NEON_FMA;
6981 for (size_t k = 4; k <= 20; k += 2) {
6982 GemmMicrokernelTester()
6983 .mr(5)
6984 .nr(8)
6985 .kr(1)
6986 .sr(1)
6987 .m(5)
6988 .n(8)
6989 .k(k)
6990 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006991 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006992 }
6993 }
6994
Marat Dukhande06f492020-04-09 00:19:31 -07006995 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006996 TEST_REQUIRES_ARM_NEON_FMA;
6997 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006998 for (uint32_t n = 1; n <= 8; n++) {
6999 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007000 GemmMicrokernelTester()
7001 .mr(5)
7002 .nr(8)
7003 .kr(1)
7004 .sr(1)
7005 .m(m)
7006 .n(n)
7007 .k(k)
7008 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007009 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007010 }
7011 }
7012 }
7013 }
7014
Marat Dukhande06f492020-04-09 00:19:31 -07007015 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007016 TEST_REQUIRES_ARM_NEON_FMA;
7017 for (uint32_t n = 9; n < 16; n++) {
7018 for (size_t k = 1; k <= 10; k += 3) {
7019 GemmMicrokernelTester()
7020 .mr(5)
7021 .nr(8)
7022 .kr(1)
7023 .sr(1)
7024 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007025 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007026 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007027 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007028 }
7029 }
7030 }
7031
Marat Dukhande06f492020-04-09 00:19:31 -07007032 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007033 TEST_REQUIRES_ARM_NEON_FMA;
7034 for (uint32_t n = 9; n < 16; n++) {
7035 for (size_t k = 1; k <= 10; k += 3) {
7036 GemmMicrokernelTester()
7037 .mr(5)
7038 .nr(8)
7039 .kr(1)
7040 .sr(1)
7041 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007042 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007043 .k(k)
7044 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007045 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007046 }
7047 }
7048 }
7049
Marat Dukhande06f492020-04-09 00:19:31 -07007050 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007051 TEST_REQUIRES_ARM_NEON_FMA;
7052 for (uint32_t n = 9; n < 16; n++) {
7053 for (size_t k = 1; k <= 10; k += 3) {
7054 GemmMicrokernelTester()
7055 .mr(5)
7056 .nr(8)
7057 .kr(1)
7058 .sr(1)
7059 .m(5)
7060 .n(n)
7061 .k(k)
7062 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007063 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007064 }
7065 }
7066 }
7067
Marat Dukhande06f492020-04-09 00:19:31 -07007068 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007069 TEST_REQUIRES_ARM_NEON_FMA;
7070 for (uint32_t n = 9; n < 16; n++) {
7071 for (size_t k = 1; k <= 10; k += 3) {
7072 for (uint32_t m = 1; m <= 5; m++) {
7073 GemmMicrokernelTester()
7074 .mr(5)
7075 .nr(8)
7076 .kr(1)
7077 .sr(1)
7078 .m(m)
7079 .n(n)
7080 .k(k)
7081 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007082 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007083 }
7084 }
7085 }
7086 }
7087
Marat Dukhande06f492020-04-09 00:19:31 -07007088 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007089 TEST_REQUIRES_ARM_NEON_FMA;
7090 for (uint32_t n = 16; n <= 24; n += 8) {
7091 for (size_t k = 1; k <= 10; k += 3) {
7092 GemmMicrokernelTester()
7093 .mr(5)
7094 .nr(8)
7095 .kr(1)
7096 .sr(1)
7097 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007098 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007099 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007100 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007101 }
7102 }
7103 }
7104
Marat Dukhande06f492020-04-09 00:19:31 -07007105 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007106 TEST_REQUIRES_ARM_NEON_FMA;
7107 for (uint32_t n = 16; n <= 24; n += 8) {
7108 for (size_t k = 1; k <= 10; k += 3) {
7109 GemmMicrokernelTester()
7110 .mr(5)
7111 .nr(8)
7112 .kr(1)
7113 .sr(1)
7114 .m(5)
7115 .n(n)
7116 .k(k)
7117 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007118 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007119 }
7120 }
7121 }
7122
Marat Dukhande06f492020-04-09 00:19:31 -07007123 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007124 TEST_REQUIRES_ARM_NEON_FMA;
7125 for (uint32_t n = 16; n <= 24; n += 8) {
7126 for (size_t k = 1; k <= 10; k += 3) {
7127 GemmMicrokernelTester()
7128 .mr(5)
7129 .nr(8)
7130 .kr(1)
7131 .sr(1)
7132 .m(5)
7133 .n(n)
7134 .k(k)
7135 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007136 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007137 }
7138 }
7139 }
7140
Marat Dukhande06f492020-04-09 00:19:31 -07007141 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007142 TEST_REQUIRES_ARM_NEON_FMA;
7143 for (uint32_t n = 16; n <= 24; n += 8) {
7144 for (size_t k = 1; k <= 10; k += 3) {
7145 for (uint32_t m = 1; m <= 5; m++) {
7146 GemmMicrokernelTester()
7147 .mr(5)
7148 .nr(8)
7149 .kr(1)
7150 .sr(1)
7151 .m(m)
7152 .n(n)
7153 .k(k)
7154 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007155 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007156 }
7157 }
7158 }
7159 }
7160
Marat Dukhande06f492020-04-09 00:19:31 -07007161 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007162 TEST_REQUIRES_ARM_NEON_FMA;
7163 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007164 for (uint32_t n = 1; n <= 8; n++) {
7165 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007166 GemmMicrokernelTester()
7167 .mr(5)
7168 .nr(8)
7169 .kr(1)
7170 .sr(1)
7171 .m(m)
7172 .n(n)
7173 .k(k)
7174 .cm_stride(11)
7175 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007176 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007177 }
7178 }
7179 }
7180 }
7181
Marat Dukhande06f492020-04-09 00:19:31 -07007182 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007183 TEST_REQUIRES_ARM_NEON_FMA;
7184 GemmMicrokernelTester()
7185 .mr(5)
7186 .nr(8)
7187 .kr(1)
7188 .sr(1)
7189 .m(5)
7190 .n(8)
7191 .k(2)
7192 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007193 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007194 }
7195
Marat Dukhande06f492020-04-09 00:19:31 -07007196 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007197 TEST_REQUIRES_ARM_NEON_FMA;
7198 GemmMicrokernelTester()
7199 .mr(5)
7200 .nr(8)
7201 .kr(1)
7202 .sr(1)
7203 .m(5)
7204 .n(8)
7205 .k(2)
7206 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007207 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007208 }
7209
Marat Dukhande06f492020-04-09 00:19:31 -07007210 TEST(F32_GEMMINC_MINMAX_5X8__NEONFMA_LANE_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007211 TEST_REQUIRES_ARM_NEON_FMA;
7212 GemmMicrokernelTester()
7213 .mr(5)
7214 .nr(8)
7215 .kr(1)
7216 .sr(1)
7217 .m(5)
7218 .n(8)
7219 .k(2)
7220 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007221 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007222 }
7223#endif // XNN_ARCH_ARM64
7224
7225
7226#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07007227 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007228 TEST_REQUIRES_ARM_NEON_FMA;
7229 GemmMicrokernelTester()
7230 .mr(6)
7231 .nr(8)
7232 .kr(1)
7233 .sr(1)
7234 .m(6)
7235 .n(8)
7236 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007237 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007238 }
7239
Marat Dukhande06f492020-04-09 00:19:31 -07007240 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007241 TEST_REQUIRES_ARM_NEON_FMA;
7242 GemmMicrokernelTester()
7243 .mr(6)
7244 .nr(8)
7245 .kr(1)
7246 .sr(1)
7247 .m(6)
7248 .n(8)
7249 .k(2)
7250 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007251 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007252 }
7253
Marat Dukhande06f492020-04-09 00:19:31 -07007254 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007255 TEST_REQUIRES_ARM_NEON_FMA;
7256 GemmMicrokernelTester()
7257 .mr(6)
7258 .nr(8)
7259 .kr(1)
7260 .sr(1)
7261 .m(6)
7262 .n(8)
7263 .k(2)
7264 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007265 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007266 }
7267
Marat Dukhande06f492020-04-09 00:19:31 -07007268 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007269 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007270 for (uint32_t n = 1; n <= 8; n++) {
7271 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007272 GemmMicrokernelTester()
7273 .mr(6)
7274 .nr(8)
7275 .kr(1)
7276 .sr(1)
7277 .m(m)
7278 .n(n)
7279 .k(2)
7280 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007281 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007282 }
7283 }
7284 }
7285
Marat Dukhande06f492020-04-09 00:19:31 -07007286 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007287 TEST_REQUIRES_ARM_NEON_FMA;
7288 for (uint32_t m = 1; m <= 6; m++) {
7289 GemmMicrokernelTester()
7290 .mr(6)
7291 .nr(8)
7292 .kr(1)
7293 .sr(1)
7294 .m(m)
7295 .n(8)
7296 .k(2)
7297 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007298 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007299 }
7300 }
7301
Marat Dukhande06f492020-04-09 00:19:31 -07007302 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007303 TEST_REQUIRES_ARM_NEON_FMA;
7304 for (uint32_t n = 1; n <= 8; n++) {
7305 GemmMicrokernelTester()
7306 .mr(6)
7307 .nr(8)
7308 .kr(1)
7309 .sr(1)
7310 .m(6)
7311 .n(n)
7312 .k(2)
7313 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007314 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007315 }
7316 }
7317
Marat Dukhande06f492020-04-09 00:19:31 -07007318 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007319 TEST_REQUIRES_ARM_NEON_FMA;
7320 for (size_t k = 1; k < 2; k++) {
7321 GemmMicrokernelTester()
7322 .mr(6)
7323 .nr(8)
7324 .kr(1)
7325 .sr(1)
7326 .m(6)
7327 .n(8)
7328 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007329 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007330 }
7331 }
7332
Marat Dukhande06f492020-04-09 00:19:31 -07007333 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007334 TEST_REQUIRES_ARM_NEON_FMA;
7335 for (size_t k = 1; k < 2; k++) {
7336 GemmMicrokernelTester()
7337 .mr(6)
7338 .nr(8)
7339 .kr(1)
7340 .sr(1)
7341 .m(6)
7342 .n(8)
7343 .k(k)
7344 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007345 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007346 }
7347 }
7348
Marat Dukhande06f492020-04-09 00:19:31 -07007349 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007350 TEST_REQUIRES_ARM_NEON_FMA;
7351 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007352 for (uint32_t n = 1; n <= 8; n++) {
7353 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007354 GemmMicrokernelTester()
7355 .mr(6)
7356 .nr(8)
7357 .kr(1)
7358 .sr(1)
7359 .m(m)
7360 .n(n)
7361 .k(k)
7362 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007363 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007364 }
7365 }
7366 }
7367 }
7368
Marat Dukhande06f492020-04-09 00:19:31 -07007369 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007370 TEST_REQUIRES_ARM_NEON_FMA;
7371 for (size_t k = 3; k < 4; k++) {
7372 GemmMicrokernelTester()
7373 .mr(6)
7374 .nr(8)
7375 .kr(1)
7376 .sr(1)
7377 .m(6)
7378 .n(8)
7379 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007380 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007381 }
7382 }
7383
Marat Dukhande06f492020-04-09 00:19:31 -07007384 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007385 TEST_REQUIRES_ARM_NEON_FMA;
7386 for (size_t k = 3; k < 4; k++) {
7387 GemmMicrokernelTester()
7388 .mr(6)
7389 .nr(8)
7390 .kr(1)
7391 .sr(1)
7392 .m(6)
7393 .n(8)
7394 .k(k)
7395 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007396 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007397 }
7398 }
7399
Marat Dukhande06f492020-04-09 00:19:31 -07007400 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007401 TEST_REQUIRES_ARM_NEON_FMA;
7402 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007403 for (uint32_t n = 1; n <= 8; n++) {
7404 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007405 GemmMicrokernelTester()
7406 .mr(6)
7407 .nr(8)
7408 .kr(1)
7409 .sr(1)
7410 .m(m)
7411 .n(n)
7412 .k(k)
7413 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007414 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007415 }
7416 }
7417 }
7418 }
7419
Marat Dukhande06f492020-04-09 00:19:31 -07007420 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007421 TEST_REQUIRES_ARM_NEON_FMA;
7422 for (size_t k = 4; k <= 20; k += 2) {
7423 GemmMicrokernelTester()
7424 .mr(6)
7425 .nr(8)
7426 .kr(1)
7427 .sr(1)
7428 .m(6)
7429 .n(8)
7430 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007431 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007432 }
7433 }
7434
Marat Dukhande06f492020-04-09 00:19:31 -07007435 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007436 TEST_REQUIRES_ARM_NEON_FMA;
7437 for (size_t k = 4; k <= 20; k += 2) {
7438 GemmMicrokernelTester()
7439 .mr(6)
7440 .nr(8)
7441 .kr(1)
7442 .sr(1)
7443 .m(6)
7444 .n(8)
7445 .k(k)
7446 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007447 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007448 }
7449 }
7450
Marat Dukhande06f492020-04-09 00:19:31 -07007451 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007452 TEST_REQUIRES_ARM_NEON_FMA;
7453 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007454 for (uint32_t n = 1; n <= 8; n++) {
7455 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007456 GemmMicrokernelTester()
7457 .mr(6)
7458 .nr(8)
7459 .kr(1)
7460 .sr(1)
7461 .m(m)
7462 .n(n)
7463 .k(k)
7464 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007465 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007466 }
7467 }
7468 }
7469 }
7470
Marat Dukhande06f492020-04-09 00:19:31 -07007471 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007472 TEST_REQUIRES_ARM_NEON_FMA;
7473 for (uint32_t n = 9; n < 16; n++) {
7474 for (size_t k = 1; k <= 10; k += 3) {
7475 GemmMicrokernelTester()
7476 .mr(6)
7477 .nr(8)
7478 .kr(1)
7479 .sr(1)
7480 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007481 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007482 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007483 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007484 }
7485 }
7486 }
7487
Marat Dukhande06f492020-04-09 00:19:31 -07007488 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007489 TEST_REQUIRES_ARM_NEON_FMA;
7490 for (uint32_t n = 9; n < 16; n++) {
7491 for (size_t k = 1; k <= 10; k += 3) {
7492 GemmMicrokernelTester()
7493 .mr(6)
7494 .nr(8)
7495 .kr(1)
7496 .sr(1)
7497 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007498 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007499 .k(k)
7500 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007501 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007502 }
7503 }
7504 }
7505
Marat Dukhande06f492020-04-09 00:19:31 -07007506 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007507 TEST_REQUIRES_ARM_NEON_FMA;
7508 for (uint32_t n = 9; n < 16; n++) {
7509 for (size_t k = 1; k <= 10; k += 3) {
7510 GemmMicrokernelTester()
7511 .mr(6)
7512 .nr(8)
7513 .kr(1)
7514 .sr(1)
7515 .m(6)
7516 .n(n)
7517 .k(k)
7518 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007519 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007520 }
7521 }
7522 }
7523
Marat Dukhande06f492020-04-09 00:19:31 -07007524 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007525 TEST_REQUIRES_ARM_NEON_FMA;
7526 for (uint32_t n = 9; n < 16; n++) {
7527 for (size_t k = 1; k <= 10; k += 3) {
7528 for (uint32_t m = 1; m <= 6; m++) {
7529 GemmMicrokernelTester()
7530 .mr(6)
7531 .nr(8)
7532 .kr(1)
7533 .sr(1)
7534 .m(m)
7535 .n(n)
7536 .k(k)
7537 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007538 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007539 }
7540 }
7541 }
7542 }
7543
Marat Dukhande06f492020-04-09 00:19:31 -07007544 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007545 TEST_REQUIRES_ARM_NEON_FMA;
7546 for (uint32_t n = 16; n <= 24; n += 8) {
7547 for (size_t k = 1; k <= 10; k += 3) {
7548 GemmMicrokernelTester()
7549 .mr(6)
7550 .nr(8)
7551 .kr(1)
7552 .sr(1)
7553 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007554 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007555 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007556 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007557 }
7558 }
7559 }
7560
Marat Dukhande06f492020-04-09 00:19:31 -07007561 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007562 TEST_REQUIRES_ARM_NEON_FMA;
7563 for (uint32_t n = 16; n <= 24; n += 8) {
7564 for (size_t k = 1; k <= 10; k += 3) {
7565 GemmMicrokernelTester()
7566 .mr(6)
7567 .nr(8)
7568 .kr(1)
7569 .sr(1)
7570 .m(6)
7571 .n(n)
7572 .k(k)
7573 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007574 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007575 }
7576 }
7577 }
7578
Marat Dukhande06f492020-04-09 00:19:31 -07007579 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007580 TEST_REQUIRES_ARM_NEON_FMA;
7581 for (uint32_t n = 16; n <= 24; n += 8) {
7582 for (size_t k = 1; k <= 10; k += 3) {
7583 GemmMicrokernelTester()
7584 .mr(6)
7585 .nr(8)
7586 .kr(1)
7587 .sr(1)
7588 .m(6)
7589 .n(n)
7590 .k(k)
7591 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007592 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007593 }
7594 }
7595 }
7596
Marat Dukhande06f492020-04-09 00:19:31 -07007597 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007598 TEST_REQUIRES_ARM_NEON_FMA;
7599 for (uint32_t n = 16; n <= 24; n += 8) {
7600 for (size_t k = 1; k <= 10; k += 3) {
7601 for (uint32_t m = 1; m <= 6; m++) {
7602 GemmMicrokernelTester()
7603 .mr(6)
7604 .nr(8)
7605 .kr(1)
7606 .sr(1)
7607 .m(m)
7608 .n(n)
7609 .k(k)
7610 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007611 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007612 }
7613 }
7614 }
7615 }
7616
Marat Dukhande06f492020-04-09 00:19:31 -07007617 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007618 TEST_REQUIRES_ARM_NEON_FMA;
7619 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007620 for (uint32_t n = 1; n <= 8; n++) {
7621 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007622 GemmMicrokernelTester()
7623 .mr(6)
7624 .nr(8)
7625 .kr(1)
7626 .sr(1)
7627 .m(m)
7628 .n(n)
7629 .k(k)
7630 .cm_stride(11)
7631 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007632 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007633 }
7634 }
7635 }
7636 }
7637
Marat Dukhande06f492020-04-09 00:19:31 -07007638 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007639 TEST_REQUIRES_ARM_NEON_FMA;
7640 GemmMicrokernelTester()
7641 .mr(6)
7642 .nr(8)
7643 .kr(1)
7644 .sr(1)
7645 .m(6)
7646 .n(8)
7647 .k(2)
7648 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007649 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007650 }
7651
Marat Dukhande06f492020-04-09 00:19:31 -07007652 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007653 TEST_REQUIRES_ARM_NEON_FMA;
7654 GemmMicrokernelTester()
7655 .mr(6)
7656 .nr(8)
7657 .kr(1)
7658 .sr(1)
7659 .m(6)
7660 .n(8)
7661 .k(2)
7662 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007663 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007664 }
7665
Marat Dukhande06f492020-04-09 00:19:31 -07007666 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007667 TEST_REQUIRES_ARM_NEON_FMA;
7668 GemmMicrokernelTester()
7669 .mr(6)
7670 .nr(8)
7671 .kr(1)
7672 .sr(1)
7673 .m(6)
7674 .n(8)
7675 .k(2)
7676 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007677 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007678 }
7679#endif // XNN_ARCH_ARM64
7680
7681
7682#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07007683 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007684 TEST_REQUIRES_ARM_NEON_FMA;
7685 GemmMicrokernelTester()
7686 .mr(6)
7687 .nr(8)
7688 .kr(1)
7689 .sr(1)
7690 .m(6)
7691 .n(8)
7692 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007693 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007694 }
7695
Marat Dukhande06f492020-04-09 00:19:31 -07007696 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007697 TEST_REQUIRES_ARM_NEON_FMA;
7698 GemmMicrokernelTester()
7699 .mr(6)
7700 .nr(8)
7701 .kr(1)
7702 .sr(1)
7703 .m(6)
7704 .n(8)
7705 .k(4)
7706 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007707 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007708 }
7709
Marat Dukhande06f492020-04-09 00:19:31 -07007710 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007711 TEST_REQUIRES_ARM_NEON_FMA;
7712 GemmMicrokernelTester()
7713 .mr(6)
7714 .nr(8)
7715 .kr(1)
7716 .sr(1)
7717 .m(6)
7718 .n(8)
7719 .k(4)
7720 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007721 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007722 }
7723
Marat Dukhande06f492020-04-09 00:19:31 -07007724 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007725 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007726 for (uint32_t n = 1; n <= 8; n++) {
7727 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007728 GemmMicrokernelTester()
7729 .mr(6)
7730 .nr(8)
7731 .kr(1)
7732 .sr(1)
7733 .m(m)
7734 .n(n)
7735 .k(4)
7736 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007737 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007738 }
7739 }
7740 }
7741
Marat Dukhande06f492020-04-09 00:19:31 -07007742 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007743 TEST_REQUIRES_ARM_NEON_FMA;
7744 for (uint32_t m = 1; m <= 6; m++) {
7745 GemmMicrokernelTester()
7746 .mr(6)
7747 .nr(8)
7748 .kr(1)
7749 .sr(1)
7750 .m(m)
7751 .n(8)
7752 .k(4)
7753 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007754 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007755 }
7756 }
7757
Marat Dukhande06f492020-04-09 00:19:31 -07007758 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007759 TEST_REQUIRES_ARM_NEON_FMA;
7760 for (uint32_t n = 1; n <= 8; n++) {
7761 GemmMicrokernelTester()
7762 .mr(6)
7763 .nr(8)
7764 .kr(1)
7765 .sr(1)
7766 .m(6)
7767 .n(n)
7768 .k(4)
7769 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007770 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007771 }
7772 }
7773
Marat Dukhande06f492020-04-09 00:19:31 -07007774 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007775 TEST_REQUIRES_ARM_NEON_FMA;
7776 for (size_t k = 1; k < 4; k++) {
7777 GemmMicrokernelTester()
7778 .mr(6)
7779 .nr(8)
7780 .kr(1)
7781 .sr(1)
7782 .m(6)
7783 .n(8)
7784 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007785 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007786 }
7787 }
7788
Marat Dukhande06f492020-04-09 00:19:31 -07007789 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007790 TEST_REQUIRES_ARM_NEON_FMA;
7791 for (size_t k = 1; k < 4; k++) {
7792 GemmMicrokernelTester()
7793 .mr(6)
7794 .nr(8)
7795 .kr(1)
7796 .sr(1)
7797 .m(6)
7798 .n(8)
7799 .k(k)
7800 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007801 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007802 }
7803 }
7804
Marat Dukhande06f492020-04-09 00:19:31 -07007805 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007806 TEST_REQUIRES_ARM_NEON_FMA;
7807 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007808 for (uint32_t n = 1; n <= 8; n++) {
7809 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007810 GemmMicrokernelTester()
7811 .mr(6)
7812 .nr(8)
7813 .kr(1)
7814 .sr(1)
7815 .m(m)
7816 .n(n)
7817 .k(k)
7818 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007819 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007820 }
7821 }
7822 }
7823 }
7824
Marat Dukhande06f492020-04-09 00:19:31 -07007825 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007826 TEST_REQUIRES_ARM_NEON_FMA;
7827 for (size_t k = 5; k < 8; k++) {
7828 GemmMicrokernelTester()
7829 .mr(6)
7830 .nr(8)
7831 .kr(1)
7832 .sr(1)
7833 .m(6)
7834 .n(8)
7835 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007836 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007837 }
7838 }
7839
Marat Dukhande06f492020-04-09 00:19:31 -07007840 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007841 TEST_REQUIRES_ARM_NEON_FMA;
7842 for (size_t k = 5; k < 8; k++) {
7843 GemmMicrokernelTester()
7844 .mr(6)
7845 .nr(8)
7846 .kr(1)
7847 .sr(1)
7848 .m(6)
7849 .n(8)
7850 .k(k)
7851 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007852 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007853 }
7854 }
7855
Marat Dukhande06f492020-04-09 00:19:31 -07007856 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007857 TEST_REQUIRES_ARM_NEON_FMA;
7858 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007859 for (uint32_t n = 1; n <= 8; n++) {
7860 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007861 GemmMicrokernelTester()
7862 .mr(6)
7863 .nr(8)
7864 .kr(1)
7865 .sr(1)
7866 .m(m)
7867 .n(n)
7868 .k(k)
7869 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007870 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007871 }
7872 }
7873 }
7874 }
7875
Marat Dukhande06f492020-04-09 00:19:31 -07007876 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007877 TEST_REQUIRES_ARM_NEON_FMA;
7878 for (size_t k = 8; k <= 40; k += 4) {
7879 GemmMicrokernelTester()
7880 .mr(6)
7881 .nr(8)
7882 .kr(1)
7883 .sr(1)
7884 .m(6)
7885 .n(8)
7886 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007887 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007888 }
7889 }
7890
Marat Dukhande06f492020-04-09 00:19:31 -07007891 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007892 TEST_REQUIRES_ARM_NEON_FMA;
7893 for (size_t k = 8; k <= 40; k += 4) {
7894 GemmMicrokernelTester()
7895 .mr(6)
7896 .nr(8)
7897 .kr(1)
7898 .sr(1)
7899 .m(6)
7900 .n(8)
7901 .k(k)
7902 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007903 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007904 }
7905 }
7906
Marat Dukhande06f492020-04-09 00:19:31 -07007907 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007908 TEST_REQUIRES_ARM_NEON_FMA;
7909 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007910 for (uint32_t n = 1; n <= 8; n++) {
7911 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007912 GemmMicrokernelTester()
7913 .mr(6)
7914 .nr(8)
7915 .kr(1)
7916 .sr(1)
7917 .m(m)
7918 .n(n)
7919 .k(k)
7920 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007921 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007922 }
7923 }
7924 }
7925 }
7926
Marat Dukhande06f492020-04-09 00:19:31 -07007927 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007928 TEST_REQUIRES_ARM_NEON_FMA;
7929 for (uint32_t n = 9; n < 16; n++) {
7930 for (size_t k = 1; k <= 20; k += 5) {
7931 GemmMicrokernelTester()
7932 .mr(6)
7933 .nr(8)
7934 .kr(1)
7935 .sr(1)
7936 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007937 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007938 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007939 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007940 }
7941 }
7942 }
7943
Marat Dukhande06f492020-04-09 00:19:31 -07007944 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007945 TEST_REQUIRES_ARM_NEON_FMA;
7946 for (uint32_t n = 9; n < 16; n++) {
7947 for (size_t k = 1; k <= 20; k += 5) {
7948 GemmMicrokernelTester()
7949 .mr(6)
7950 .nr(8)
7951 .kr(1)
7952 .sr(1)
7953 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007954 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007955 .k(k)
7956 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007957 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007958 }
7959 }
7960 }
7961
Marat Dukhande06f492020-04-09 00:19:31 -07007962 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007963 TEST_REQUIRES_ARM_NEON_FMA;
7964 for (uint32_t n = 9; n < 16; n++) {
7965 for (size_t k = 1; k <= 20; k += 5) {
7966 GemmMicrokernelTester()
7967 .mr(6)
7968 .nr(8)
7969 .kr(1)
7970 .sr(1)
7971 .m(6)
7972 .n(n)
7973 .k(k)
7974 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007975 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007976 }
7977 }
7978 }
7979
Marat Dukhande06f492020-04-09 00:19:31 -07007980 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007981 TEST_REQUIRES_ARM_NEON_FMA;
7982 for (uint32_t n = 9; n < 16; n++) {
7983 for (size_t k = 1; k <= 20; k += 5) {
7984 for (uint32_t m = 1; m <= 6; m++) {
7985 GemmMicrokernelTester()
7986 .mr(6)
7987 .nr(8)
7988 .kr(1)
7989 .sr(1)
7990 .m(m)
7991 .n(n)
7992 .k(k)
7993 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007994 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007995 }
7996 }
7997 }
7998 }
7999
Marat Dukhande06f492020-04-09 00:19:31 -07008000 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008001 TEST_REQUIRES_ARM_NEON_FMA;
8002 for (uint32_t n = 16; n <= 24; n += 8) {
8003 for (size_t k = 1; k <= 20; k += 5) {
8004 GemmMicrokernelTester()
8005 .mr(6)
8006 .nr(8)
8007 .kr(1)
8008 .sr(1)
8009 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008010 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008011 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008012 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008013 }
8014 }
8015 }
8016
Marat Dukhande06f492020-04-09 00:19:31 -07008017 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008018 TEST_REQUIRES_ARM_NEON_FMA;
8019 for (uint32_t n = 16; n <= 24; n += 8) {
8020 for (size_t k = 1; k <= 20; k += 5) {
8021 GemmMicrokernelTester()
8022 .mr(6)
8023 .nr(8)
8024 .kr(1)
8025 .sr(1)
8026 .m(6)
8027 .n(n)
8028 .k(k)
8029 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008030 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008031 }
8032 }
8033 }
8034
Marat Dukhande06f492020-04-09 00:19:31 -07008035 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008036 TEST_REQUIRES_ARM_NEON_FMA;
8037 for (uint32_t n = 16; n <= 24; n += 8) {
8038 for (size_t k = 1; k <= 20; k += 5) {
8039 GemmMicrokernelTester()
8040 .mr(6)
8041 .nr(8)
8042 .kr(1)
8043 .sr(1)
8044 .m(6)
8045 .n(n)
8046 .k(k)
8047 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008048 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008049 }
8050 }
8051 }
8052
Marat Dukhande06f492020-04-09 00:19:31 -07008053 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008054 TEST_REQUIRES_ARM_NEON_FMA;
8055 for (uint32_t n = 16; n <= 24; n += 8) {
8056 for (size_t k = 1; k <= 20; k += 5) {
8057 for (uint32_t m = 1; m <= 6; m++) {
8058 GemmMicrokernelTester()
8059 .mr(6)
8060 .nr(8)
8061 .kr(1)
8062 .sr(1)
8063 .m(m)
8064 .n(n)
8065 .k(k)
8066 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008067 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008068 }
8069 }
8070 }
8071 }
8072
Marat Dukhande06f492020-04-09 00:19:31 -07008073 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008074 TEST_REQUIRES_ARM_NEON_FMA;
8075 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008076 for (uint32_t n = 1; n <= 8; n++) {
8077 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008078 GemmMicrokernelTester()
8079 .mr(6)
8080 .nr(8)
8081 .kr(1)
8082 .sr(1)
8083 .m(m)
8084 .n(n)
8085 .k(k)
8086 .cm_stride(11)
8087 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008088 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008089 }
8090 }
8091 }
8092 }
8093
Marat Dukhande06f492020-04-09 00:19:31 -07008094 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008095 TEST_REQUIRES_ARM_NEON_FMA;
8096 GemmMicrokernelTester()
8097 .mr(6)
8098 .nr(8)
8099 .kr(1)
8100 .sr(1)
8101 .m(6)
8102 .n(8)
8103 .k(4)
8104 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008105 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008106 }
8107
Marat Dukhande06f492020-04-09 00:19:31 -07008108 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008109 TEST_REQUIRES_ARM_NEON_FMA;
8110 GemmMicrokernelTester()
8111 .mr(6)
8112 .nr(8)
8113 .kr(1)
8114 .sr(1)
8115 .m(6)
8116 .n(8)
8117 .k(4)
8118 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008119 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008120 }
8121
Marat Dukhande06f492020-04-09 00:19:31 -07008122 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_LANE_LD128, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008123 TEST_REQUIRES_ARM_NEON_FMA;
8124 GemmMicrokernelTester()
8125 .mr(6)
8126 .nr(8)
8127 .kr(1)
8128 .sr(1)
8129 .m(6)
8130 .n(8)
8131 .k(4)
8132 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008133 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008134 }
8135#endif // XNN_ARCH_ARM64
8136
8137
8138#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07008139 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008140 TEST_REQUIRES_ARM_NEON;
8141 GemmMicrokernelTester()
8142 .mr(1)
8143 .nr(8)
8144 .kr(1)
8145 .sr(1)
8146 .m(1)
8147 .n(8)
8148 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008149 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008150 }
8151
Marat Dukhande06f492020-04-09 00:19:31 -07008152 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008153 TEST_REQUIRES_ARM_NEON;
8154 GemmMicrokernelTester()
8155 .mr(1)
8156 .nr(8)
8157 .kr(1)
8158 .sr(1)
8159 .m(1)
8160 .n(8)
8161 .k(2)
8162 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008163 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008164 }
8165
Marat Dukhande06f492020-04-09 00:19:31 -07008166 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008167 TEST_REQUIRES_ARM_NEON;
8168 GemmMicrokernelTester()
8169 .mr(1)
8170 .nr(8)
8171 .kr(1)
8172 .sr(1)
8173 .m(1)
8174 .n(8)
8175 .k(2)
8176 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008177 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008178 }
8179
Marat Dukhande06f492020-04-09 00:19:31 -07008180 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008181 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008182 for (uint32_t n = 1; n <= 8; n++) {
8183 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008184 GemmMicrokernelTester()
8185 .mr(1)
8186 .nr(8)
8187 .kr(1)
8188 .sr(1)
8189 .m(m)
8190 .n(n)
8191 .k(2)
8192 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008193 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008194 }
8195 }
8196 }
8197
Marat Dukhande06f492020-04-09 00:19:31 -07008198 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008199 TEST_REQUIRES_ARM_NEON;
8200 for (uint32_t m = 1; m <= 1; m++) {
8201 GemmMicrokernelTester()
8202 .mr(1)
8203 .nr(8)
8204 .kr(1)
8205 .sr(1)
8206 .m(m)
8207 .n(8)
8208 .k(2)
8209 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008210 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008211 }
8212 }
8213
Marat Dukhande06f492020-04-09 00:19:31 -07008214 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008215 TEST_REQUIRES_ARM_NEON;
8216 for (uint32_t n = 1; n <= 8; n++) {
8217 GemmMicrokernelTester()
8218 .mr(1)
8219 .nr(8)
8220 .kr(1)
8221 .sr(1)
8222 .m(1)
8223 .n(n)
8224 .k(2)
8225 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008226 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008227 }
8228 }
8229
Marat Dukhande06f492020-04-09 00:19:31 -07008230 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008231 TEST_REQUIRES_ARM_NEON;
8232 for (size_t k = 1; k < 2; k++) {
8233 GemmMicrokernelTester()
8234 .mr(1)
8235 .nr(8)
8236 .kr(1)
8237 .sr(1)
8238 .m(1)
8239 .n(8)
8240 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008241 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008242 }
8243 }
8244
Marat Dukhande06f492020-04-09 00:19:31 -07008245 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008246 TEST_REQUIRES_ARM_NEON;
8247 for (size_t k = 1; k < 2; k++) {
8248 GemmMicrokernelTester()
8249 .mr(1)
8250 .nr(8)
8251 .kr(1)
8252 .sr(1)
8253 .m(1)
8254 .n(8)
8255 .k(k)
8256 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008257 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008258 }
8259 }
8260
Marat Dukhande06f492020-04-09 00:19:31 -07008261 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008262 TEST_REQUIRES_ARM_NEON;
8263 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008264 for (uint32_t n = 1; n <= 8; n++) {
8265 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008266 GemmMicrokernelTester()
8267 .mr(1)
8268 .nr(8)
8269 .kr(1)
8270 .sr(1)
8271 .m(m)
8272 .n(n)
8273 .k(k)
8274 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008275 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008276 }
8277 }
8278 }
8279 }
8280
Marat Dukhande06f492020-04-09 00:19:31 -07008281 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008282 TEST_REQUIRES_ARM_NEON;
8283 for (size_t k = 3; k < 4; k++) {
8284 GemmMicrokernelTester()
8285 .mr(1)
8286 .nr(8)
8287 .kr(1)
8288 .sr(1)
8289 .m(1)
8290 .n(8)
8291 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008292 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008293 }
8294 }
8295
Marat Dukhande06f492020-04-09 00:19:31 -07008296 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008297 TEST_REQUIRES_ARM_NEON;
8298 for (size_t k = 3; k < 4; k++) {
8299 GemmMicrokernelTester()
8300 .mr(1)
8301 .nr(8)
8302 .kr(1)
8303 .sr(1)
8304 .m(1)
8305 .n(8)
8306 .k(k)
8307 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008308 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008309 }
8310 }
8311
Marat Dukhande06f492020-04-09 00:19:31 -07008312 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008313 TEST_REQUIRES_ARM_NEON;
8314 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008315 for (uint32_t n = 1; n <= 8; n++) {
8316 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008317 GemmMicrokernelTester()
8318 .mr(1)
8319 .nr(8)
8320 .kr(1)
8321 .sr(1)
8322 .m(m)
8323 .n(n)
8324 .k(k)
8325 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008326 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008327 }
8328 }
8329 }
8330 }
8331
Marat Dukhande06f492020-04-09 00:19:31 -07008332 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008333 TEST_REQUIRES_ARM_NEON;
8334 for (size_t k = 4; k <= 20; k += 2) {
8335 GemmMicrokernelTester()
8336 .mr(1)
8337 .nr(8)
8338 .kr(1)
8339 .sr(1)
8340 .m(1)
8341 .n(8)
8342 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008343 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008344 }
8345 }
8346
Marat Dukhande06f492020-04-09 00:19:31 -07008347 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008348 TEST_REQUIRES_ARM_NEON;
8349 for (size_t k = 4; k <= 20; k += 2) {
8350 GemmMicrokernelTester()
8351 .mr(1)
8352 .nr(8)
8353 .kr(1)
8354 .sr(1)
8355 .m(1)
8356 .n(8)
8357 .k(k)
8358 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008359 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008360 }
8361 }
8362
Marat Dukhande06f492020-04-09 00:19:31 -07008363 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008364 TEST_REQUIRES_ARM_NEON;
8365 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008366 for (uint32_t n = 1; n <= 8; n++) {
8367 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008368 GemmMicrokernelTester()
8369 .mr(1)
8370 .nr(8)
8371 .kr(1)
8372 .sr(1)
8373 .m(m)
8374 .n(n)
8375 .k(k)
8376 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008377 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008378 }
8379 }
8380 }
8381 }
8382
Marat Dukhande06f492020-04-09 00:19:31 -07008383 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008384 TEST_REQUIRES_ARM_NEON;
8385 for (uint32_t n = 9; n < 16; n++) {
8386 for (size_t k = 1; k <= 10; k += 3) {
8387 GemmMicrokernelTester()
8388 .mr(1)
8389 .nr(8)
8390 .kr(1)
8391 .sr(1)
8392 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008393 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008394 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008395 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008396 }
8397 }
8398 }
8399
Marat Dukhande06f492020-04-09 00:19:31 -07008400 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008401 TEST_REQUIRES_ARM_NEON;
8402 for (uint32_t n = 9; n < 16; n++) {
8403 for (size_t k = 1; k <= 10; k += 3) {
8404 GemmMicrokernelTester()
8405 .mr(1)
8406 .nr(8)
8407 .kr(1)
8408 .sr(1)
8409 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008410 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008411 .k(k)
8412 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008413 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008414 }
8415 }
8416 }
8417
Marat Dukhande06f492020-04-09 00:19:31 -07008418 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008419 TEST_REQUIRES_ARM_NEON;
8420 for (uint32_t n = 9; n < 16; n++) {
8421 for (size_t k = 1; k <= 10; k += 3) {
8422 GemmMicrokernelTester()
8423 .mr(1)
8424 .nr(8)
8425 .kr(1)
8426 .sr(1)
8427 .m(1)
8428 .n(n)
8429 .k(k)
8430 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008431 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008432 }
8433 }
8434 }
8435
Marat Dukhande06f492020-04-09 00:19:31 -07008436 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008437 TEST_REQUIRES_ARM_NEON;
8438 for (uint32_t n = 9; n < 16; n++) {
8439 for (size_t k = 1; k <= 10; k += 3) {
8440 for (uint32_t m = 1; m <= 1; m++) {
8441 GemmMicrokernelTester()
8442 .mr(1)
8443 .nr(8)
8444 .kr(1)
8445 .sr(1)
8446 .m(m)
8447 .n(n)
8448 .k(k)
8449 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008450 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008451 }
8452 }
8453 }
8454 }
8455
Marat Dukhande06f492020-04-09 00:19:31 -07008456 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008457 TEST_REQUIRES_ARM_NEON;
8458 for (uint32_t n = 16; n <= 24; n += 8) {
8459 for (size_t k = 1; k <= 10; k += 3) {
8460 GemmMicrokernelTester()
8461 .mr(1)
8462 .nr(8)
8463 .kr(1)
8464 .sr(1)
8465 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008466 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008467 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008468 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008469 }
8470 }
8471 }
8472
Marat Dukhande06f492020-04-09 00:19:31 -07008473 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008474 TEST_REQUIRES_ARM_NEON;
8475 for (uint32_t n = 16; n <= 24; n += 8) {
8476 for (size_t k = 1; k <= 10; k += 3) {
8477 GemmMicrokernelTester()
8478 .mr(1)
8479 .nr(8)
8480 .kr(1)
8481 .sr(1)
8482 .m(1)
8483 .n(n)
8484 .k(k)
8485 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008486 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008487 }
8488 }
8489 }
8490
Marat Dukhande06f492020-04-09 00:19:31 -07008491 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008492 TEST_REQUIRES_ARM_NEON;
8493 for (uint32_t n = 16; n <= 24; n += 8) {
8494 for (size_t k = 1; k <= 10; k += 3) {
8495 GemmMicrokernelTester()
8496 .mr(1)
8497 .nr(8)
8498 .kr(1)
8499 .sr(1)
8500 .m(1)
8501 .n(n)
8502 .k(k)
8503 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008504 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008505 }
8506 }
8507 }
8508
Marat Dukhande06f492020-04-09 00:19:31 -07008509 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008510 TEST_REQUIRES_ARM_NEON;
8511 for (uint32_t n = 16; n <= 24; n += 8) {
8512 for (size_t k = 1; k <= 10; k += 3) {
8513 for (uint32_t m = 1; m <= 1; m++) {
8514 GemmMicrokernelTester()
8515 .mr(1)
8516 .nr(8)
8517 .kr(1)
8518 .sr(1)
8519 .m(m)
8520 .n(n)
8521 .k(k)
8522 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008523 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008524 }
8525 }
8526 }
8527 }
8528
Marat Dukhande06f492020-04-09 00:19:31 -07008529 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008530 TEST_REQUIRES_ARM_NEON;
8531 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008532 for (uint32_t n = 1; n <= 8; n++) {
8533 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008534 GemmMicrokernelTester()
8535 .mr(1)
8536 .nr(8)
8537 .kr(1)
8538 .sr(1)
8539 .m(m)
8540 .n(n)
8541 .k(k)
8542 .cm_stride(11)
8543 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008544 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008545 }
8546 }
8547 }
8548 }
8549
Marat Dukhande06f492020-04-09 00:19:31 -07008550 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008551 TEST_REQUIRES_ARM_NEON;
8552 GemmMicrokernelTester()
8553 .mr(1)
8554 .nr(8)
8555 .kr(1)
8556 .sr(1)
8557 .m(1)
8558 .n(8)
8559 .k(2)
8560 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008561 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008562 }
8563
Marat Dukhande06f492020-04-09 00:19:31 -07008564 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008565 TEST_REQUIRES_ARM_NEON;
8566 GemmMicrokernelTester()
8567 .mr(1)
8568 .nr(8)
8569 .kr(1)
8570 .sr(1)
8571 .m(1)
8572 .n(8)
8573 .k(2)
8574 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008575 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008576 }
8577
Marat Dukhande06f492020-04-09 00:19:31 -07008578 TEST(F32_GEMMINC_MINMAX_1X8__NEON_DUP_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008579 TEST_REQUIRES_ARM_NEON;
8580 GemmMicrokernelTester()
8581 .mr(1)
8582 .nr(8)
8583 .kr(1)
8584 .sr(1)
8585 .m(1)
8586 .n(8)
8587 .k(2)
8588 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008589 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008590 }
8591#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8592
8593
8594#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07008595 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008596 TEST_REQUIRES_ARM_NEON;
8597 GemmMicrokernelTester()
8598 .mr(6)
8599 .nr(8)
8600 .kr(1)
8601 .sr(1)
8602 .m(6)
8603 .n(8)
8604 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008605 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008606 }
8607
Marat Dukhande06f492020-04-09 00:19:31 -07008608 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008609 TEST_REQUIRES_ARM_NEON;
8610 GemmMicrokernelTester()
8611 .mr(6)
8612 .nr(8)
8613 .kr(1)
8614 .sr(1)
8615 .m(6)
8616 .n(8)
8617 .k(2)
8618 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008619 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008620 }
8621
Marat Dukhande06f492020-04-09 00:19:31 -07008622 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008623 TEST_REQUIRES_ARM_NEON;
8624 GemmMicrokernelTester()
8625 .mr(6)
8626 .nr(8)
8627 .kr(1)
8628 .sr(1)
8629 .m(6)
8630 .n(8)
8631 .k(2)
8632 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008633 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008634 }
8635
Marat Dukhande06f492020-04-09 00:19:31 -07008636 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008637 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008638 for (uint32_t n = 1; n <= 8; n++) {
8639 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008640 GemmMicrokernelTester()
8641 .mr(6)
8642 .nr(8)
8643 .kr(1)
8644 .sr(1)
8645 .m(m)
8646 .n(n)
8647 .k(2)
8648 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008649 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008650 }
8651 }
8652 }
8653
Marat Dukhande06f492020-04-09 00:19:31 -07008654 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008655 TEST_REQUIRES_ARM_NEON;
8656 for (uint32_t m = 1; m <= 6; m++) {
8657 GemmMicrokernelTester()
8658 .mr(6)
8659 .nr(8)
8660 .kr(1)
8661 .sr(1)
8662 .m(m)
8663 .n(8)
8664 .k(2)
8665 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008666 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008667 }
8668 }
8669
Marat Dukhande06f492020-04-09 00:19:31 -07008670 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008671 TEST_REQUIRES_ARM_NEON;
8672 for (uint32_t n = 1; n <= 8; n++) {
8673 GemmMicrokernelTester()
8674 .mr(6)
8675 .nr(8)
8676 .kr(1)
8677 .sr(1)
8678 .m(6)
8679 .n(n)
8680 .k(2)
8681 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008682 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008683 }
8684 }
8685
Marat Dukhande06f492020-04-09 00:19:31 -07008686 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008687 TEST_REQUIRES_ARM_NEON;
8688 for (size_t k = 1; k < 2; k++) {
8689 GemmMicrokernelTester()
8690 .mr(6)
8691 .nr(8)
8692 .kr(1)
8693 .sr(1)
8694 .m(6)
8695 .n(8)
8696 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008697 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008698 }
8699 }
8700
Marat Dukhande06f492020-04-09 00:19:31 -07008701 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008702 TEST_REQUIRES_ARM_NEON;
8703 for (size_t k = 1; k < 2; k++) {
8704 GemmMicrokernelTester()
8705 .mr(6)
8706 .nr(8)
8707 .kr(1)
8708 .sr(1)
8709 .m(6)
8710 .n(8)
8711 .k(k)
8712 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008713 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008714 }
8715 }
8716
Marat Dukhande06f492020-04-09 00:19:31 -07008717 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008718 TEST_REQUIRES_ARM_NEON;
8719 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008720 for (uint32_t n = 1; n <= 8; n++) {
8721 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008722 GemmMicrokernelTester()
8723 .mr(6)
8724 .nr(8)
8725 .kr(1)
8726 .sr(1)
8727 .m(m)
8728 .n(n)
8729 .k(k)
8730 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008731 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008732 }
8733 }
8734 }
8735 }
8736
Marat Dukhande06f492020-04-09 00:19:31 -07008737 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008738 TEST_REQUIRES_ARM_NEON;
8739 for (size_t k = 3; k < 4; k++) {
8740 GemmMicrokernelTester()
8741 .mr(6)
8742 .nr(8)
8743 .kr(1)
8744 .sr(1)
8745 .m(6)
8746 .n(8)
8747 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008748 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008749 }
8750 }
8751
Marat Dukhande06f492020-04-09 00:19:31 -07008752 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008753 TEST_REQUIRES_ARM_NEON;
8754 for (size_t k = 3; k < 4; k++) {
8755 GemmMicrokernelTester()
8756 .mr(6)
8757 .nr(8)
8758 .kr(1)
8759 .sr(1)
8760 .m(6)
8761 .n(8)
8762 .k(k)
8763 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008764 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008765 }
8766 }
8767
Marat Dukhande06f492020-04-09 00:19:31 -07008768 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008769 TEST_REQUIRES_ARM_NEON;
8770 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008771 for (uint32_t n = 1; n <= 8; n++) {
8772 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008773 GemmMicrokernelTester()
8774 .mr(6)
8775 .nr(8)
8776 .kr(1)
8777 .sr(1)
8778 .m(m)
8779 .n(n)
8780 .k(k)
8781 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008782 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008783 }
8784 }
8785 }
8786 }
8787
Marat Dukhande06f492020-04-09 00:19:31 -07008788 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008789 TEST_REQUIRES_ARM_NEON;
8790 for (size_t k = 4; k <= 20; k += 2) {
8791 GemmMicrokernelTester()
8792 .mr(6)
8793 .nr(8)
8794 .kr(1)
8795 .sr(1)
8796 .m(6)
8797 .n(8)
8798 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008799 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008800 }
8801 }
8802
Marat Dukhande06f492020-04-09 00:19:31 -07008803 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008804 TEST_REQUIRES_ARM_NEON;
8805 for (size_t k = 4; k <= 20; k += 2) {
8806 GemmMicrokernelTester()
8807 .mr(6)
8808 .nr(8)
8809 .kr(1)
8810 .sr(1)
8811 .m(6)
8812 .n(8)
8813 .k(k)
8814 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008815 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008816 }
8817 }
8818
Marat Dukhande06f492020-04-09 00:19:31 -07008819 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008820 TEST_REQUIRES_ARM_NEON;
8821 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008822 for (uint32_t n = 1; n <= 8; n++) {
8823 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008824 GemmMicrokernelTester()
8825 .mr(6)
8826 .nr(8)
8827 .kr(1)
8828 .sr(1)
8829 .m(m)
8830 .n(n)
8831 .k(k)
8832 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008833 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008834 }
8835 }
8836 }
8837 }
8838
Marat Dukhande06f492020-04-09 00:19:31 -07008839 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008840 TEST_REQUIRES_ARM_NEON;
8841 for (uint32_t n = 9; n < 16; n++) {
8842 for (size_t k = 1; k <= 10; k += 3) {
8843 GemmMicrokernelTester()
8844 .mr(6)
8845 .nr(8)
8846 .kr(1)
8847 .sr(1)
8848 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008849 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008850 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008851 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008852 }
8853 }
8854 }
8855
Marat Dukhande06f492020-04-09 00:19:31 -07008856 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008857 TEST_REQUIRES_ARM_NEON;
8858 for (uint32_t n = 9; n < 16; n++) {
8859 for (size_t k = 1; k <= 10; k += 3) {
8860 GemmMicrokernelTester()
8861 .mr(6)
8862 .nr(8)
8863 .kr(1)
8864 .sr(1)
8865 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008866 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008867 .k(k)
8868 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008869 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008870 }
8871 }
8872 }
8873
Marat Dukhande06f492020-04-09 00:19:31 -07008874 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008875 TEST_REQUIRES_ARM_NEON;
8876 for (uint32_t n = 9; n < 16; n++) {
8877 for (size_t k = 1; k <= 10; k += 3) {
8878 GemmMicrokernelTester()
8879 .mr(6)
8880 .nr(8)
8881 .kr(1)
8882 .sr(1)
8883 .m(6)
8884 .n(n)
8885 .k(k)
8886 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008887 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008888 }
8889 }
8890 }
8891
Marat Dukhande06f492020-04-09 00:19:31 -07008892 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008893 TEST_REQUIRES_ARM_NEON;
8894 for (uint32_t n = 9; n < 16; n++) {
8895 for (size_t k = 1; k <= 10; k += 3) {
8896 for (uint32_t m = 1; m <= 6; m++) {
8897 GemmMicrokernelTester()
8898 .mr(6)
8899 .nr(8)
8900 .kr(1)
8901 .sr(1)
8902 .m(m)
8903 .n(n)
8904 .k(k)
8905 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008906 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008907 }
8908 }
8909 }
8910 }
8911
Marat Dukhande06f492020-04-09 00:19:31 -07008912 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008913 TEST_REQUIRES_ARM_NEON;
8914 for (uint32_t n = 16; n <= 24; n += 8) {
8915 for (size_t k = 1; k <= 10; k += 3) {
8916 GemmMicrokernelTester()
8917 .mr(6)
8918 .nr(8)
8919 .kr(1)
8920 .sr(1)
8921 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008922 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008923 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008924 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008925 }
8926 }
8927 }
8928
Marat Dukhande06f492020-04-09 00:19:31 -07008929 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008930 TEST_REQUIRES_ARM_NEON;
8931 for (uint32_t n = 16; n <= 24; n += 8) {
8932 for (size_t k = 1; k <= 10; k += 3) {
8933 GemmMicrokernelTester()
8934 .mr(6)
8935 .nr(8)
8936 .kr(1)
8937 .sr(1)
8938 .m(6)
8939 .n(n)
8940 .k(k)
8941 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008942 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008943 }
8944 }
8945 }
8946
Marat Dukhande06f492020-04-09 00:19:31 -07008947 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008948 TEST_REQUIRES_ARM_NEON;
8949 for (uint32_t n = 16; n <= 24; n += 8) {
8950 for (size_t k = 1; k <= 10; k += 3) {
8951 GemmMicrokernelTester()
8952 .mr(6)
8953 .nr(8)
8954 .kr(1)
8955 .sr(1)
8956 .m(6)
8957 .n(n)
8958 .k(k)
8959 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008960 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008961 }
8962 }
8963 }
8964
Marat Dukhande06f492020-04-09 00:19:31 -07008965 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008966 TEST_REQUIRES_ARM_NEON;
8967 for (uint32_t n = 16; n <= 24; n += 8) {
8968 for (size_t k = 1; k <= 10; k += 3) {
8969 for (uint32_t m = 1; m <= 6; m++) {
8970 GemmMicrokernelTester()
8971 .mr(6)
8972 .nr(8)
8973 .kr(1)
8974 .sr(1)
8975 .m(m)
8976 .n(n)
8977 .k(k)
8978 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008979 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008980 }
8981 }
8982 }
8983 }
8984
Marat Dukhande06f492020-04-09 00:19:31 -07008985 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008986 TEST_REQUIRES_ARM_NEON;
8987 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008988 for (uint32_t n = 1; n <= 8; n++) {
8989 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008990 GemmMicrokernelTester()
8991 .mr(6)
8992 .nr(8)
8993 .kr(1)
8994 .sr(1)
8995 .m(m)
8996 .n(n)
8997 .k(k)
8998 .cm_stride(11)
8999 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009000 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009001 }
9002 }
9003 }
9004 }
9005
Marat Dukhande06f492020-04-09 00:19:31 -07009006 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009007 TEST_REQUIRES_ARM_NEON;
9008 GemmMicrokernelTester()
9009 .mr(6)
9010 .nr(8)
9011 .kr(1)
9012 .sr(1)
9013 .m(6)
9014 .n(8)
9015 .k(2)
9016 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009017 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009018 }
9019
Marat Dukhande06f492020-04-09 00:19:31 -07009020 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009021 TEST_REQUIRES_ARM_NEON;
9022 GemmMicrokernelTester()
9023 .mr(6)
9024 .nr(8)
9025 .kr(1)
9026 .sr(1)
9027 .m(6)
9028 .n(8)
9029 .k(2)
9030 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009031 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009032 }
9033
Marat Dukhande06f492020-04-09 00:19:31 -07009034 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009035 TEST_REQUIRES_ARM_NEON;
9036 GemmMicrokernelTester()
9037 .mr(6)
9038 .nr(8)
9039 .kr(1)
9040 .sr(1)
9041 .m(6)
9042 .n(8)
9043 .k(2)
9044 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009045 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009046 }
9047#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9048
9049
9050#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07009051 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009052 TEST_REQUIRES_ARM_NEON;
9053 GemmMicrokernelTester()
9054 .mr(6)
9055 .nr(8)
9056 .kr(1)
9057 .sr(1)
9058 .m(6)
9059 .n(8)
9060 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009061 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009062 }
9063
Marat Dukhande06f492020-04-09 00:19:31 -07009064 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009065 TEST_REQUIRES_ARM_NEON;
9066 GemmMicrokernelTester()
9067 .mr(6)
9068 .nr(8)
9069 .kr(1)
9070 .sr(1)
9071 .m(6)
9072 .n(8)
9073 .k(4)
9074 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009075 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009076 }
9077
Marat Dukhande06f492020-04-09 00:19:31 -07009078 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009079 TEST_REQUIRES_ARM_NEON;
9080 GemmMicrokernelTester()
9081 .mr(6)
9082 .nr(8)
9083 .kr(1)
9084 .sr(1)
9085 .m(6)
9086 .n(8)
9087 .k(4)
9088 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009089 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009090 }
9091
Marat Dukhande06f492020-04-09 00:19:31 -07009092 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009093 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009094 for (uint32_t n = 1; n <= 8; n++) {
9095 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009096 GemmMicrokernelTester()
9097 .mr(6)
9098 .nr(8)
9099 .kr(1)
9100 .sr(1)
9101 .m(m)
9102 .n(n)
9103 .k(4)
9104 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009105 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009106 }
9107 }
9108 }
9109
Marat Dukhande06f492020-04-09 00:19:31 -07009110 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009111 TEST_REQUIRES_ARM_NEON;
9112 for (uint32_t m = 1; m <= 6; m++) {
9113 GemmMicrokernelTester()
9114 .mr(6)
9115 .nr(8)
9116 .kr(1)
9117 .sr(1)
9118 .m(m)
9119 .n(8)
9120 .k(4)
9121 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009122 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009123 }
9124 }
9125
Marat Dukhande06f492020-04-09 00:19:31 -07009126 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009127 TEST_REQUIRES_ARM_NEON;
9128 for (uint32_t n = 1; n <= 8; n++) {
9129 GemmMicrokernelTester()
9130 .mr(6)
9131 .nr(8)
9132 .kr(1)
9133 .sr(1)
9134 .m(6)
9135 .n(n)
9136 .k(4)
9137 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009138 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009139 }
9140 }
9141
Marat Dukhande06f492020-04-09 00:19:31 -07009142 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009143 TEST_REQUIRES_ARM_NEON;
9144 for (size_t k = 1; k < 4; k++) {
9145 GemmMicrokernelTester()
9146 .mr(6)
9147 .nr(8)
9148 .kr(1)
9149 .sr(1)
9150 .m(6)
9151 .n(8)
9152 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009153 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009154 }
9155 }
9156
Marat Dukhande06f492020-04-09 00:19:31 -07009157 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009158 TEST_REQUIRES_ARM_NEON;
9159 for (size_t k = 1; k < 4; k++) {
9160 GemmMicrokernelTester()
9161 .mr(6)
9162 .nr(8)
9163 .kr(1)
9164 .sr(1)
9165 .m(6)
9166 .n(8)
9167 .k(k)
9168 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009169 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009170 }
9171 }
9172
Marat Dukhande06f492020-04-09 00:19:31 -07009173 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009174 TEST_REQUIRES_ARM_NEON;
9175 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009176 for (uint32_t n = 1; n <= 8; n++) {
9177 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009178 GemmMicrokernelTester()
9179 .mr(6)
9180 .nr(8)
9181 .kr(1)
9182 .sr(1)
9183 .m(m)
9184 .n(n)
9185 .k(k)
9186 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009187 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009188 }
9189 }
9190 }
9191 }
9192
Marat Dukhande06f492020-04-09 00:19:31 -07009193 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009194 TEST_REQUIRES_ARM_NEON;
9195 for (size_t k = 5; k < 8; k++) {
9196 GemmMicrokernelTester()
9197 .mr(6)
9198 .nr(8)
9199 .kr(1)
9200 .sr(1)
9201 .m(6)
9202 .n(8)
9203 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009204 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009205 }
9206 }
9207
Marat Dukhande06f492020-04-09 00:19:31 -07009208 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009209 TEST_REQUIRES_ARM_NEON;
9210 for (size_t k = 5; k < 8; k++) {
9211 GemmMicrokernelTester()
9212 .mr(6)
9213 .nr(8)
9214 .kr(1)
9215 .sr(1)
9216 .m(6)
9217 .n(8)
9218 .k(k)
9219 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009220 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009221 }
9222 }
9223
Marat Dukhande06f492020-04-09 00:19:31 -07009224 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009225 TEST_REQUIRES_ARM_NEON;
9226 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009227 for (uint32_t n = 1; n <= 8; n++) {
9228 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009229 GemmMicrokernelTester()
9230 .mr(6)
9231 .nr(8)
9232 .kr(1)
9233 .sr(1)
9234 .m(m)
9235 .n(n)
9236 .k(k)
9237 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009238 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009239 }
9240 }
9241 }
9242 }
9243
Marat Dukhande06f492020-04-09 00:19:31 -07009244 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009245 TEST_REQUIRES_ARM_NEON;
9246 for (size_t k = 8; k <= 40; k += 4) {
9247 GemmMicrokernelTester()
9248 .mr(6)
9249 .nr(8)
9250 .kr(1)
9251 .sr(1)
9252 .m(6)
9253 .n(8)
9254 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009255 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009256 }
9257 }
9258
Marat Dukhande06f492020-04-09 00:19:31 -07009259 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009260 TEST_REQUIRES_ARM_NEON;
9261 for (size_t k = 8; k <= 40; k += 4) {
9262 GemmMicrokernelTester()
9263 .mr(6)
9264 .nr(8)
9265 .kr(1)
9266 .sr(1)
9267 .m(6)
9268 .n(8)
9269 .k(k)
9270 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009271 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009272 }
9273 }
9274
Marat Dukhande06f492020-04-09 00:19:31 -07009275 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009276 TEST_REQUIRES_ARM_NEON;
9277 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009278 for (uint32_t n = 1; n <= 8; n++) {
9279 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009280 GemmMicrokernelTester()
9281 .mr(6)
9282 .nr(8)
9283 .kr(1)
9284 .sr(1)
9285 .m(m)
9286 .n(n)
9287 .k(k)
9288 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009289 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009290 }
9291 }
9292 }
9293 }
9294
Marat Dukhande06f492020-04-09 00:19:31 -07009295 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009296 TEST_REQUIRES_ARM_NEON;
9297 for (uint32_t n = 9; n < 16; n++) {
9298 for (size_t k = 1; k <= 20; k += 5) {
9299 GemmMicrokernelTester()
9300 .mr(6)
9301 .nr(8)
9302 .kr(1)
9303 .sr(1)
9304 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009305 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009306 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009307 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009308 }
9309 }
9310 }
9311
Marat Dukhande06f492020-04-09 00:19:31 -07009312 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009313 TEST_REQUIRES_ARM_NEON;
9314 for (uint32_t n = 9; n < 16; n++) {
9315 for (size_t k = 1; k <= 20; k += 5) {
9316 GemmMicrokernelTester()
9317 .mr(6)
9318 .nr(8)
9319 .kr(1)
9320 .sr(1)
9321 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009322 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009323 .k(k)
9324 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009325 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009326 }
9327 }
9328 }
9329
Marat Dukhande06f492020-04-09 00:19:31 -07009330 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009331 TEST_REQUIRES_ARM_NEON;
9332 for (uint32_t n = 9; n < 16; n++) {
9333 for (size_t k = 1; k <= 20; k += 5) {
9334 GemmMicrokernelTester()
9335 .mr(6)
9336 .nr(8)
9337 .kr(1)
9338 .sr(1)
9339 .m(6)
9340 .n(n)
9341 .k(k)
9342 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009343 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009344 }
9345 }
9346 }
9347
Marat Dukhande06f492020-04-09 00:19:31 -07009348 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009349 TEST_REQUIRES_ARM_NEON;
9350 for (uint32_t n = 9; n < 16; n++) {
9351 for (size_t k = 1; k <= 20; k += 5) {
9352 for (uint32_t m = 1; m <= 6; m++) {
9353 GemmMicrokernelTester()
9354 .mr(6)
9355 .nr(8)
9356 .kr(1)
9357 .sr(1)
9358 .m(m)
9359 .n(n)
9360 .k(k)
9361 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009362 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009363 }
9364 }
9365 }
9366 }
9367
Marat Dukhande06f492020-04-09 00:19:31 -07009368 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009369 TEST_REQUIRES_ARM_NEON;
9370 for (uint32_t n = 16; n <= 24; n += 8) {
9371 for (size_t k = 1; k <= 20; k += 5) {
9372 GemmMicrokernelTester()
9373 .mr(6)
9374 .nr(8)
9375 .kr(1)
9376 .sr(1)
9377 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009378 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009379 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009380 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009381 }
9382 }
9383 }
9384
Marat Dukhande06f492020-04-09 00:19:31 -07009385 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009386 TEST_REQUIRES_ARM_NEON;
9387 for (uint32_t n = 16; n <= 24; n += 8) {
9388 for (size_t k = 1; k <= 20; k += 5) {
9389 GemmMicrokernelTester()
9390 .mr(6)
9391 .nr(8)
9392 .kr(1)
9393 .sr(1)
9394 .m(6)
9395 .n(n)
9396 .k(k)
9397 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009398 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009399 }
9400 }
9401 }
9402
Marat Dukhande06f492020-04-09 00:19:31 -07009403 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009404 TEST_REQUIRES_ARM_NEON;
9405 for (uint32_t n = 16; n <= 24; n += 8) {
9406 for (size_t k = 1; k <= 20; k += 5) {
9407 GemmMicrokernelTester()
9408 .mr(6)
9409 .nr(8)
9410 .kr(1)
9411 .sr(1)
9412 .m(6)
9413 .n(n)
9414 .k(k)
9415 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009416 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009417 }
9418 }
9419 }
9420
Marat Dukhande06f492020-04-09 00:19:31 -07009421 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009422 TEST_REQUIRES_ARM_NEON;
9423 for (uint32_t n = 16; n <= 24; n += 8) {
9424 for (size_t k = 1; k <= 20; k += 5) {
9425 for (uint32_t m = 1; m <= 6; m++) {
9426 GemmMicrokernelTester()
9427 .mr(6)
9428 .nr(8)
9429 .kr(1)
9430 .sr(1)
9431 .m(m)
9432 .n(n)
9433 .k(k)
9434 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009435 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009436 }
9437 }
9438 }
9439 }
9440
Marat Dukhande06f492020-04-09 00:19:31 -07009441 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009442 TEST_REQUIRES_ARM_NEON;
9443 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009444 for (uint32_t n = 1; n <= 8; n++) {
9445 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009446 GemmMicrokernelTester()
9447 .mr(6)
9448 .nr(8)
9449 .kr(1)
9450 .sr(1)
9451 .m(m)
9452 .n(n)
9453 .k(k)
9454 .cm_stride(11)
9455 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009456 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009457 }
9458 }
9459 }
9460 }
9461
Marat Dukhande06f492020-04-09 00:19:31 -07009462 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009463 TEST_REQUIRES_ARM_NEON;
9464 GemmMicrokernelTester()
9465 .mr(6)
9466 .nr(8)
9467 .kr(1)
9468 .sr(1)
9469 .m(6)
9470 .n(8)
9471 .k(4)
9472 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009473 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009474 }
9475
Marat Dukhande06f492020-04-09 00:19:31 -07009476 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009477 TEST_REQUIRES_ARM_NEON;
9478 GemmMicrokernelTester()
9479 .mr(6)
9480 .nr(8)
9481 .kr(1)
9482 .sr(1)
9483 .m(6)
9484 .n(8)
9485 .k(4)
9486 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009487 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009488 }
9489
Marat Dukhande06f492020-04-09 00:19:31 -07009490 TEST(F32_GEMMINC_MINMAX_6X8__NEON_DUP_LD128, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009491 TEST_REQUIRES_ARM_NEON;
9492 GemmMicrokernelTester()
9493 .mr(6)
9494 .nr(8)
9495 .kr(1)
9496 .sr(1)
9497 .m(6)
9498 .n(8)
9499 .k(4)
9500 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009501 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009502 }
9503#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9504
9505
9506#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07009507 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009508 TEST_REQUIRES_ARM_NEON_FMA;
9509 GemmMicrokernelTester()
9510 .mr(4)
9511 .nr(8)
9512 .kr(1)
9513 .sr(1)
9514 .m(4)
9515 .n(8)
9516 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009517 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009518 }
9519
Marat Dukhande06f492020-04-09 00:19:31 -07009520 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009521 TEST_REQUIRES_ARM_NEON_FMA;
9522 GemmMicrokernelTester()
9523 .mr(4)
9524 .nr(8)
9525 .kr(1)
9526 .sr(1)
9527 .m(4)
9528 .n(8)
9529 .k(2)
9530 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009531 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009532 }
9533
Marat Dukhande06f492020-04-09 00:19:31 -07009534 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009535 TEST_REQUIRES_ARM_NEON_FMA;
9536 GemmMicrokernelTester()
9537 .mr(4)
9538 .nr(8)
9539 .kr(1)
9540 .sr(1)
9541 .m(4)
9542 .n(8)
9543 .k(2)
9544 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009545 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009546 }
9547
Marat Dukhande06f492020-04-09 00:19:31 -07009548 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009549 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009550 for (uint32_t n = 1; n <= 8; n++) {
9551 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009552 GemmMicrokernelTester()
9553 .mr(4)
9554 .nr(8)
9555 .kr(1)
9556 .sr(1)
9557 .m(m)
9558 .n(n)
9559 .k(2)
9560 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009561 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009562 }
9563 }
9564 }
9565
Marat Dukhande06f492020-04-09 00:19:31 -07009566 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009567 TEST_REQUIRES_ARM_NEON_FMA;
9568 for (uint32_t m = 1; m <= 4; m++) {
9569 GemmMicrokernelTester()
9570 .mr(4)
9571 .nr(8)
9572 .kr(1)
9573 .sr(1)
9574 .m(m)
9575 .n(8)
9576 .k(2)
9577 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009578 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009579 }
9580 }
9581
Marat Dukhande06f492020-04-09 00:19:31 -07009582 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009583 TEST_REQUIRES_ARM_NEON_FMA;
9584 for (uint32_t n = 1; n <= 8; n++) {
9585 GemmMicrokernelTester()
9586 .mr(4)
9587 .nr(8)
9588 .kr(1)
9589 .sr(1)
9590 .m(4)
9591 .n(n)
9592 .k(2)
9593 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009594 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009595 }
9596 }
9597
Marat Dukhande06f492020-04-09 00:19:31 -07009598 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009599 TEST_REQUIRES_ARM_NEON_FMA;
9600 for (size_t k = 1; k < 2; k++) {
9601 GemmMicrokernelTester()
9602 .mr(4)
9603 .nr(8)
9604 .kr(1)
9605 .sr(1)
9606 .m(4)
9607 .n(8)
9608 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009609 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009610 }
9611 }
9612
Marat Dukhande06f492020-04-09 00:19:31 -07009613 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_lt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009614 TEST_REQUIRES_ARM_NEON_FMA;
9615 for (size_t k = 1; k < 2; k++) {
9616 GemmMicrokernelTester()
9617 .mr(4)
9618 .nr(8)
9619 .kr(1)
9620 .sr(1)
9621 .m(4)
9622 .n(8)
9623 .k(k)
9624 .a_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009625 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009626 }
9627 }
9628
Marat Dukhande06f492020-04-09 00:19:31 -07009629 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009630 TEST_REQUIRES_ARM_NEON_FMA;
9631 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009632 for (uint32_t n = 1; n <= 8; n++) {
9633 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009634 GemmMicrokernelTester()
9635 .mr(4)
9636 .nr(8)
9637 .kr(1)
9638 .sr(1)
9639 .m(m)
9640 .n(n)
9641 .k(k)
9642 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009643 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009644 }
9645 }
9646 }
9647 }
9648
Marat Dukhande06f492020-04-09 00:19:31 -07009649 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009650 TEST_REQUIRES_ARM_NEON_FMA;
9651 for (size_t k = 3; k < 4; k++) {
9652 GemmMicrokernelTester()
9653 .mr(4)
9654 .nr(8)
9655 .kr(1)
9656 .sr(1)
9657 .m(4)
9658 .n(8)
9659 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009660 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009661 }
9662 }
9663
Marat Dukhande06f492020-04-09 00:19:31 -07009664 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009665 TEST_REQUIRES_ARM_NEON_FMA;
9666 for (size_t k = 3; k < 4; k++) {
9667 GemmMicrokernelTester()
9668 .mr(4)
9669 .nr(8)
9670 .kr(1)
9671 .sr(1)
9672 .m(4)
9673 .n(8)
9674 .k(k)
9675 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009676 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009677 }
9678 }
9679
Marat Dukhande06f492020-04-09 00:19:31 -07009680 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009681 TEST_REQUIRES_ARM_NEON_FMA;
9682 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009683 for (uint32_t n = 1; n <= 8; n++) {
9684 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009685 GemmMicrokernelTester()
9686 .mr(4)
9687 .nr(8)
9688 .kr(1)
9689 .sr(1)
9690 .m(m)
9691 .n(n)
9692 .k(k)
9693 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009694 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009695 }
9696 }
9697 }
9698 }
9699
Marat Dukhande06f492020-04-09 00:19:31 -07009700 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009701 TEST_REQUIRES_ARM_NEON_FMA;
9702 for (size_t k = 4; k <= 20; k += 2) {
9703 GemmMicrokernelTester()
9704 .mr(4)
9705 .nr(8)
9706 .kr(1)
9707 .sr(1)
9708 .m(4)
9709 .n(8)
9710 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009711 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009712 }
9713 }
9714
Marat Dukhande06f492020-04-09 00:19:31 -07009715 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009716 TEST_REQUIRES_ARM_NEON_FMA;
9717 for (size_t k = 4; k <= 20; k += 2) {
9718 GemmMicrokernelTester()
9719 .mr(4)
9720 .nr(8)
9721 .kr(1)
9722 .sr(1)
9723 .m(4)
9724 .n(8)
9725 .k(k)
9726 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009727 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009728 }
9729 }
9730
Marat Dukhande06f492020-04-09 00:19:31 -07009731 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009732 TEST_REQUIRES_ARM_NEON_FMA;
9733 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009734 for (uint32_t n = 1; n <= 8; n++) {
9735 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009736 GemmMicrokernelTester()
9737 .mr(4)
9738 .nr(8)
9739 .kr(1)
9740 .sr(1)
9741 .m(m)
9742 .n(n)
9743 .k(k)
9744 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009745 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009746 }
9747 }
9748 }
9749 }
9750
Marat Dukhande06f492020-04-09 00:19:31 -07009751 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009752 TEST_REQUIRES_ARM_NEON_FMA;
9753 for (uint32_t n = 9; n < 16; n++) {
9754 for (size_t k = 1; k <= 10; k += 3) {
9755 GemmMicrokernelTester()
9756 .mr(4)
9757 .nr(8)
9758 .kr(1)
9759 .sr(1)
9760 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009761 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009762 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009763 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009764 }
9765 }
9766 }
9767
Marat Dukhande06f492020-04-09 00:19:31 -07009768 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009769 TEST_REQUIRES_ARM_NEON_FMA;
9770 for (uint32_t n = 9; n < 16; n++) {
9771 for (size_t k = 1; k <= 10; k += 3) {
9772 GemmMicrokernelTester()
9773 .mr(4)
9774 .nr(8)
9775 .kr(1)
9776 .sr(1)
9777 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009778 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009779 .k(k)
9780 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009781 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009782 }
9783 }
9784 }
9785
Marat Dukhande06f492020-04-09 00:19:31 -07009786 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009787 TEST_REQUIRES_ARM_NEON_FMA;
9788 for (uint32_t n = 9; n < 16; n++) {
9789 for (size_t k = 1; k <= 10; k += 3) {
9790 GemmMicrokernelTester()
9791 .mr(4)
9792 .nr(8)
9793 .kr(1)
9794 .sr(1)
9795 .m(4)
9796 .n(n)
9797 .k(k)
9798 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009799 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009800 }
9801 }
9802 }
9803
Marat Dukhande06f492020-04-09 00:19:31 -07009804 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009805 TEST_REQUIRES_ARM_NEON_FMA;
9806 for (uint32_t n = 9; n < 16; n++) {
9807 for (size_t k = 1; k <= 10; k += 3) {
9808 for (uint32_t m = 1; m <= 4; m++) {
9809 GemmMicrokernelTester()
9810 .mr(4)
9811 .nr(8)
9812 .kr(1)
9813 .sr(1)
9814 .m(m)
9815 .n(n)
9816 .k(k)
9817 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009818 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009819 }
9820 }
9821 }
9822 }
9823
Marat Dukhande06f492020-04-09 00:19:31 -07009824 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009825 TEST_REQUIRES_ARM_NEON_FMA;
9826 for (uint32_t n = 16; n <= 24; n += 8) {
9827 for (size_t k = 1; k <= 10; k += 3) {
9828 GemmMicrokernelTester()
9829 .mr(4)
9830 .nr(8)
9831 .kr(1)
9832 .sr(1)
9833 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009834 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009835 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009836 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009837 }
9838 }
9839 }
9840
Marat Dukhande06f492020-04-09 00:19:31 -07009841 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009842 TEST_REQUIRES_ARM_NEON_FMA;
9843 for (uint32_t n = 16; n <= 24; n += 8) {
9844 for (size_t k = 1; k <= 10; k += 3) {
9845 GemmMicrokernelTester()
9846 .mr(4)
9847 .nr(8)
9848 .kr(1)
9849 .sr(1)
9850 .m(4)
9851 .n(n)
9852 .k(k)
9853 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009854 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009855 }
9856 }
9857 }
9858
Marat Dukhande06f492020-04-09 00:19:31 -07009859 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009860 TEST_REQUIRES_ARM_NEON_FMA;
9861 for (uint32_t n = 16; n <= 24; n += 8) {
9862 for (size_t k = 1; k <= 10; k += 3) {
9863 GemmMicrokernelTester()
9864 .mr(4)
9865 .nr(8)
9866 .kr(1)
9867 .sr(1)
9868 .m(4)
9869 .n(n)
9870 .k(k)
9871 .a_stride(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009872 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009873 }
9874 }
9875 }
9876
Marat Dukhande06f492020-04-09 00:19:31 -07009877 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009878 TEST_REQUIRES_ARM_NEON_FMA;
9879 for (uint32_t n = 16; n <= 24; n += 8) {
9880 for (size_t k = 1; k <= 10; k += 3) {
9881 for (uint32_t m = 1; m <= 4; m++) {
9882 GemmMicrokernelTester()
9883 .mr(4)
9884 .nr(8)
9885 .kr(1)
9886 .sr(1)
9887 .m(m)
9888 .n(n)
9889 .k(k)
9890 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009891 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009892 }
9893 }
9894 }
9895 }
9896
Marat Dukhande06f492020-04-09 00:19:31 -07009897 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009898 TEST_REQUIRES_ARM_NEON_FMA;
9899 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009900 for (uint32_t n = 1; n <= 8; n++) {
9901 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009902 GemmMicrokernelTester()
9903 .mr(4)
9904 .nr(8)
9905 .kr(1)
9906 .sr(1)
9907 .m(m)
9908 .n(n)
9909 .k(k)
9910 .cm_stride(11)
9911 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009912 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009913 }
9914 }
9915 }
9916 }
9917
Marat Dukhande06f492020-04-09 00:19:31 -07009918 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009919 TEST_REQUIRES_ARM_NEON_FMA;
9920 GemmMicrokernelTester()
9921 .mr(4)
9922 .nr(8)
9923 .kr(1)
9924 .sr(1)
9925 .m(4)
9926 .n(8)
9927 .k(2)
9928 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009929 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009930 }
9931
Marat Dukhande06f492020-04-09 00:19:31 -07009932 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009933 TEST_REQUIRES_ARM_NEON_FMA;
9934 GemmMicrokernelTester()
9935 .mr(4)
9936 .nr(8)
9937 .kr(1)
9938 .sr(1)
9939 .m(4)
9940 .n(8)
9941 .k(2)
9942 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009943 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009944 }
9945
Marat Dukhande06f492020-04-09 00:19:31 -07009946 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009947 TEST_REQUIRES_ARM_NEON_FMA;
9948 GemmMicrokernelTester()
9949 .mr(4)
9950 .nr(8)
9951 .kr(1)
9952 .sr(1)
9953 .m(4)
9954 .n(8)
9955 .k(2)
9956 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009957 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009958 }
9959#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9960
9961
9962#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07009963 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009964 TEST_REQUIRES_ARM_NEON_FMA;
9965 GemmMicrokernelTester()
9966 .mr(4)
9967 .nr(8)
9968 .kr(1)
9969 .sr(1)
9970 .m(4)
9971 .n(8)
9972 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009973 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009974 }
9975
Marat Dukhande06f492020-04-09 00:19:31 -07009976 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009977 TEST_REQUIRES_ARM_NEON_FMA;
9978 GemmMicrokernelTester()
9979 .mr(4)
9980 .nr(8)
9981 .kr(1)
9982 .sr(1)
9983 .m(4)
9984 .n(8)
9985 .k(4)
9986 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009987 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009988 }
9989
Marat Dukhande06f492020-04-09 00:19:31 -07009990 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009991 TEST_REQUIRES_ARM_NEON_FMA;
9992 GemmMicrokernelTester()
9993 .mr(4)
9994 .nr(8)
9995 .kr(1)
9996 .sr(1)
9997 .m(4)
9998 .n(8)
9999 .k(4)
10000 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010001 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010002 }
10003
Marat Dukhande06f492020-04-09 00:19:31 -070010004 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010005 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010006 for (uint32_t n = 1; n <= 8; n++) {
10007 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010008 GemmMicrokernelTester()
10009 .mr(4)
10010 .nr(8)
10011 .kr(1)
10012 .sr(1)
10013 .m(m)
10014 .n(n)
10015 .k(4)
10016 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010017 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010018 }
10019 }
10020 }
10021
Marat Dukhande06f492020-04-09 00:19:31 -070010022 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010023 TEST_REQUIRES_ARM_NEON_FMA;
10024 for (uint32_t m = 1; m <= 4; m++) {
10025 GemmMicrokernelTester()
10026 .mr(4)
10027 .nr(8)
10028 .kr(1)
10029 .sr(1)
10030 .m(m)
10031 .n(8)
10032 .k(4)
10033 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010034 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010035 }
10036 }
10037
Marat Dukhande06f492020-04-09 00:19:31 -070010038 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010039 TEST_REQUIRES_ARM_NEON_FMA;
10040 for (uint32_t n = 1; n <= 8; n++) {
10041 GemmMicrokernelTester()
10042 .mr(4)
10043 .nr(8)
10044 .kr(1)
10045 .sr(1)
10046 .m(4)
10047 .n(n)
10048 .k(4)
10049 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010050 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010051 }
10052 }
10053
Marat Dukhande06f492020-04-09 00:19:31 -070010054 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010055 TEST_REQUIRES_ARM_NEON_FMA;
10056 for (size_t k = 1; k < 4; k++) {
10057 GemmMicrokernelTester()
10058 .mr(4)
10059 .nr(8)
10060 .kr(1)
10061 .sr(1)
10062 .m(4)
10063 .n(8)
10064 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010065 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010066 }
10067 }
10068
Marat Dukhande06f492020-04-09 00:19:31 -070010069 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010070 TEST_REQUIRES_ARM_NEON_FMA;
10071 for (size_t k = 1; k < 4; k++) {
10072 GemmMicrokernelTester()
10073 .mr(4)
10074 .nr(8)
10075 .kr(1)
10076 .sr(1)
10077 .m(4)
10078 .n(8)
10079 .k(k)
10080 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010081 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010082 }
10083 }
10084
Marat Dukhande06f492020-04-09 00:19:31 -070010085 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010086 TEST_REQUIRES_ARM_NEON_FMA;
10087 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010088 for (uint32_t n = 1; n <= 8; n++) {
10089 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010090 GemmMicrokernelTester()
10091 .mr(4)
10092 .nr(8)
10093 .kr(1)
10094 .sr(1)
10095 .m(m)
10096 .n(n)
10097 .k(k)
10098 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010099 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010100 }
10101 }
10102 }
10103 }
10104
Marat Dukhande06f492020-04-09 00:19:31 -070010105 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010106 TEST_REQUIRES_ARM_NEON_FMA;
10107 for (size_t k = 5; k < 8; k++) {
10108 GemmMicrokernelTester()
10109 .mr(4)
10110 .nr(8)
10111 .kr(1)
10112 .sr(1)
10113 .m(4)
10114 .n(8)
10115 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010116 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010117 }
10118 }
10119
Marat Dukhande06f492020-04-09 00:19:31 -070010120 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010121 TEST_REQUIRES_ARM_NEON_FMA;
10122 for (size_t k = 5; k < 8; k++) {
10123 GemmMicrokernelTester()
10124 .mr(4)
10125 .nr(8)
10126 .kr(1)
10127 .sr(1)
10128 .m(4)
10129 .n(8)
10130 .k(k)
10131 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010132 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010133 }
10134 }
10135
Marat Dukhande06f492020-04-09 00:19:31 -070010136 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010137 TEST_REQUIRES_ARM_NEON_FMA;
10138 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010139 for (uint32_t n = 1; n <= 8; n++) {
10140 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010141 GemmMicrokernelTester()
10142 .mr(4)
10143 .nr(8)
10144 .kr(1)
10145 .sr(1)
10146 .m(m)
10147 .n(n)
10148 .k(k)
10149 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010150 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010151 }
10152 }
10153 }
10154 }
10155
Marat Dukhande06f492020-04-09 00:19:31 -070010156 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010157 TEST_REQUIRES_ARM_NEON_FMA;
10158 for (size_t k = 8; k <= 40; k += 4) {
10159 GemmMicrokernelTester()
10160 .mr(4)
10161 .nr(8)
10162 .kr(1)
10163 .sr(1)
10164 .m(4)
10165 .n(8)
10166 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010167 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010168 }
10169 }
10170
Marat Dukhande06f492020-04-09 00:19:31 -070010171 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010172 TEST_REQUIRES_ARM_NEON_FMA;
10173 for (size_t k = 8; k <= 40; k += 4) {
10174 GemmMicrokernelTester()
10175 .mr(4)
10176 .nr(8)
10177 .kr(1)
10178 .sr(1)
10179 .m(4)
10180 .n(8)
10181 .k(k)
10182 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010183 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010184 }
10185 }
10186
Marat Dukhande06f492020-04-09 00:19:31 -070010187 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010188 TEST_REQUIRES_ARM_NEON_FMA;
10189 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010190 for (uint32_t n = 1; n <= 8; n++) {
10191 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010192 GemmMicrokernelTester()
10193 .mr(4)
10194 .nr(8)
10195 .kr(1)
10196 .sr(1)
10197 .m(m)
10198 .n(n)
10199 .k(k)
10200 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010201 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010202 }
10203 }
10204 }
10205 }
10206
Marat Dukhande06f492020-04-09 00:19:31 -070010207 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010208 TEST_REQUIRES_ARM_NEON_FMA;
10209 for (uint32_t n = 9; n < 16; n++) {
10210 for (size_t k = 1; k <= 20; k += 5) {
10211 GemmMicrokernelTester()
10212 .mr(4)
10213 .nr(8)
10214 .kr(1)
10215 .sr(1)
10216 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010217 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010218 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010219 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010220 }
10221 }
10222 }
10223
Marat Dukhande06f492020-04-09 00:19:31 -070010224 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010225 TEST_REQUIRES_ARM_NEON_FMA;
10226 for (uint32_t n = 9; n < 16; n++) {
10227 for (size_t k = 1; k <= 20; k += 5) {
10228 GemmMicrokernelTester()
10229 .mr(4)
10230 .nr(8)
10231 .kr(1)
10232 .sr(1)
10233 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010234 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010235 .k(k)
10236 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010237 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010238 }
10239 }
10240 }
10241
Marat Dukhande06f492020-04-09 00:19:31 -070010242 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010243 TEST_REQUIRES_ARM_NEON_FMA;
10244 for (uint32_t n = 9; n < 16; n++) {
10245 for (size_t k = 1; k <= 20; k += 5) {
10246 GemmMicrokernelTester()
10247 .mr(4)
10248 .nr(8)
10249 .kr(1)
10250 .sr(1)
10251 .m(4)
10252 .n(n)
10253 .k(k)
10254 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010255 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010256 }
10257 }
10258 }
10259
Marat Dukhande06f492020-04-09 00:19:31 -070010260 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010261 TEST_REQUIRES_ARM_NEON_FMA;
10262 for (uint32_t n = 9; n < 16; n++) {
10263 for (size_t k = 1; k <= 20; k += 5) {
10264 for (uint32_t m = 1; m <= 4; m++) {
10265 GemmMicrokernelTester()
10266 .mr(4)
10267 .nr(8)
10268 .kr(1)
10269 .sr(1)
10270 .m(m)
10271 .n(n)
10272 .k(k)
10273 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010274 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010275 }
10276 }
10277 }
10278 }
10279
Marat Dukhande06f492020-04-09 00:19:31 -070010280 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010281 TEST_REQUIRES_ARM_NEON_FMA;
10282 for (uint32_t n = 16; n <= 24; n += 8) {
10283 for (size_t k = 1; k <= 20; k += 5) {
10284 GemmMicrokernelTester()
10285 .mr(4)
10286 .nr(8)
10287 .kr(1)
10288 .sr(1)
10289 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010290 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010291 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010292 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010293 }
10294 }
10295 }
10296
Marat Dukhande06f492020-04-09 00:19:31 -070010297 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010298 TEST_REQUIRES_ARM_NEON_FMA;
10299 for (uint32_t n = 16; n <= 24; n += 8) {
10300 for (size_t k = 1; k <= 20; k += 5) {
10301 GemmMicrokernelTester()
10302 .mr(4)
10303 .nr(8)
10304 .kr(1)
10305 .sr(1)
10306 .m(4)
10307 .n(n)
10308 .k(k)
10309 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010310 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010311 }
10312 }
10313 }
10314
Marat Dukhande06f492020-04-09 00:19:31 -070010315 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010316 TEST_REQUIRES_ARM_NEON_FMA;
10317 for (uint32_t n = 16; n <= 24; n += 8) {
10318 for (size_t k = 1; k <= 20; k += 5) {
10319 GemmMicrokernelTester()
10320 .mr(4)
10321 .nr(8)
10322 .kr(1)
10323 .sr(1)
10324 .m(4)
10325 .n(n)
10326 .k(k)
10327 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010328 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010329 }
10330 }
10331 }
10332
Marat Dukhande06f492020-04-09 00:19:31 -070010333 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010334 TEST_REQUIRES_ARM_NEON_FMA;
10335 for (uint32_t n = 16; n <= 24; n += 8) {
10336 for (size_t k = 1; k <= 20; k += 5) {
10337 for (uint32_t m = 1; m <= 4; m++) {
10338 GemmMicrokernelTester()
10339 .mr(4)
10340 .nr(8)
10341 .kr(1)
10342 .sr(1)
10343 .m(m)
10344 .n(n)
10345 .k(k)
10346 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010347 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010348 }
10349 }
10350 }
10351 }
10352
Marat Dukhande06f492020-04-09 00:19:31 -070010353 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010354 TEST_REQUIRES_ARM_NEON_FMA;
10355 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010356 for (uint32_t n = 1; n <= 8; n++) {
10357 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010358 GemmMicrokernelTester()
10359 .mr(4)
10360 .nr(8)
10361 .kr(1)
10362 .sr(1)
10363 .m(m)
10364 .n(n)
10365 .k(k)
10366 .cm_stride(11)
10367 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010368 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010369 }
10370 }
10371 }
10372 }
10373
Marat Dukhande06f492020-04-09 00:19:31 -070010374 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010375 TEST_REQUIRES_ARM_NEON_FMA;
10376 GemmMicrokernelTester()
10377 .mr(4)
10378 .nr(8)
10379 .kr(1)
10380 .sr(1)
10381 .m(4)
10382 .n(8)
10383 .k(4)
10384 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010385 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010386 }
10387
Marat Dukhande06f492020-04-09 00:19:31 -070010388 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010389 TEST_REQUIRES_ARM_NEON_FMA;
10390 GemmMicrokernelTester()
10391 .mr(4)
10392 .nr(8)
10393 .kr(1)
10394 .sr(1)
10395 .m(4)
10396 .n(8)
10397 .k(4)
10398 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010399 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010400 }
10401
Marat Dukhande06f492020-04-09 00:19:31 -070010402 TEST(F32_GEMMINC_MINMAX_4X8__NEONFMA_DUP_LD128, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010403 TEST_REQUIRES_ARM_NEON_FMA;
10404 GemmMicrokernelTester()
10405 .mr(4)
10406 .nr(8)
10407 .kr(1)
10408 .sr(1)
10409 .m(4)
10410 .n(8)
10411 .k(4)
10412 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010413 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010414 }
10415#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10416
10417
10418#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070010419 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010420 TEST_REQUIRES_ARM_NEON_FMA;
10421 GemmMicrokernelTester()
10422 .mr(6)
10423 .nr(8)
10424 .kr(1)
10425 .sr(1)
10426 .m(6)
10427 .n(8)
10428 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010429 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010430 }
10431
Marat Dukhande06f492020-04-09 00:19:31 -070010432 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010433 TEST_REQUIRES_ARM_NEON_FMA;
10434 GemmMicrokernelTester()
10435 .mr(6)
10436 .nr(8)
10437 .kr(1)
10438 .sr(1)
10439 .m(6)
10440 .n(8)
10441 .k(4)
10442 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010443 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010444 }
10445
Marat Dukhande06f492020-04-09 00:19:31 -070010446 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010447 TEST_REQUIRES_ARM_NEON_FMA;
10448 GemmMicrokernelTester()
10449 .mr(6)
10450 .nr(8)
10451 .kr(1)
10452 .sr(1)
10453 .m(6)
10454 .n(8)
10455 .k(4)
10456 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010457 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010458 }
10459
Marat Dukhande06f492020-04-09 00:19:31 -070010460 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010461 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010462 for (uint32_t n = 1; n <= 8; n++) {
10463 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010464 GemmMicrokernelTester()
10465 .mr(6)
10466 .nr(8)
10467 .kr(1)
10468 .sr(1)
10469 .m(m)
10470 .n(n)
10471 .k(4)
10472 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010473 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010474 }
10475 }
10476 }
10477
Marat Dukhande06f492020-04-09 00:19:31 -070010478 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010479 TEST_REQUIRES_ARM_NEON_FMA;
10480 for (uint32_t m = 1; m <= 6; m++) {
10481 GemmMicrokernelTester()
10482 .mr(6)
10483 .nr(8)
10484 .kr(1)
10485 .sr(1)
10486 .m(m)
10487 .n(8)
10488 .k(4)
10489 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010490 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010491 }
10492 }
10493
Marat Dukhande06f492020-04-09 00:19:31 -070010494 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010495 TEST_REQUIRES_ARM_NEON_FMA;
10496 for (uint32_t n = 1; n <= 8; n++) {
10497 GemmMicrokernelTester()
10498 .mr(6)
10499 .nr(8)
10500 .kr(1)
10501 .sr(1)
10502 .m(6)
10503 .n(n)
10504 .k(4)
10505 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010506 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010507 }
10508 }
10509
Marat Dukhande06f492020-04-09 00:19:31 -070010510 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010511 TEST_REQUIRES_ARM_NEON_FMA;
10512 for (size_t k = 1; k < 4; k++) {
10513 GemmMicrokernelTester()
10514 .mr(6)
10515 .nr(8)
10516 .kr(1)
10517 .sr(1)
10518 .m(6)
10519 .n(8)
10520 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010521 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010522 }
10523 }
10524
Marat Dukhande06f492020-04-09 00:19:31 -070010525 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010526 TEST_REQUIRES_ARM_NEON_FMA;
10527 for (size_t k = 1; k < 4; k++) {
10528 GemmMicrokernelTester()
10529 .mr(6)
10530 .nr(8)
10531 .kr(1)
10532 .sr(1)
10533 .m(6)
10534 .n(8)
10535 .k(k)
10536 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010537 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010538 }
10539 }
10540
Marat Dukhande06f492020-04-09 00:19:31 -070010541 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010542 TEST_REQUIRES_ARM_NEON_FMA;
10543 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010544 for (uint32_t n = 1; n <= 8; n++) {
10545 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010546 GemmMicrokernelTester()
10547 .mr(6)
10548 .nr(8)
10549 .kr(1)
10550 .sr(1)
10551 .m(m)
10552 .n(n)
10553 .k(k)
10554 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010555 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010556 }
10557 }
10558 }
10559 }
10560
Marat Dukhande06f492020-04-09 00:19:31 -070010561 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010562 TEST_REQUIRES_ARM_NEON_FMA;
10563 for (size_t k = 5; k < 8; k++) {
10564 GemmMicrokernelTester()
10565 .mr(6)
10566 .nr(8)
10567 .kr(1)
10568 .sr(1)
10569 .m(6)
10570 .n(8)
10571 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010572 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010573 }
10574 }
10575
Marat Dukhande06f492020-04-09 00:19:31 -070010576 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010577 TEST_REQUIRES_ARM_NEON_FMA;
10578 for (size_t k = 5; k < 8; k++) {
10579 GemmMicrokernelTester()
10580 .mr(6)
10581 .nr(8)
10582 .kr(1)
10583 .sr(1)
10584 .m(6)
10585 .n(8)
10586 .k(k)
10587 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010588 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010589 }
10590 }
10591
Marat Dukhande06f492020-04-09 00:19:31 -070010592 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010593 TEST_REQUIRES_ARM_NEON_FMA;
10594 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010595 for (uint32_t n = 1; n <= 8; n++) {
10596 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010597 GemmMicrokernelTester()
10598 .mr(6)
10599 .nr(8)
10600 .kr(1)
10601 .sr(1)
10602 .m(m)
10603 .n(n)
10604 .k(k)
10605 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010606 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010607 }
10608 }
10609 }
10610 }
10611
Marat Dukhande06f492020-04-09 00:19:31 -070010612 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010613 TEST_REQUIRES_ARM_NEON_FMA;
10614 for (size_t k = 8; k <= 40; k += 4) {
10615 GemmMicrokernelTester()
10616 .mr(6)
10617 .nr(8)
10618 .kr(1)
10619 .sr(1)
10620 .m(6)
10621 .n(8)
10622 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010623 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010624 }
10625 }
10626
Marat Dukhande06f492020-04-09 00:19:31 -070010627 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010628 TEST_REQUIRES_ARM_NEON_FMA;
10629 for (size_t k = 8; k <= 40; k += 4) {
10630 GemmMicrokernelTester()
10631 .mr(6)
10632 .nr(8)
10633 .kr(1)
10634 .sr(1)
10635 .m(6)
10636 .n(8)
10637 .k(k)
10638 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010639 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010640 }
10641 }
10642
Marat Dukhande06f492020-04-09 00:19:31 -070010643 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010644 TEST_REQUIRES_ARM_NEON_FMA;
10645 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010646 for (uint32_t n = 1; n <= 8; n++) {
10647 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010648 GemmMicrokernelTester()
10649 .mr(6)
10650 .nr(8)
10651 .kr(1)
10652 .sr(1)
10653 .m(m)
10654 .n(n)
10655 .k(k)
10656 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010657 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010658 }
10659 }
10660 }
10661 }
10662
Marat Dukhande06f492020-04-09 00:19:31 -070010663 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010664 TEST_REQUIRES_ARM_NEON_FMA;
10665 for (uint32_t n = 9; n < 16; n++) {
10666 for (size_t k = 1; k <= 20; k += 5) {
10667 GemmMicrokernelTester()
10668 .mr(6)
10669 .nr(8)
10670 .kr(1)
10671 .sr(1)
10672 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010673 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010674 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010675 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010676 }
10677 }
10678 }
10679
Marat Dukhande06f492020-04-09 00:19:31 -070010680 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010681 TEST_REQUIRES_ARM_NEON_FMA;
10682 for (uint32_t n = 9; n < 16; n++) {
10683 for (size_t k = 1; k <= 20; k += 5) {
10684 GemmMicrokernelTester()
10685 .mr(6)
10686 .nr(8)
10687 .kr(1)
10688 .sr(1)
10689 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010690 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010691 .k(k)
10692 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010693 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010694 }
10695 }
10696 }
10697
Marat Dukhande06f492020-04-09 00:19:31 -070010698 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010699 TEST_REQUIRES_ARM_NEON_FMA;
10700 for (uint32_t n = 9; n < 16; n++) {
10701 for (size_t k = 1; k <= 20; k += 5) {
10702 GemmMicrokernelTester()
10703 .mr(6)
10704 .nr(8)
10705 .kr(1)
10706 .sr(1)
10707 .m(6)
10708 .n(n)
10709 .k(k)
10710 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010711 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010712 }
10713 }
10714 }
10715
Marat Dukhande06f492020-04-09 00:19:31 -070010716 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010717 TEST_REQUIRES_ARM_NEON_FMA;
10718 for (uint32_t n = 9; n < 16; n++) {
10719 for (size_t k = 1; k <= 20; k += 5) {
10720 for (uint32_t m = 1; m <= 6; m++) {
10721 GemmMicrokernelTester()
10722 .mr(6)
10723 .nr(8)
10724 .kr(1)
10725 .sr(1)
10726 .m(m)
10727 .n(n)
10728 .k(k)
10729 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010730 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010731 }
10732 }
10733 }
10734 }
10735
Marat Dukhande06f492020-04-09 00:19:31 -070010736 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010737 TEST_REQUIRES_ARM_NEON_FMA;
10738 for (uint32_t n = 16; n <= 24; n += 8) {
10739 for (size_t k = 1; k <= 20; k += 5) {
10740 GemmMicrokernelTester()
10741 .mr(6)
10742 .nr(8)
10743 .kr(1)
10744 .sr(1)
10745 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010746 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010747 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010748 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010749 }
10750 }
10751 }
10752
Marat Dukhande06f492020-04-09 00:19:31 -070010753 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010754 TEST_REQUIRES_ARM_NEON_FMA;
10755 for (uint32_t n = 16; n <= 24; n += 8) {
10756 for (size_t k = 1; k <= 20; k += 5) {
10757 GemmMicrokernelTester()
10758 .mr(6)
10759 .nr(8)
10760 .kr(1)
10761 .sr(1)
10762 .m(6)
10763 .n(n)
10764 .k(k)
10765 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010766 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010767 }
10768 }
10769 }
10770
Marat Dukhande06f492020-04-09 00:19:31 -070010771 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010772 TEST_REQUIRES_ARM_NEON_FMA;
10773 for (uint32_t n = 16; n <= 24; n += 8) {
10774 for (size_t k = 1; k <= 20; k += 5) {
10775 GemmMicrokernelTester()
10776 .mr(6)
10777 .nr(8)
10778 .kr(1)
10779 .sr(1)
10780 .m(6)
10781 .n(n)
10782 .k(k)
10783 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010784 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010785 }
10786 }
10787 }
10788
Marat Dukhande06f492020-04-09 00:19:31 -070010789 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010790 TEST_REQUIRES_ARM_NEON_FMA;
10791 for (uint32_t n = 16; n <= 24; n += 8) {
10792 for (size_t k = 1; k <= 20; k += 5) {
10793 for (uint32_t m = 1; m <= 6; m++) {
10794 GemmMicrokernelTester()
10795 .mr(6)
10796 .nr(8)
10797 .kr(1)
10798 .sr(1)
10799 .m(m)
10800 .n(n)
10801 .k(k)
10802 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010803 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010804 }
10805 }
10806 }
10807 }
10808
Marat Dukhande06f492020-04-09 00:19:31 -070010809 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010810 TEST_REQUIRES_ARM_NEON_FMA;
10811 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010812 for (uint32_t n = 1; n <= 8; n++) {
10813 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010814 GemmMicrokernelTester()
10815 .mr(6)
10816 .nr(8)
10817 .kr(1)
10818 .sr(1)
10819 .m(m)
10820 .n(n)
10821 .k(k)
10822 .cm_stride(11)
10823 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010824 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010825 }
10826 }
10827 }
10828 }
10829
Marat Dukhande06f492020-04-09 00:19:31 -070010830 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010831 TEST_REQUIRES_ARM_NEON_FMA;
10832 GemmMicrokernelTester()
10833 .mr(6)
10834 .nr(8)
10835 .kr(1)
10836 .sr(1)
10837 .m(6)
10838 .n(8)
10839 .k(4)
10840 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010841 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010842 }
10843
Marat Dukhande06f492020-04-09 00:19:31 -070010844 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010845 TEST_REQUIRES_ARM_NEON_FMA;
10846 GemmMicrokernelTester()
10847 .mr(6)
10848 .nr(8)
10849 .kr(1)
10850 .sr(1)
10851 .m(6)
10852 .n(8)
10853 .k(4)
10854 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010855 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010856 }
10857
Marat Dukhande06f492020-04-09 00:19:31 -070010858 TEST(F32_GEMMINC_MINMAX_6X8__NEONFMA_DUP_LD128, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010859 TEST_REQUIRES_ARM_NEON_FMA;
10860 GemmMicrokernelTester()
10861 .mr(6)
10862 .nr(8)
10863 .kr(1)
10864 .sr(1)
10865 .m(6)
10866 .n(8)
10867 .k(4)
10868 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010869 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010870 }
10871#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10872
10873
10874#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070010875 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010876 TEST_REQUIRES_ARM_NEON;
10877 GemmMicrokernelTester()
10878 .mr(1)
10879 .nr(8)
10880 .kr(1)
10881 .sr(4)
10882 .m(1)
10883 .n(8)
10884 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010885 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010886 }
10887
Marat Dukhande06f492020-04-09 00:19:31 -070010888 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010889 TEST_REQUIRES_ARM_NEON;
10890 GemmMicrokernelTester()
10891 .mr(1)
10892 .nr(8)
10893 .kr(1)
10894 .sr(4)
10895 .m(1)
10896 .n(8)
10897 .k(4)
10898 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010899 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010900 }
10901
Marat Dukhande06f492020-04-09 00:19:31 -070010902 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010903 TEST_REQUIRES_ARM_NEON;
10904 GemmMicrokernelTester()
10905 .mr(1)
10906 .nr(8)
10907 .kr(1)
10908 .sr(4)
10909 .m(1)
10910 .n(8)
10911 .k(4)
10912 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010913 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010914 }
10915
Marat Dukhande06f492020-04-09 00:19:31 -070010916 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010917 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010918 for (uint32_t n = 1; n <= 8; n++) {
10919 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010920 GemmMicrokernelTester()
10921 .mr(1)
10922 .nr(8)
10923 .kr(1)
10924 .sr(4)
10925 .m(m)
10926 .n(n)
10927 .k(4)
10928 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010929 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010930 }
10931 }
10932 }
10933
Marat Dukhande06f492020-04-09 00:19:31 -070010934 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010935 TEST_REQUIRES_ARM_NEON;
10936 for (uint32_t m = 1; m <= 1; m++) {
10937 GemmMicrokernelTester()
10938 .mr(1)
10939 .nr(8)
10940 .kr(1)
10941 .sr(4)
10942 .m(m)
10943 .n(8)
10944 .k(4)
10945 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010946 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010947 }
10948 }
10949
Marat Dukhande06f492020-04-09 00:19:31 -070010950 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010951 TEST_REQUIRES_ARM_NEON;
10952 for (uint32_t n = 1; n <= 8; n++) {
10953 GemmMicrokernelTester()
10954 .mr(1)
10955 .nr(8)
10956 .kr(1)
10957 .sr(4)
10958 .m(1)
10959 .n(n)
10960 .k(4)
10961 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010962 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010963 }
10964 }
10965
Marat Dukhande06f492020-04-09 00:19:31 -070010966 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010967 TEST_REQUIRES_ARM_NEON;
10968 for (size_t k = 1; k < 4; k++) {
10969 GemmMicrokernelTester()
10970 .mr(1)
10971 .nr(8)
10972 .kr(1)
10973 .sr(4)
10974 .m(1)
10975 .n(8)
10976 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010977 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010978 }
10979 }
10980
Marat Dukhande06f492020-04-09 00:19:31 -070010981 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010982 TEST_REQUIRES_ARM_NEON;
10983 for (size_t k = 1; k < 4; k++) {
10984 GemmMicrokernelTester()
10985 .mr(1)
10986 .nr(8)
10987 .kr(1)
10988 .sr(4)
10989 .m(1)
10990 .n(8)
10991 .k(k)
10992 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010993 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010994 }
10995 }
10996
Marat Dukhande06f492020-04-09 00:19:31 -070010997 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010998 TEST_REQUIRES_ARM_NEON;
10999 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011000 for (uint32_t n = 1; n <= 8; n++) {
11001 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011002 GemmMicrokernelTester()
11003 .mr(1)
11004 .nr(8)
11005 .kr(1)
11006 .sr(4)
11007 .m(m)
11008 .n(n)
11009 .k(k)
11010 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011011 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011012 }
11013 }
11014 }
11015 }
11016
Marat Dukhande06f492020-04-09 00:19:31 -070011017 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011018 TEST_REQUIRES_ARM_NEON;
11019 for (size_t k = 5; k < 8; k++) {
11020 GemmMicrokernelTester()
11021 .mr(1)
11022 .nr(8)
11023 .kr(1)
11024 .sr(4)
11025 .m(1)
11026 .n(8)
11027 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011028 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011029 }
11030 }
11031
Marat Dukhande06f492020-04-09 00:19:31 -070011032 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011033 TEST_REQUIRES_ARM_NEON;
11034 for (size_t k = 5; k < 8; k++) {
11035 GemmMicrokernelTester()
11036 .mr(1)
11037 .nr(8)
11038 .kr(1)
11039 .sr(4)
11040 .m(1)
11041 .n(8)
11042 .k(k)
11043 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011044 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011045 }
11046 }
11047
Marat Dukhande06f492020-04-09 00:19:31 -070011048 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011049 TEST_REQUIRES_ARM_NEON;
11050 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011051 for (uint32_t n = 1; n <= 8; n++) {
11052 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011053 GemmMicrokernelTester()
11054 .mr(1)
11055 .nr(8)
11056 .kr(1)
11057 .sr(4)
11058 .m(m)
11059 .n(n)
11060 .k(k)
11061 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011062 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011063 }
11064 }
11065 }
11066 }
11067
Marat Dukhande06f492020-04-09 00:19:31 -070011068 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011069 TEST_REQUIRES_ARM_NEON;
11070 for (size_t k = 8; k <= 40; k += 4) {
11071 GemmMicrokernelTester()
11072 .mr(1)
11073 .nr(8)
11074 .kr(1)
11075 .sr(4)
11076 .m(1)
11077 .n(8)
11078 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011079 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011080 }
11081 }
11082
Marat Dukhande06f492020-04-09 00:19:31 -070011083 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011084 TEST_REQUIRES_ARM_NEON;
11085 for (size_t k = 8; k <= 40; k += 4) {
11086 GemmMicrokernelTester()
11087 .mr(1)
11088 .nr(8)
11089 .kr(1)
11090 .sr(4)
11091 .m(1)
11092 .n(8)
11093 .k(k)
11094 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011095 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011096 }
11097 }
11098
Marat Dukhande06f492020-04-09 00:19:31 -070011099 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011100 TEST_REQUIRES_ARM_NEON;
11101 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011102 for (uint32_t n = 1; n <= 8; n++) {
11103 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011104 GemmMicrokernelTester()
11105 .mr(1)
11106 .nr(8)
11107 .kr(1)
11108 .sr(4)
11109 .m(m)
11110 .n(n)
11111 .k(k)
11112 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011113 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011114 }
11115 }
11116 }
11117 }
11118
Marat Dukhande06f492020-04-09 00:19:31 -070011119 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011120 TEST_REQUIRES_ARM_NEON;
11121 for (uint32_t n = 9; n < 16; n++) {
11122 for (size_t k = 1; k <= 20; k += 5) {
11123 GemmMicrokernelTester()
11124 .mr(1)
11125 .nr(8)
11126 .kr(1)
11127 .sr(4)
11128 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011129 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011130 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011131 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011132 }
11133 }
11134 }
11135
Marat Dukhande06f492020-04-09 00:19:31 -070011136 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011137 TEST_REQUIRES_ARM_NEON;
11138 for (uint32_t n = 9; n < 16; n++) {
11139 for (size_t k = 1; k <= 20; k += 5) {
11140 GemmMicrokernelTester()
11141 .mr(1)
11142 .nr(8)
11143 .kr(1)
11144 .sr(4)
11145 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011146 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011147 .k(k)
11148 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011149 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011150 }
11151 }
11152 }
11153
Marat Dukhande06f492020-04-09 00:19:31 -070011154 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011155 TEST_REQUIRES_ARM_NEON;
11156 for (uint32_t n = 9; n < 16; n++) {
11157 for (size_t k = 1; k <= 20; k += 5) {
11158 GemmMicrokernelTester()
11159 .mr(1)
11160 .nr(8)
11161 .kr(1)
11162 .sr(4)
11163 .m(1)
11164 .n(n)
11165 .k(k)
11166 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011167 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011168 }
11169 }
11170 }
11171
Marat Dukhande06f492020-04-09 00:19:31 -070011172 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011173 TEST_REQUIRES_ARM_NEON;
11174 for (uint32_t n = 9; n < 16; n++) {
11175 for (size_t k = 1; k <= 20; k += 5) {
11176 for (uint32_t m = 1; m <= 1; m++) {
11177 GemmMicrokernelTester()
11178 .mr(1)
11179 .nr(8)
11180 .kr(1)
11181 .sr(4)
11182 .m(m)
11183 .n(n)
11184 .k(k)
11185 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011186 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011187 }
11188 }
11189 }
11190 }
11191
Marat Dukhande06f492020-04-09 00:19:31 -070011192 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011193 TEST_REQUIRES_ARM_NEON;
11194 for (uint32_t n = 16; n <= 24; n += 8) {
11195 for (size_t k = 1; k <= 20; k += 5) {
11196 GemmMicrokernelTester()
11197 .mr(1)
11198 .nr(8)
11199 .kr(1)
11200 .sr(4)
11201 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011202 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011203 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011204 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011205 }
11206 }
11207 }
11208
Marat Dukhande06f492020-04-09 00:19:31 -070011209 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011210 TEST_REQUIRES_ARM_NEON;
11211 for (uint32_t n = 16; n <= 24; n += 8) {
11212 for (size_t k = 1; k <= 20; k += 5) {
11213 GemmMicrokernelTester()
11214 .mr(1)
11215 .nr(8)
11216 .kr(1)
11217 .sr(4)
11218 .m(1)
11219 .n(n)
11220 .k(k)
11221 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011222 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011223 }
11224 }
11225 }
11226
Marat Dukhande06f492020-04-09 00:19:31 -070011227 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011228 TEST_REQUIRES_ARM_NEON;
11229 for (uint32_t n = 16; n <= 24; n += 8) {
11230 for (size_t k = 1; k <= 20; k += 5) {
11231 GemmMicrokernelTester()
11232 .mr(1)
11233 .nr(8)
11234 .kr(1)
11235 .sr(4)
11236 .m(1)
11237 .n(n)
11238 .k(k)
11239 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011240 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011241 }
11242 }
11243 }
11244
Marat Dukhande06f492020-04-09 00:19:31 -070011245 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011246 TEST_REQUIRES_ARM_NEON;
11247 for (uint32_t n = 16; n <= 24; n += 8) {
11248 for (size_t k = 1; k <= 20; k += 5) {
11249 for (uint32_t m = 1; m <= 1; m++) {
11250 GemmMicrokernelTester()
11251 .mr(1)
11252 .nr(8)
11253 .kr(1)
11254 .sr(4)
11255 .m(m)
11256 .n(n)
11257 .k(k)
11258 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011259 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011260 }
11261 }
11262 }
11263 }
11264
Marat Dukhande06f492020-04-09 00:19:31 -070011265 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011266 TEST_REQUIRES_ARM_NEON;
11267 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011268 for (uint32_t n = 1; n <= 8; n++) {
11269 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011270 GemmMicrokernelTester()
11271 .mr(1)
11272 .nr(8)
11273 .kr(1)
11274 .sr(4)
11275 .m(m)
11276 .n(n)
11277 .k(k)
11278 .cm_stride(11)
11279 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011280 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011281 }
11282 }
11283 }
11284 }
11285
Marat Dukhande06f492020-04-09 00:19:31 -070011286 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011287 TEST_REQUIRES_ARM_NEON;
11288 GemmMicrokernelTester()
11289 .mr(1)
11290 .nr(8)
11291 .kr(1)
11292 .sr(4)
11293 .m(1)
11294 .n(8)
11295 .k(4)
11296 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011297 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011298 }
11299
Marat Dukhande06f492020-04-09 00:19:31 -070011300 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011301 TEST_REQUIRES_ARM_NEON;
11302 GemmMicrokernelTester()
11303 .mr(1)
11304 .nr(8)
11305 .kr(1)
11306 .sr(4)
11307 .m(1)
11308 .n(8)
11309 .k(4)
11310 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011311 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011312 }
11313
Marat Dukhande06f492020-04-09 00:19:31 -070011314 TEST(F32_GEMMINC_MINMAX_1X8S4__NEON, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011315 TEST_REQUIRES_ARM_NEON;
11316 GemmMicrokernelTester()
11317 .mr(1)
11318 .nr(8)
11319 .kr(1)
11320 .sr(4)
11321 .m(1)
11322 .n(8)
11323 .k(4)
11324 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011325 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011326 }
11327#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11328
11329
11330#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070011331 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011332 TEST_REQUIRES_ARM_NEON;
11333 GemmMicrokernelTester()
11334 .mr(4)
11335 .nr(8)
11336 .kr(1)
11337 .sr(4)
11338 .m(4)
11339 .n(8)
11340 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011341 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011342 }
11343
Marat Dukhande06f492020-04-09 00:19:31 -070011344 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011345 TEST_REQUIRES_ARM_NEON;
11346 GemmMicrokernelTester()
11347 .mr(4)
11348 .nr(8)
11349 .kr(1)
11350 .sr(4)
11351 .m(4)
11352 .n(8)
11353 .k(4)
11354 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011355 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011356 }
11357
Marat Dukhande06f492020-04-09 00:19:31 -070011358 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011359 TEST_REQUIRES_ARM_NEON;
11360 GemmMicrokernelTester()
11361 .mr(4)
11362 .nr(8)
11363 .kr(1)
11364 .sr(4)
11365 .m(4)
11366 .n(8)
11367 .k(4)
11368 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011369 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011370 }
11371
Marat Dukhande06f492020-04-09 00:19:31 -070011372 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011373 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011374 for (uint32_t n = 1; n <= 8; n++) {
11375 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011376 GemmMicrokernelTester()
11377 .mr(4)
11378 .nr(8)
11379 .kr(1)
11380 .sr(4)
11381 .m(m)
11382 .n(n)
11383 .k(4)
11384 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011385 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011386 }
11387 }
11388 }
11389
Marat Dukhande06f492020-04-09 00:19:31 -070011390 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011391 TEST_REQUIRES_ARM_NEON;
11392 for (uint32_t m = 1; m <= 4; m++) {
11393 GemmMicrokernelTester()
11394 .mr(4)
11395 .nr(8)
11396 .kr(1)
11397 .sr(4)
11398 .m(m)
11399 .n(8)
11400 .k(4)
11401 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011402 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011403 }
11404 }
11405
Marat Dukhande06f492020-04-09 00:19:31 -070011406 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011407 TEST_REQUIRES_ARM_NEON;
11408 for (uint32_t n = 1; n <= 8; n++) {
11409 GemmMicrokernelTester()
11410 .mr(4)
11411 .nr(8)
11412 .kr(1)
11413 .sr(4)
11414 .m(4)
11415 .n(n)
11416 .k(4)
11417 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011418 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011419 }
11420 }
11421
Marat Dukhande06f492020-04-09 00:19:31 -070011422 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011423 TEST_REQUIRES_ARM_NEON;
11424 for (size_t k = 1; k < 4; k++) {
11425 GemmMicrokernelTester()
11426 .mr(4)
11427 .nr(8)
11428 .kr(1)
11429 .sr(4)
11430 .m(4)
11431 .n(8)
11432 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011433 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011434 }
11435 }
11436
Marat Dukhande06f492020-04-09 00:19:31 -070011437 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011438 TEST_REQUIRES_ARM_NEON;
11439 for (size_t k = 1; k < 4; k++) {
11440 GemmMicrokernelTester()
11441 .mr(4)
11442 .nr(8)
11443 .kr(1)
11444 .sr(4)
11445 .m(4)
11446 .n(8)
11447 .k(k)
11448 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011449 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011450 }
11451 }
11452
Marat Dukhande06f492020-04-09 00:19:31 -070011453 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011454 TEST_REQUIRES_ARM_NEON;
11455 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011456 for (uint32_t n = 1; n <= 8; n++) {
11457 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011458 GemmMicrokernelTester()
11459 .mr(4)
11460 .nr(8)
11461 .kr(1)
11462 .sr(4)
11463 .m(m)
11464 .n(n)
11465 .k(k)
11466 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011467 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011468 }
11469 }
11470 }
11471 }
11472
Marat Dukhande06f492020-04-09 00:19:31 -070011473 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011474 TEST_REQUIRES_ARM_NEON;
11475 for (size_t k = 5; k < 8; k++) {
11476 GemmMicrokernelTester()
11477 .mr(4)
11478 .nr(8)
11479 .kr(1)
11480 .sr(4)
11481 .m(4)
11482 .n(8)
11483 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011484 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011485 }
11486 }
11487
Marat Dukhande06f492020-04-09 00:19:31 -070011488 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011489 TEST_REQUIRES_ARM_NEON;
11490 for (size_t k = 5; k < 8; k++) {
11491 GemmMicrokernelTester()
11492 .mr(4)
11493 .nr(8)
11494 .kr(1)
11495 .sr(4)
11496 .m(4)
11497 .n(8)
11498 .k(k)
11499 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011500 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011501 }
11502 }
11503
Marat Dukhande06f492020-04-09 00:19:31 -070011504 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011505 TEST_REQUIRES_ARM_NEON;
11506 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011507 for (uint32_t n = 1; n <= 8; n++) {
11508 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011509 GemmMicrokernelTester()
11510 .mr(4)
11511 .nr(8)
11512 .kr(1)
11513 .sr(4)
11514 .m(m)
11515 .n(n)
11516 .k(k)
11517 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011518 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011519 }
11520 }
11521 }
11522 }
11523
Marat Dukhande06f492020-04-09 00:19:31 -070011524 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011525 TEST_REQUIRES_ARM_NEON;
11526 for (size_t k = 8; k <= 40; k += 4) {
11527 GemmMicrokernelTester()
11528 .mr(4)
11529 .nr(8)
11530 .kr(1)
11531 .sr(4)
11532 .m(4)
11533 .n(8)
11534 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011535 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011536 }
11537 }
11538
Marat Dukhande06f492020-04-09 00:19:31 -070011539 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011540 TEST_REQUIRES_ARM_NEON;
11541 for (size_t k = 8; k <= 40; k += 4) {
11542 GemmMicrokernelTester()
11543 .mr(4)
11544 .nr(8)
11545 .kr(1)
11546 .sr(4)
11547 .m(4)
11548 .n(8)
11549 .k(k)
11550 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011551 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011552 }
11553 }
11554
Marat Dukhande06f492020-04-09 00:19:31 -070011555 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011556 TEST_REQUIRES_ARM_NEON;
11557 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011558 for (uint32_t n = 1; n <= 8; n++) {
11559 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011560 GemmMicrokernelTester()
11561 .mr(4)
11562 .nr(8)
11563 .kr(1)
11564 .sr(4)
11565 .m(m)
11566 .n(n)
11567 .k(k)
11568 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011569 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011570 }
11571 }
11572 }
11573 }
11574
Marat Dukhande06f492020-04-09 00:19:31 -070011575 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011576 TEST_REQUIRES_ARM_NEON;
11577 for (uint32_t n = 9; n < 16; n++) {
11578 for (size_t k = 1; k <= 20; k += 5) {
11579 GemmMicrokernelTester()
11580 .mr(4)
11581 .nr(8)
11582 .kr(1)
11583 .sr(4)
11584 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011585 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011586 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011587 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011588 }
11589 }
11590 }
11591
Marat Dukhande06f492020-04-09 00:19:31 -070011592 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011593 TEST_REQUIRES_ARM_NEON;
11594 for (uint32_t n = 9; n < 16; n++) {
11595 for (size_t k = 1; k <= 20; k += 5) {
11596 GemmMicrokernelTester()
11597 .mr(4)
11598 .nr(8)
11599 .kr(1)
11600 .sr(4)
11601 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011602 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011603 .k(k)
11604 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011605 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011606 }
11607 }
11608 }
11609
Marat Dukhande06f492020-04-09 00:19:31 -070011610 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011611 TEST_REQUIRES_ARM_NEON;
11612 for (uint32_t n = 9; n < 16; n++) {
11613 for (size_t k = 1; k <= 20; k += 5) {
11614 GemmMicrokernelTester()
11615 .mr(4)
11616 .nr(8)
11617 .kr(1)
11618 .sr(4)
11619 .m(4)
11620 .n(n)
11621 .k(k)
11622 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011623 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011624 }
11625 }
11626 }
11627
Marat Dukhande06f492020-04-09 00:19:31 -070011628 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011629 TEST_REQUIRES_ARM_NEON;
11630 for (uint32_t n = 9; n < 16; n++) {
11631 for (size_t k = 1; k <= 20; k += 5) {
11632 for (uint32_t m = 1; m <= 4; m++) {
11633 GemmMicrokernelTester()
11634 .mr(4)
11635 .nr(8)
11636 .kr(1)
11637 .sr(4)
11638 .m(m)
11639 .n(n)
11640 .k(k)
11641 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011642 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011643 }
11644 }
11645 }
11646 }
11647
Marat Dukhande06f492020-04-09 00:19:31 -070011648 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011649 TEST_REQUIRES_ARM_NEON;
11650 for (uint32_t n = 16; n <= 24; n += 8) {
11651 for (size_t k = 1; k <= 20; k += 5) {
11652 GemmMicrokernelTester()
11653 .mr(4)
11654 .nr(8)
11655 .kr(1)
11656 .sr(4)
11657 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011658 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011659 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011660 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011661 }
11662 }
11663 }
11664
Marat Dukhande06f492020-04-09 00:19:31 -070011665 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011666 TEST_REQUIRES_ARM_NEON;
11667 for (uint32_t n = 16; n <= 24; n += 8) {
11668 for (size_t k = 1; k <= 20; k += 5) {
11669 GemmMicrokernelTester()
11670 .mr(4)
11671 .nr(8)
11672 .kr(1)
11673 .sr(4)
11674 .m(4)
11675 .n(n)
11676 .k(k)
11677 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011678 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011679 }
11680 }
11681 }
11682
Marat Dukhande06f492020-04-09 00:19:31 -070011683 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011684 TEST_REQUIRES_ARM_NEON;
11685 for (uint32_t n = 16; n <= 24; n += 8) {
11686 for (size_t k = 1; k <= 20; k += 5) {
11687 GemmMicrokernelTester()
11688 .mr(4)
11689 .nr(8)
11690 .kr(1)
11691 .sr(4)
11692 .m(4)
11693 .n(n)
11694 .k(k)
11695 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011696 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011697 }
11698 }
11699 }
11700
Marat Dukhande06f492020-04-09 00:19:31 -070011701 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011702 TEST_REQUIRES_ARM_NEON;
11703 for (uint32_t n = 16; n <= 24; n += 8) {
11704 for (size_t k = 1; k <= 20; k += 5) {
11705 for (uint32_t m = 1; m <= 4; m++) {
11706 GemmMicrokernelTester()
11707 .mr(4)
11708 .nr(8)
11709 .kr(1)
11710 .sr(4)
11711 .m(m)
11712 .n(n)
11713 .k(k)
11714 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011715 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011716 }
11717 }
11718 }
11719 }
11720
Marat Dukhande06f492020-04-09 00:19:31 -070011721 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011722 TEST_REQUIRES_ARM_NEON;
11723 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011724 for (uint32_t n = 1; n <= 8; n++) {
11725 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011726 GemmMicrokernelTester()
11727 .mr(4)
11728 .nr(8)
11729 .kr(1)
11730 .sr(4)
11731 .m(m)
11732 .n(n)
11733 .k(k)
11734 .cm_stride(11)
11735 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011736 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011737 }
11738 }
11739 }
11740 }
11741
Marat Dukhande06f492020-04-09 00:19:31 -070011742 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011743 TEST_REQUIRES_ARM_NEON;
11744 GemmMicrokernelTester()
11745 .mr(4)
11746 .nr(8)
11747 .kr(1)
11748 .sr(4)
11749 .m(4)
11750 .n(8)
11751 .k(4)
11752 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011753 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011754 }
11755
Marat Dukhande06f492020-04-09 00:19:31 -070011756 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011757 TEST_REQUIRES_ARM_NEON;
11758 GemmMicrokernelTester()
11759 .mr(4)
11760 .nr(8)
11761 .kr(1)
11762 .sr(4)
11763 .m(4)
11764 .n(8)
11765 .k(4)
11766 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011767 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011768 }
11769
Marat Dukhande06f492020-04-09 00:19:31 -070011770 TEST(F32_GEMMINC_MINMAX_4X8S4__NEON, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011771 TEST_REQUIRES_ARM_NEON;
11772 GemmMicrokernelTester()
11773 .mr(4)
11774 .nr(8)
11775 .kr(1)
11776 .sr(4)
11777 .m(4)
11778 .n(8)
11779 .k(4)
11780 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011781 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011782 }
11783#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11784
11785
11786#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070011787 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011788 TEST_REQUIRES_ARM_NEON_FMA;
11789 GemmMicrokernelTester()
11790 .mr(4)
11791 .nr(8)
11792 .kr(1)
11793 .sr(4)
11794 .m(4)
11795 .n(8)
11796 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011797 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011798 }
11799
Marat Dukhande06f492020-04-09 00:19:31 -070011800 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011801 TEST_REQUIRES_ARM_NEON_FMA;
11802 GemmMicrokernelTester()
11803 .mr(4)
11804 .nr(8)
11805 .kr(1)
11806 .sr(4)
11807 .m(4)
11808 .n(8)
11809 .k(4)
11810 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011811 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011812 }
11813
Marat Dukhande06f492020-04-09 00:19:31 -070011814 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011815 TEST_REQUIRES_ARM_NEON_FMA;
11816 GemmMicrokernelTester()
11817 .mr(4)
11818 .nr(8)
11819 .kr(1)
11820 .sr(4)
11821 .m(4)
11822 .n(8)
11823 .k(4)
11824 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011825 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011826 }
11827
Marat Dukhande06f492020-04-09 00:19:31 -070011828 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011829 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011830 for (uint32_t n = 1; n <= 8; n++) {
11831 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011832 GemmMicrokernelTester()
11833 .mr(4)
11834 .nr(8)
11835 .kr(1)
11836 .sr(4)
11837 .m(m)
11838 .n(n)
11839 .k(4)
11840 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011841 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011842 }
11843 }
11844 }
11845
Marat Dukhande06f492020-04-09 00:19:31 -070011846 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011847 TEST_REQUIRES_ARM_NEON_FMA;
11848 for (uint32_t m = 1; m <= 4; m++) {
11849 GemmMicrokernelTester()
11850 .mr(4)
11851 .nr(8)
11852 .kr(1)
11853 .sr(4)
11854 .m(m)
11855 .n(8)
11856 .k(4)
11857 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011858 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011859 }
11860 }
11861
Marat Dukhande06f492020-04-09 00:19:31 -070011862 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011863 TEST_REQUIRES_ARM_NEON_FMA;
11864 for (uint32_t n = 1; n <= 8; n++) {
11865 GemmMicrokernelTester()
11866 .mr(4)
11867 .nr(8)
11868 .kr(1)
11869 .sr(4)
11870 .m(4)
11871 .n(n)
11872 .k(4)
11873 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011874 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011875 }
11876 }
11877
Marat Dukhande06f492020-04-09 00:19:31 -070011878 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011879 TEST_REQUIRES_ARM_NEON_FMA;
11880 for (size_t k = 1; k < 4; k++) {
11881 GemmMicrokernelTester()
11882 .mr(4)
11883 .nr(8)
11884 .kr(1)
11885 .sr(4)
11886 .m(4)
11887 .n(8)
11888 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011889 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011890 }
11891 }
11892
Marat Dukhande06f492020-04-09 00:19:31 -070011893 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011894 TEST_REQUIRES_ARM_NEON_FMA;
11895 for (size_t k = 1; k < 4; k++) {
11896 GemmMicrokernelTester()
11897 .mr(4)
11898 .nr(8)
11899 .kr(1)
11900 .sr(4)
11901 .m(4)
11902 .n(8)
11903 .k(k)
11904 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011905 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011906 }
11907 }
11908
Marat Dukhande06f492020-04-09 00:19:31 -070011909 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011910 TEST_REQUIRES_ARM_NEON_FMA;
11911 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011912 for (uint32_t n = 1; n <= 8; n++) {
11913 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011914 GemmMicrokernelTester()
11915 .mr(4)
11916 .nr(8)
11917 .kr(1)
11918 .sr(4)
11919 .m(m)
11920 .n(n)
11921 .k(k)
11922 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011923 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011924 }
11925 }
11926 }
11927 }
11928
Marat Dukhande06f492020-04-09 00:19:31 -070011929 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011930 TEST_REQUIRES_ARM_NEON_FMA;
11931 for (size_t k = 5; k < 8; k++) {
11932 GemmMicrokernelTester()
11933 .mr(4)
11934 .nr(8)
11935 .kr(1)
11936 .sr(4)
11937 .m(4)
11938 .n(8)
11939 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011940 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011941 }
11942 }
11943
Marat Dukhande06f492020-04-09 00:19:31 -070011944 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011945 TEST_REQUIRES_ARM_NEON_FMA;
11946 for (size_t k = 5; k < 8; k++) {
11947 GemmMicrokernelTester()
11948 .mr(4)
11949 .nr(8)
11950 .kr(1)
11951 .sr(4)
11952 .m(4)
11953 .n(8)
11954 .k(k)
11955 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011956 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011957 }
11958 }
11959
Marat Dukhande06f492020-04-09 00:19:31 -070011960 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011961 TEST_REQUIRES_ARM_NEON_FMA;
11962 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011963 for (uint32_t n = 1; n <= 8; n++) {
11964 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011965 GemmMicrokernelTester()
11966 .mr(4)
11967 .nr(8)
11968 .kr(1)
11969 .sr(4)
11970 .m(m)
11971 .n(n)
11972 .k(k)
11973 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011974 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011975 }
11976 }
11977 }
11978 }
11979
Marat Dukhande06f492020-04-09 00:19:31 -070011980 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011981 TEST_REQUIRES_ARM_NEON_FMA;
11982 for (size_t k = 8; k <= 40; k += 4) {
11983 GemmMicrokernelTester()
11984 .mr(4)
11985 .nr(8)
11986 .kr(1)
11987 .sr(4)
11988 .m(4)
11989 .n(8)
11990 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011991 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011992 }
11993 }
11994
Marat Dukhande06f492020-04-09 00:19:31 -070011995 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011996 TEST_REQUIRES_ARM_NEON_FMA;
11997 for (size_t k = 8; k <= 40; k += 4) {
11998 GemmMicrokernelTester()
11999 .mr(4)
12000 .nr(8)
12001 .kr(1)
12002 .sr(4)
12003 .m(4)
12004 .n(8)
12005 .k(k)
12006 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012007 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012008 }
12009 }
12010
Marat Dukhande06f492020-04-09 00:19:31 -070012011 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012012 TEST_REQUIRES_ARM_NEON_FMA;
12013 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012014 for (uint32_t n = 1; n <= 8; n++) {
12015 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012016 GemmMicrokernelTester()
12017 .mr(4)
12018 .nr(8)
12019 .kr(1)
12020 .sr(4)
12021 .m(m)
12022 .n(n)
12023 .k(k)
12024 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012025 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012026 }
12027 }
12028 }
12029 }
12030
Marat Dukhande06f492020-04-09 00:19:31 -070012031 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012032 TEST_REQUIRES_ARM_NEON_FMA;
12033 for (uint32_t n = 9; n < 16; n++) {
12034 for (size_t k = 1; k <= 20; k += 5) {
12035 GemmMicrokernelTester()
12036 .mr(4)
12037 .nr(8)
12038 .kr(1)
12039 .sr(4)
12040 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012041 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012042 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012043 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012044 }
12045 }
12046 }
12047
Marat Dukhande06f492020-04-09 00:19:31 -070012048 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012049 TEST_REQUIRES_ARM_NEON_FMA;
12050 for (uint32_t n = 9; n < 16; n++) {
12051 for (size_t k = 1; k <= 20; k += 5) {
12052 GemmMicrokernelTester()
12053 .mr(4)
12054 .nr(8)
12055 .kr(1)
12056 .sr(4)
12057 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012058 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012059 .k(k)
12060 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012061 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012062 }
12063 }
12064 }
12065
Marat Dukhande06f492020-04-09 00:19:31 -070012066 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012067 TEST_REQUIRES_ARM_NEON_FMA;
12068 for (uint32_t n = 9; n < 16; n++) {
12069 for (size_t k = 1; k <= 20; k += 5) {
12070 GemmMicrokernelTester()
12071 .mr(4)
12072 .nr(8)
12073 .kr(1)
12074 .sr(4)
12075 .m(4)
12076 .n(n)
12077 .k(k)
12078 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012079 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012080 }
12081 }
12082 }
12083
Marat Dukhande06f492020-04-09 00:19:31 -070012084 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012085 TEST_REQUIRES_ARM_NEON_FMA;
12086 for (uint32_t n = 9; n < 16; n++) {
12087 for (size_t k = 1; k <= 20; k += 5) {
12088 for (uint32_t m = 1; m <= 4; m++) {
12089 GemmMicrokernelTester()
12090 .mr(4)
12091 .nr(8)
12092 .kr(1)
12093 .sr(4)
12094 .m(m)
12095 .n(n)
12096 .k(k)
12097 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012098 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012099 }
12100 }
12101 }
12102 }
12103
Marat Dukhande06f492020-04-09 00:19:31 -070012104 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012105 TEST_REQUIRES_ARM_NEON_FMA;
12106 for (uint32_t n = 16; n <= 24; n += 8) {
12107 for (size_t k = 1; k <= 20; k += 5) {
12108 GemmMicrokernelTester()
12109 .mr(4)
12110 .nr(8)
12111 .kr(1)
12112 .sr(4)
12113 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012114 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012115 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012116 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012117 }
12118 }
12119 }
12120
Marat Dukhande06f492020-04-09 00:19:31 -070012121 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012122 TEST_REQUIRES_ARM_NEON_FMA;
12123 for (uint32_t n = 16; n <= 24; n += 8) {
12124 for (size_t k = 1; k <= 20; k += 5) {
12125 GemmMicrokernelTester()
12126 .mr(4)
12127 .nr(8)
12128 .kr(1)
12129 .sr(4)
12130 .m(4)
12131 .n(n)
12132 .k(k)
12133 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012134 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012135 }
12136 }
12137 }
12138
Marat Dukhande06f492020-04-09 00:19:31 -070012139 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012140 TEST_REQUIRES_ARM_NEON_FMA;
12141 for (uint32_t n = 16; n <= 24; n += 8) {
12142 for (size_t k = 1; k <= 20; k += 5) {
12143 GemmMicrokernelTester()
12144 .mr(4)
12145 .nr(8)
12146 .kr(1)
12147 .sr(4)
12148 .m(4)
12149 .n(n)
12150 .k(k)
12151 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012152 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012153 }
12154 }
12155 }
12156
Marat Dukhande06f492020-04-09 00:19:31 -070012157 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012158 TEST_REQUIRES_ARM_NEON_FMA;
12159 for (uint32_t n = 16; n <= 24; n += 8) {
12160 for (size_t k = 1; k <= 20; k += 5) {
12161 for (uint32_t m = 1; m <= 4; m++) {
12162 GemmMicrokernelTester()
12163 .mr(4)
12164 .nr(8)
12165 .kr(1)
12166 .sr(4)
12167 .m(m)
12168 .n(n)
12169 .k(k)
12170 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012171 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012172 }
12173 }
12174 }
12175 }
12176
Marat Dukhande06f492020-04-09 00:19:31 -070012177 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012178 TEST_REQUIRES_ARM_NEON_FMA;
12179 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012180 for (uint32_t n = 1; n <= 8; n++) {
12181 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012182 GemmMicrokernelTester()
12183 .mr(4)
12184 .nr(8)
12185 .kr(1)
12186 .sr(4)
12187 .m(m)
12188 .n(n)
12189 .k(k)
12190 .cm_stride(11)
12191 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012192 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012193 }
12194 }
12195 }
12196 }
12197
Marat Dukhande06f492020-04-09 00:19:31 -070012198 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012199 TEST_REQUIRES_ARM_NEON_FMA;
12200 GemmMicrokernelTester()
12201 .mr(4)
12202 .nr(8)
12203 .kr(1)
12204 .sr(4)
12205 .m(4)
12206 .n(8)
12207 .k(4)
12208 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012209 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012210 }
12211
Marat Dukhande06f492020-04-09 00:19:31 -070012212 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012213 TEST_REQUIRES_ARM_NEON_FMA;
12214 GemmMicrokernelTester()
12215 .mr(4)
12216 .nr(8)
12217 .kr(1)
12218 .sr(4)
12219 .m(4)
12220 .n(8)
12221 .k(4)
12222 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012223 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012224 }
12225
Marat Dukhande06f492020-04-09 00:19:31 -070012226 TEST(F32_GEMMINC_MINMAX_4X8S4__NEONFMA, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012227 TEST_REQUIRES_ARM_NEON_FMA;
12228 GemmMicrokernelTester()
12229 .mr(4)
12230 .nr(8)
12231 .kr(1)
12232 .sr(4)
12233 .m(4)
12234 .n(8)
12235 .k(4)
12236 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012237 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012238 }
12239#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12240
12241
Marat Dukhan1c587112020-04-08 20:04:28 -070012242#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070012243 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012244 TEST_REQUIRES_X86_SSE;
12245 GemmMicrokernelTester()
12246 .mr(1)
12247 .nr(8)
12248 .kr(1)
12249 .sr(1)
12250 .m(1)
12251 .n(8)
12252 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012253 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012254 }
12255
Marat Dukhande06f492020-04-09 00:19:31 -070012256 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012257 TEST_REQUIRES_X86_SSE;
12258 GemmMicrokernelTester()
12259 .mr(1)
12260 .nr(8)
12261 .kr(1)
12262 .sr(1)
12263 .m(1)
12264 .n(8)
12265 .k(1)
12266 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012267 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012268 }
12269
Marat Dukhande06f492020-04-09 00:19:31 -070012270 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012271 TEST_REQUIRES_X86_SSE;
12272 GemmMicrokernelTester()
12273 .mr(1)
12274 .nr(8)
12275 .kr(1)
12276 .sr(1)
12277 .m(1)
12278 .n(8)
12279 .k(1)
12280 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012281 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012282 }
12283
Marat Dukhande06f492020-04-09 00:19:31 -070012284 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012285 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012286 for (uint32_t n = 1; n <= 8; n++) {
12287 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012288 GemmMicrokernelTester()
12289 .mr(1)
12290 .nr(8)
12291 .kr(1)
12292 .sr(1)
12293 .m(m)
12294 .n(n)
12295 .k(1)
12296 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012297 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012298 }
12299 }
12300 }
12301
Marat Dukhande06f492020-04-09 00:19:31 -070012302 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012303 TEST_REQUIRES_X86_SSE;
12304 for (uint32_t m = 1; m <= 1; m++) {
12305 GemmMicrokernelTester()
12306 .mr(1)
12307 .nr(8)
12308 .kr(1)
12309 .sr(1)
12310 .m(m)
12311 .n(8)
12312 .k(1)
12313 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012314 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012315 }
12316 }
12317
Marat Dukhande06f492020-04-09 00:19:31 -070012318 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012319 TEST_REQUIRES_X86_SSE;
12320 for (uint32_t n = 1; n <= 8; n++) {
12321 GemmMicrokernelTester()
12322 .mr(1)
12323 .nr(8)
12324 .kr(1)
12325 .sr(1)
12326 .m(1)
12327 .n(n)
12328 .k(1)
12329 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012330 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012331 }
12332 }
12333
Marat Dukhande06f492020-04-09 00:19:31 -070012334 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012335 TEST_REQUIRES_X86_SSE;
12336 for (size_t k = 2; k < 10; k++) {
12337 GemmMicrokernelTester()
12338 .mr(1)
12339 .nr(8)
12340 .kr(1)
12341 .sr(1)
12342 .m(1)
12343 .n(8)
12344 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012345 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012346 }
12347 }
12348
Marat Dukhande06f492020-04-09 00:19:31 -070012349 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012350 TEST_REQUIRES_X86_SSE;
12351 for (size_t k = 2; k < 10; k++) {
12352 GemmMicrokernelTester()
12353 .mr(1)
12354 .nr(8)
12355 .kr(1)
12356 .sr(1)
12357 .m(1)
12358 .n(8)
12359 .k(k)
12360 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012361 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012362 }
12363 }
12364
Marat Dukhande06f492020-04-09 00:19:31 -070012365 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012366 TEST_REQUIRES_X86_SSE;
12367 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012368 for (uint32_t n = 1; n <= 8; n++) {
12369 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012370 GemmMicrokernelTester()
12371 .mr(1)
12372 .nr(8)
12373 .kr(1)
12374 .sr(1)
12375 .m(m)
12376 .n(n)
12377 .k(k)
12378 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012379 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012380 }
12381 }
12382 }
12383 }
12384
Marat Dukhande06f492020-04-09 00:19:31 -070012385 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012386 TEST_REQUIRES_X86_SSE;
12387 for (uint32_t n = 9; n < 16; n++) {
12388 for (size_t k = 1; k <= 5; k += 2) {
12389 GemmMicrokernelTester()
12390 .mr(1)
12391 .nr(8)
12392 .kr(1)
12393 .sr(1)
12394 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012395 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012396 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012397 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012398 }
12399 }
12400 }
12401
Marat Dukhande06f492020-04-09 00:19:31 -070012402 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012403 TEST_REQUIRES_X86_SSE;
12404 for (uint32_t n = 9; n < 16; n++) {
12405 for (size_t k = 1; k <= 5; k += 2) {
12406 GemmMicrokernelTester()
12407 .mr(1)
12408 .nr(8)
12409 .kr(1)
12410 .sr(1)
12411 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012412 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012413 .k(k)
12414 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012415 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012416 }
12417 }
12418 }
12419
Marat Dukhande06f492020-04-09 00:19:31 -070012420 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012421 TEST_REQUIRES_X86_SSE;
12422 for (uint32_t n = 9; n < 16; n++) {
12423 for (size_t k = 1; k <= 5; k += 2) {
12424 GemmMicrokernelTester()
12425 .mr(1)
12426 .nr(8)
12427 .kr(1)
12428 .sr(1)
12429 .m(1)
12430 .n(n)
12431 .k(k)
12432 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012433 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012434 }
12435 }
12436 }
12437
Marat Dukhande06f492020-04-09 00:19:31 -070012438 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012439 TEST_REQUIRES_X86_SSE;
12440 for (uint32_t n = 9; n < 16; n++) {
12441 for (size_t k = 1; k <= 5; k += 2) {
12442 for (uint32_t m = 1; m <= 1; m++) {
12443 GemmMicrokernelTester()
12444 .mr(1)
12445 .nr(8)
12446 .kr(1)
12447 .sr(1)
12448 .m(m)
12449 .n(n)
12450 .k(k)
12451 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012452 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012453 }
12454 }
12455 }
12456 }
12457
Marat Dukhande06f492020-04-09 00:19:31 -070012458 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012459 TEST_REQUIRES_X86_SSE;
12460 for (uint32_t n = 16; n <= 24; n += 8) {
12461 for (size_t k = 1; k <= 5; k += 2) {
12462 GemmMicrokernelTester()
12463 .mr(1)
12464 .nr(8)
12465 .kr(1)
12466 .sr(1)
12467 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012468 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012469 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012470 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012471 }
12472 }
12473 }
12474
Marat Dukhande06f492020-04-09 00:19:31 -070012475 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012476 TEST_REQUIRES_X86_SSE;
12477 for (uint32_t n = 16; n <= 24; n += 8) {
12478 for (size_t k = 1; k <= 5; k += 2) {
12479 GemmMicrokernelTester()
12480 .mr(1)
12481 .nr(8)
12482 .kr(1)
12483 .sr(1)
12484 .m(1)
12485 .n(n)
12486 .k(k)
12487 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012488 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012489 }
12490 }
12491 }
12492
Marat Dukhande06f492020-04-09 00:19:31 -070012493 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012494 TEST_REQUIRES_X86_SSE;
12495 for (uint32_t n = 16; n <= 24; n += 8) {
12496 for (size_t k = 1; k <= 5; k += 2) {
12497 GemmMicrokernelTester()
12498 .mr(1)
12499 .nr(8)
12500 .kr(1)
12501 .sr(1)
12502 .m(1)
12503 .n(n)
12504 .k(k)
12505 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012506 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012507 }
12508 }
12509 }
12510
Marat Dukhande06f492020-04-09 00:19:31 -070012511 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012512 TEST_REQUIRES_X86_SSE;
12513 for (uint32_t n = 16; n <= 24; n += 8) {
12514 for (size_t k = 1; k <= 5; k += 2) {
12515 for (uint32_t m = 1; m <= 1; m++) {
12516 GemmMicrokernelTester()
12517 .mr(1)
12518 .nr(8)
12519 .kr(1)
12520 .sr(1)
12521 .m(m)
12522 .n(n)
12523 .k(k)
12524 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012525 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012526 }
12527 }
12528 }
12529 }
12530
Marat Dukhande06f492020-04-09 00:19:31 -070012531 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012532 TEST_REQUIRES_X86_SSE;
12533 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012534 for (uint32_t n = 1; n <= 8; n++) {
12535 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012536 GemmMicrokernelTester()
12537 .mr(1)
12538 .nr(8)
12539 .kr(1)
12540 .sr(1)
12541 .m(m)
12542 .n(n)
12543 .k(k)
12544 .cm_stride(11)
12545 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012546 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012547 }
12548 }
12549 }
12550 }
12551
Marat Dukhande06f492020-04-09 00:19:31 -070012552 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012553 TEST_REQUIRES_X86_SSE;
12554 GemmMicrokernelTester()
12555 .mr(1)
12556 .nr(8)
12557 .kr(1)
12558 .sr(1)
12559 .m(1)
12560 .n(8)
12561 .k(1)
12562 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012563 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012564 }
12565
Marat Dukhande06f492020-04-09 00:19:31 -070012566 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012567 TEST_REQUIRES_X86_SSE;
12568 GemmMicrokernelTester()
12569 .mr(1)
12570 .nr(8)
12571 .kr(1)
12572 .sr(1)
12573 .m(1)
12574 .n(8)
12575 .k(1)
12576 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012577 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012578 }
12579
Marat Dukhande06f492020-04-09 00:19:31 -070012580 TEST(F32_GEMMINC_MINMAX_1X8__SSE_LOAD1, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012581 TEST_REQUIRES_X86_SSE;
12582 GemmMicrokernelTester()
12583 .mr(1)
12584 .nr(8)
12585 .kr(1)
12586 .sr(1)
12587 .m(1)
12588 .n(8)
12589 .k(1)
12590 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012591 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012592 }
12593#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12594
12595
12596#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070012597 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012598 TEST_REQUIRES_X86_SSE;
12599 GemmMicrokernelTester()
12600 .mr(1)
12601 .nr(8)
12602 .kr(1)
12603 .sr(1)
12604 .m(1)
12605 .n(8)
12606 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012607 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012608 }
12609
Marat Dukhande06f492020-04-09 00:19:31 -070012610 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012611 TEST_REQUIRES_X86_SSE;
12612 GemmMicrokernelTester()
12613 .mr(1)
12614 .nr(8)
12615 .kr(1)
12616 .sr(1)
12617 .m(1)
12618 .n(8)
12619 .k(4)
12620 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012621 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012622 }
12623
Marat Dukhande06f492020-04-09 00:19:31 -070012624 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012625 TEST_REQUIRES_X86_SSE;
12626 GemmMicrokernelTester()
12627 .mr(1)
12628 .nr(8)
12629 .kr(1)
12630 .sr(1)
12631 .m(1)
12632 .n(8)
12633 .k(4)
12634 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012635 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012636 }
12637
Marat Dukhande06f492020-04-09 00:19:31 -070012638 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012639 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012640 for (uint32_t n = 1; n <= 8; n++) {
12641 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012642 GemmMicrokernelTester()
12643 .mr(1)
12644 .nr(8)
12645 .kr(1)
12646 .sr(1)
12647 .m(m)
12648 .n(n)
12649 .k(4)
12650 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012651 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012652 }
12653 }
12654 }
12655
Marat Dukhande06f492020-04-09 00:19:31 -070012656 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012657 TEST_REQUIRES_X86_SSE;
12658 for (uint32_t m = 1; m <= 1; m++) {
12659 GemmMicrokernelTester()
12660 .mr(1)
12661 .nr(8)
12662 .kr(1)
12663 .sr(1)
12664 .m(m)
12665 .n(8)
12666 .k(4)
12667 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012668 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012669 }
12670 }
12671
Marat Dukhande06f492020-04-09 00:19:31 -070012672 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012673 TEST_REQUIRES_X86_SSE;
12674 for (uint32_t n = 1; n <= 8; n++) {
12675 GemmMicrokernelTester()
12676 .mr(1)
12677 .nr(8)
12678 .kr(1)
12679 .sr(1)
12680 .m(1)
12681 .n(n)
12682 .k(4)
12683 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012684 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012685 }
12686 }
12687
Marat Dukhande06f492020-04-09 00:19:31 -070012688 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012689 TEST_REQUIRES_X86_SSE;
12690 for (size_t k = 1; k < 4; k++) {
12691 GemmMicrokernelTester()
12692 .mr(1)
12693 .nr(8)
12694 .kr(1)
12695 .sr(1)
12696 .m(1)
12697 .n(8)
12698 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012699 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012700 }
12701 }
12702
Marat Dukhande06f492020-04-09 00:19:31 -070012703 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012704 TEST_REQUIRES_X86_SSE;
12705 for (size_t k = 1; k < 4; k++) {
12706 GemmMicrokernelTester()
12707 .mr(1)
12708 .nr(8)
12709 .kr(1)
12710 .sr(1)
12711 .m(1)
12712 .n(8)
12713 .k(k)
12714 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012715 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012716 }
12717 }
12718
Marat Dukhande06f492020-04-09 00:19:31 -070012719 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012720 TEST_REQUIRES_X86_SSE;
12721 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012722 for (uint32_t n = 1; n <= 8; n++) {
12723 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012724 GemmMicrokernelTester()
12725 .mr(1)
12726 .nr(8)
12727 .kr(1)
12728 .sr(1)
12729 .m(m)
12730 .n(n)
12731 .k(k)
12732 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012733 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012734 }
12735 }
12736 }
12737 }
12738
Marat Dukhande06f492020-04-09 00:19:31 -070012739 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012740 TEST_REQUIRES_X86_SSE;
12741 for (size_t k = 5; k < 8; k++) {
12742 GemmMicrokernelTester()
12743 .mr(1)
12744 .nr(8)
12745 .kr(1)
12746 .sr(1)
12747 .m(1)
12748 .n(8)
12749 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012750 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012751 }
12752 }
12753
Marat Dukhande06f492020-04-09 00:19:31 -070012754 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012755 TEST_REQUIRES_X86_SSE;
12756 for (size_t k = 5; k < 8; k++) {
12757 GemmMicrokernelTester()
12758 .mr(1)
12759 .nr(8)
12760 .kr(1)
12761 .sr(1)
12762 .m(1)
12763 .n(8)
12764 .k(k)
12765 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012766 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012767 }
12768 }
12769
Marat Dukhande06f492020-04-09 00:19:31 -070012770 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012771 TEST_REQUIRES_X86_SSE;
12772 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012773 for (uint32_t n = 1; n <= 8; n++) {
12774 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012775 GemmMicrokernelTester()
12776 .mr(1)
12777 .nr(8)
12778 .kr(1)
12779 .sr(1)
12780 .m(m)
12781 .n(n)
12782 .k(k)
12783 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012784 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012785 }
12786 }
12787 }
12788 }
12789
Marat Dukhande06f492020-04-09 00:19:31 -070012790 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012791 TEST_REQUIRES_X86_SSE;
12792 for (size_t k = 8; k <= 40; k += 4) {
12793 GemmMicrokernelTester()
12794 .mr(1)
12795 .nr(8)
12796 .kr(1)
12797 .sr(1)
12798 .m(1)
12799 .n(8)
12800 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012801 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012802 }
12803 }
12804
Marat Dukhande06f492020-04-09 00:19:31 -070012805 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012806 TEST_REQUIRES_X86_SSE;
12807 for (size_t k = 8; k <= 40; k += 4) {
12808 GemmMicrokernelTester()
12809 .mr(1)
12810 .nr(8)
12811 .kr(1)
12812 .sr(1)
12813 .m(1)
12814 .n(8)
12815 .k(k)
12816 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012817 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012818 }
12819 }
12820
Marat Dukhande06f492020-04-09 00:19:31 -070012821 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012822 TEST_REQUIRES_X86_SSE;
12823 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012824 for (uint32_t n = 1; n <= 8; n++) {
12825 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012826 GemmMicrokernelTester()
12827 .mr(1)
12828 .nr(8)
12829 .kr(1)
12830 .sr(1)
12831 .m(m)
12832 .n(n)
12833 .k(k)
12834 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012835 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012836 }
12837 }
12838 }
12839 }
12840
Marat Dukhande06f492020-04-09 00:19:31 -070012841 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012842 TEST_REQUIRES_X86_SSE;
12843 for (uint32_t n = 9; n < 16; n++) {
12844 for (size_t k = 1; k <= 20; k += 5) {
12845 GemmMicrokernelTester()
12846 .mr(1)
12847 .nr(8)
12848 .kr(1)
12849 .sr(1)
12850 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012851 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012852 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012853 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012854 }
12855 }
12856 }
12857
Marat Dukhande06f492020-04-09 00:19:31 -070012858 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012859 TEST_REQUIRES_X86_SSE;
12860 for (uint32_t n = 9; n < 16; n++) {
12861 for (size_t k = 1; k <= 20; k += 5) {
12862 GemmMicrokernelTester()
12863 .mr(1)
12864 .nr(8)
12865 .kr(1)
12866 .sr(1)
12867 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012868 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012869 .k(k)
12870 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012871 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012872 }
12873 }
12874 }
12875
Marat Dukhande06f492020-04-09 00:19:31 -070012876 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012877 TEST_REQUIRES_X86_SSE;
12878 for (uint32_t n = 9; n < 16; n++) {
12879 for (size_t k = 1; k <= 20; k += 5) {
12880 GemmMicrokernelTester()
12881 .mr(1)
12882 .nr(8)
12883 .kr(1)
12884 .sr(1)
12885 .m(1)
12886 .n(n)
12887 .k(k)
12888 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012889 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012890 }
12891 }
12892 }
12893
Marat Dukhande06f492020-04-09 00:19:31 -070012894 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012895 TEST_REQUIRES_X86_SSE;
12896 for (uint32_t n = 9; n < 16; n++) {
12897 for (size_t k = 1; k <= 20; k += 5) {
12898 for (uint32_t m = 1; m <= 1; m++) {
12899 GemmMicrokernelTester()
12900 .mr(1)
12901 .nr(8)
12902 .kr(1)
12903 .sr(1)
12904 .m(m)
12905 .n(n)
12906 .k(k)
12907 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012908 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012909 }
12910 }
12911 }
12912 }
12913
Marat Dukhande06f492020-04-09 00:19:31 -070012914 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012915 TEST_REQUIRES_X86_SSE;
12916 for (uint32_t n = 16; n <= 24; n += 8) {
12917 for (size_t k = 1; k <= 20; k += 5) {
12918 GemmMicrokernelTester()
12919 .mr(1)
12920 .nr(8)
12921 .kr(1)
12922 .sr(1)
12923 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012924 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012925 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012926 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012927 }
12928 }
12929 }
12930
Marat Dukhande06f492020-04-09 00:19:31 -070012931 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012932 TEST_REQUIRES_X86_SSE;
12933 for (uint32_t n = 16; n <= 24; n += 8) {
12934 for (size_t k = 1; k <= 20; k += 5) {
12935 GemmMicrokernelTester()
12936 .mr(1)
12937 .nr(8)
12938 .kr(1)
12939 .sr(1)
12940 .m(1)
12941 .n(n)
12942 .k(k)
12943 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012944 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012945 }
12946 }
12947 }
12948
Marat Dukhande06f492020-04-09 00:19:31 -070012949 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012950 TEST_REQUIRES_X86_SSE;
12951 for (uint32_t n = 16; n <= 24; n += 8) {
12952 for (size_t k = 1; k <= 20; k += 5) {
12953 GemmMicrokernelTester()
12954 .mr(1)
12955 .nr(8)
12956 .kr(1)
12957 .sr(1)
12958 .m(1)
12959 .n(n)
12960 .k(k)
12961 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012962 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012963 }
12964 }
12965 }
12966
Marat Dukhande06f492020-04-09 00:19:31 -070012967 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012968 TEST_REQUIRES_X86_SSE;
12969 for (uint32_t n = 16; n <= 24; n += 8) {
12970 for (size_t k = 1; k <= 20; k += 5) {
12971 for (uint32_t m = 1; m <= 1; m++) {
12972 GemmMicrokernelTester()
12973 .mr(1)
12974 .nr(8)
12975 .kr(1)
12976 .sr(1)
12977 .m(m)
12978 .n(n)
12979 .k(k)
12980 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012981 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012982 }
12983 }
12984 }
12985 }
12986
Marat Dukhande06f492020-04-09 00:19:31 -070012987 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012988 TEST_REQUIRES_X86_SSE;
12989 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012990 for (uint32_t n = 1; n <= 8; n++) {
12991 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012992 GemmMicrokernelTester()
12993 .mr(1)
12994 .nr(8)
12995 .kr(1)
12996 .sr(1)
12997 .m(m)
12998 .n(n)
12999 .k(k)
13000 .cm_stride(11)
13001 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013002 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013003 }
13004 }
13005 }
13006 }
13007
Marat Dukhande06f492020-04-09 00:19:31 -070013008 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013009 TEST_REQUIRES_X86_SSE;
13010 GemmMicrokernelTester()
13011 .mr(1)
13012 .nr(8)
13013 .kr(1)
13014 .sr(1)
13015 .m(1)
13016 .n(8)
13017 .k(4)
13018 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013019 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013020 }
13021
Marat Dukhande06f492020-04-09 00:19:31 -070013022 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013023 TEST_REQUIRES_X86_SSE;
13024 GemmMicrokernelTester()
13025 .mr(1)
13026 .nr(8)
13027 .kr(1)
13028 .sr(1)
13029 .m(1)
13030 .n(8)
13031 .k(4)
13032 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013033 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013034 }
13035
Marat Dukhande06f492020-04-09 00:19:31 -070013036 TEST(F32_GEMMINC_MINMAX_1X8__SSE_DUP, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013037 TEST_REQUIRES_X86_SSE;
13038 GemmMicrokernelTester()
13039 .mr(1)
13040 .nr(8)
13041 .kr(1)
13042 .sr(1)
13043 .m(1)
13044 .n(8)
13045 .k(4)
13046 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013047 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013048 }
13049#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13050
13051
13052#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070013053 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013054 TEST_REQUIRES_X86_SSE;
13055 GemmMicrokernelTester()
13056 .mr(4)
13057 .nr(8)
13058 .kr(1)
13059 .sr(1)
13060 .m(4)
13061 .n(8)
13062 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013063 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013064 }
13065
Marat Dukhande06f492020-04-09 00:19:31 -070013066 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013067 TEST_REQUIRES_X86_SSE;
13068 GemmMicrokernelTester()
13069 .mr(4)
13070 .nr(8)
13071 .kr(1)
13072 .sr(1)
13073 .m(4)
13074 .n(8)
13075 .k(4)
13076 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013077 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013078 }
13079
Marat Dukhande06f492020-04-09 00:19:31 -070013080 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013081 TEST_REQUIRES_X86_SSE;
13082 GemmMicrokernelTester()
13083 .mr(4)
13084 .nr(8)
13085 .kr(1)
13086 .sr(1)
13087 .m(4)
13088 .n(8)
13089 .k(4)
13090 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013091 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013092 }
13093
Marat Dukhande06f492020-04-09 00:19:31 -070013094 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013095 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013096 for (uint32_t n = 1; n <= 8; n++) {
13097 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013098 GemmMicrokernelTester()
13099 .mr(4)
13100 .nr(8)
13101 .kr(1)
13102 .sr(1)
13103 .m(m)
13104 .n(n)
13105 .k(4)
13106 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013107 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013108 }
13109 }
13110 }
13111
Marat Dukhande06f492020-04-09 00:19:31 -070013112 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013113 TEST_REQUIRES_X86_SSE;
13114 for (uint32_t m = 1; m <= 4; m++) {
13115 GemmMicrokernelTester()
13116 .mr(4)
13117 .nr(8)
13118 .kr(1)
13119 .sr(1)
13120 .m(m)
13121 .n(8)
13122 .k(4)
13123 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013124 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013125 }
13126 }
13127
Marat Dukhande06f492020-04-09 00:19:31 -070013128 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013129 TEST_REQUIRES_X86_SSE;
13130 for (uint32_t n = 1; n <= 8; n++) {
13131 GemmMicrokernelTester()
13132 .mr(4)
13133 .nr(8)
13134 .kr(1)
13135 .sr(1)
13136 .m(4)
13137 .n(n)
13138 .k(4)
13139 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013140 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013141 }
13142 }
13143
Marat Dukhande06f492020-04-09 00:19:31 -070013144 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013145 TEST_REQUIRES_X86_SSE;
13146 for (size_t k = 1; k < 4; k++) {
13147 GemmMicrokernelTester()
13148 .mr(4)
13149 .nr(8)
13150 .kr(1)
13151 .sr(1)
13152 .m(4)
13153 .n(8)
13154 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013155 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013156 }
13157 }
13158
Marat Dukhande06f492020-04-09 00:19:31 -070013159 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013160 TEST_REQUIRES_X86_SSE;
13161 for (size_t k = 1; k < 4; k++) {
13162 GemmMicrokernelTester()
13163 .mr(4)
13164 .nr(8)
13165 .kr(1)
13166 .sr(1)
13167 .m(4)
13168 .n(8)
13169 .k(k)
13170 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013171 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013172 }
13173 }
13174
Marat Dukhande06f492020-04-09 00:19:31 -070013175 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013176 TEST_REQUIRES_X86_SSE;
13177 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013178 for (uint32_t n = 1; n <= 8; n++) {
13179 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013180 GemmMicrokernelTester()
13181 .mr(4)
13182 .nr(8)
13183 .kr(1)
13184 .sr(1)
13185 .m(m)
13186 .n(n)
13187 .k(k)
13188 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013189 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013190 }
13191 }
13192 }
13193 }
13194
Marat Dukhande06f492020-04-09 00:19:31 -070013195 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013196 TEST_REQUIRES_X86_SSE;
13197 for (size_t k = 5; k < 8; k++) {
13198 GemmMicrokernelTester()
13199 .mr(4)
13200 .nr(8)
13201 .kr(1)
13202 .sr(1)
13203 .m(4)
13204 .n(8)
13205 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013206 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013207 }
13208 }
13209
Marat Dukhande06f492020-04-09 00:19:31 -070013210 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013211 TEST_REQUIRES_X86_SSE;
13212 for (size_t k = 5; k < 8; k++) {
13213 GemmMicrokernelTester()
13214 .mr(4)
13215 .nr(8)
13216 .kr(1)
13217 .sr(1)
13218 .m(4)
13219 .n(8)
13220 .k(k)
13221 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013222 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013223 }
13224 }
13225
Marat Dukhande06f492020-04-09 00:19:31 -070013226 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013227 TEST_REQUIRES_X86_SSE;
13228 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013229 for (uint32_t n = 1; n <= 8; n++) {
13230 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013231 GemmMicrokernelTester()
13232 .mr(4)
13233 .nr(8)
13234 .kr(1)
13235 .sr(1)
13236 .m(m)
13237 .n(n)
13238 .k(k)
13239 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013240 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013241 }
13242 }
13243 }
13244 }
13245
Marat Dukhande06f492020-04-09 00:19:31 -070013246 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013247 TEST_REQUIRES_X86_SSE;
13248 for (size_t k = 8; k <= 40; k += 4) {
13249 GemmMicrokernelTester()
13250 .mr(4)
13251 .nr(8)
13252 .kr(1)
13253 .sr(1)
13254 .m(4)
13255 .n(8)
13256 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013257 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013258 }
13259 }
13260
Marat Dukhande06f492020-04-09 00:19:31 -070013261 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013262 TEST_REQUIRES_X86_SSE;
13263 for (size_t k = 8; k <= 40; k += 4) {
13264 GemmMicrokernelTester()
13265 .mr(4)
13266 .nr(8)
13267 .kr(1)
13268 .sr(1)
13269 .m(4)
13270 .n(8)
13271 .k(k)
13272 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013273 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013274 }
13275 }
13276
Marat Dukhande06f492020-04-09 00:19:31 -070013277 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013278 TEST_REQUIRES_X86_SSE;
13279 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013280 for (uint32_t n = 1; n <= 8; n++) {
13281 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013282 GemmMicrokernelTester()
13283 .mr(4)
13284 .nr(8)
13285 .kr(1)
13286 .sr(1)
13287 .m(m)
13288 .n(n)
13289 .k(k)
13290 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013291 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013292 }
13293 }
13294 }
13295 }
13296
Marat Dukhande06f492020-04-09 00:19:31 -070013297 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013298 TEST_REQUIRES_X86_SSE;
13299 for (uint32_t n = 9; n < 16; n++) {
13300 for (size_t k = 1; k <= 20; k += 5) {
13301 GemmMicrokernelTester()
13302 .mr(4)
13303 .nr(8)
13304 .kr(1)
13305 .sr(1)
13306 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013307 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070013308 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013309 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013310 }
13311 }
13312 }
13313
Marat Dukhande06f492020-04-09 00:19:31 -070013314 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013315 TEST_REQUIRES_X86_SSE;
13316 for (uint32_t n = 9; n < 16; n++) {
13317 for (size_t k = 1; k <= 20; k += 5) {
13318 GemmMicrokernelTester()
13319 .mr(4)
13320 .nr(8)
13321 .kr(1)
13322 .sr(1)
13323 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013324 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070013325 .k(k)
13326 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013327 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013328 }
13329 }
13330 }
13331
Marat Dukhande06f492020-04-09 00:19:31 -070013332 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013333 TEST_REQUIRES_X86_SSE;
13334 for (uint32_t n = 9; n < 16; n++) {
13335 for (size_t k = 1; k <= 20; k += 5) {
13336 GemmMicrokernelTester()
13337 .mr(4)
13338 .nr(8)
13339 .kr(1)
13340 .sr(1)
13341 .m(4)
13342 .n(n)
13343 .k(k)
13344 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013345 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013346 }
13347 }
13348 }
13349
Marat Dukhande06f492020-04-09 00:19:31 -070013350 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013351 TEST_REQUIRES_X86_SSE;
13352 for (uint32_t n = 9; n < 16; n++) {
13353 for (size_t k = 1; k <= 20; k += 5) {
13354 for (uint32_t m = 1; m <= 4; m++) {
13355 GemmMicrokernelTester()
13356 .mr(4)
13357 .nr(8)
13358 .kr(1)
13359 .sr(1)
13360 .m(m)
13361 .n(n)
13362 .k(k)
13363 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013364 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013365 }
13366 }
13367 }
13368 }
13369
Marat Dukhande06f492020-04-09 00:19:31 -070013370 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013371 TEST_REQUIRES_X86_SSE;
13372 for (uint32_t n = 16; n <= 24; n += 8) {
13373 for (size_t k = 1; k <= 20; k += 5) {
13374 GemmMicrokernelTester()
13375 .mr(4)
13376 .nr(8)
13377 .kr(1)
13378 .sr(1)
13379 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013380 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070013381 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013382 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013383 }
13384 }
13385 }
13386
Marat Dukhande06f492020-04-09 00:19:31 -070013387 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013388 TEST_REQUIRES_X86_SSE;
13389 for (uint32_t n = 16; n <= 24; n += 8) {
13390 for (size_t k = 1; k <= 20; k += 5) {
13391 GemmMicrokernelTester()
13392 .mr(4)
13393 .nr(8)
13394 .kr(1)
13395 .sr(1)
13396 .m(4)
13397 .n(n)
13398 .k(k)
13399 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013400 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013401 }
13402 }
13403 }
13404
Marat Dukhande06f492020-04-09 00:19:31 -070013405 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013406 TEST_REQUIRES_X86_SSE;
13407 for (uint32_t n = 16; n <= 24; n += 8) {
13408 for (size_t k = 1; k <= 20; k += 5) {
13409 GemmMicrokernelTester()
13410 .mr(4)
13411 .nr(8)
13412 .kr(1)
13413 .sr(1)
13414 .m(4)
13415 .n(n)
13416 .k(k)
13417 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013418 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013419 }
13420 }
13421 }
13422
Marat Dukhande06f492020-04-09 00:19:31 -070013423 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013424 TEST_REQUIRES_X86_SSE;
13425 for (uint32_t n = 16; n <= 24; n += 8) {
13426 for (size_t k = 1; k <= 20; k += 5) {
13427 for (uint32_t m = 1; m <= 4; m++) {
13428 GemmMicrokernelTester()
13429 .mr(4)
13430 .nr(8)
13431 .kr(1)
13432 .sr(1)
13433 .m(m)
13434 .n(n)
13435 .k(k)
13436 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013437 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013438 }
13439 }
13440 }
13441 }
13442
Marat Dukhande06f492020-04-09 00:19:31 -070013443 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013444 TEST_REQUIRES_X86_SSE;
13445 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013446 for (uint32_t n = 1; n <= 8; n++) {
13447 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013448 GemmMicrokernelTester()
13449 .mr(4)
13450 .nr(8)
13451 .kr(1)
13452 .sr(1)
13453 .m(m)
13454 .n(n)
13455 .k(k)
13456 .cm_stride(11)
13457 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013458 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013459 }
13460 }
13461 }
13462 }
13463
Marat Dukhande06f492020-04-09 00:19:31 -070013464 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013465 TEST_REQUIRES_X86_SSE;
13466 GemmMicrokernelTester()
13467 .mr(4)
13468 .nr(8)
13469 .kr(1)
13470 .sr(1)
13471 .m(4)
13472 .n(8)
13473 .k(4)
13474 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013475 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013476 }
13477
Marat Dukhande06f492020-04-09 00:19:31 -070013478 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013479 TEST_REQUIRES_X86_SSE;
13480 GemmMicrokernelTester()
13481 .mr(4)
13482 .nr(8)
13483 .kr(1)
13484 .sr(1)
13485 .m(4)
13486 .n(8)
13487 .k(4)
13488 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013489 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013490 }
13491
Marat Dukhande06f492020-04-09 00:19:31 -070013492 TEST(F32_GEMMINC_MINMAX_4X8__SSE_DUP, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013493 TEST_REQUIRES_X86_SSE;
13494 GemmMicrokernelTester()
13495 .mr(4)
13496 .nr(8)
13497 .kr(1)
13498 .sr(1)
13499 .m(4)
13500 .n(8)
13501 .k(4)
13502 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013503 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013504 }
13505#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13506
13507
13508#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080013509 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_eq_4) {
13510 TEST_REQUIRES_X86_SSE;
13511 GemmMicrokernelTester()
13512 .mr(3)
13513 .nr(8)
13514 .kr(1)
13515 .sr(4)
13516 .m(3)
13517 .n(8)
13518 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013519 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013520 }
13521
13522 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, strided_cn) {
13523 TEST_REQUIRES_X86_SSE;
13524 GemmMicrokernelTester()
13525 .mr(3)
13526 .nr(8)
13527 .kr(1)
13528 .sr(4)
13529 .m(3)
13530 .n(8)
13531 .k(4)
13532 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013533 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013534 }
13535
13536 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_eq_4_strided_a) {
13537 TEST_REQUIRES_X86_SSE;
13538 GemmMicrokernelTester()
13539 .mr(3)
13540 .nr(8)
13541 .kr(1)
13542 .sr(4)
13543 .m(3)
13544 .n(8)
13545 .k(4)
13546 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013547 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013548 }
13549
13550 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_eq_4_subtile) {
13551 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013552 for (uint32_t n = 1; n <= 8; n++) {
13553 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013554 GemmMicrokernelTester()
13555 .mr(3)
13556 .nr(8)
13557 .kr(1)
13558 .sr(4)
13559 .m(m)
13560 .n(n)
13561 .k(4)
13562 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013563 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013564 }
13565 }
13566 }
13567
13568 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_eq_4_subtile_m) {
13569 TEST_REQUIRES_X86_SSE;
13570 for (uint32_t m = 1; m <= 3; m++) {
13571 GemmMicrokernelTester()
13572 .mr(3)
13573 .nr(8)
13574 .kr(1)
13575 .sr(4)
13576 .m(m)
13577 .n(8)
13578 .k(4)
13579 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013580 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013581 }
13582 }
13583
13584 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_eq_4_subtile_n) {
13585 TEST_REQUIRES_X86_SSE;
13586 for (uint32_t n = 1; n <= 8; n++) {
13587 GemmMicrokernelTester()
13588 .mr(3)
13589 .nr(8)
13590 .kr(1)
13591 .sr(4)
13592 .m(3)
13593 .n(n)
13594 .k(4)
13595 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013596 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013597 }
13598 }
13599
13600 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_lt_4) {
13601 TEST_REQUIRES_X86_SSE;
13602 for (size_t k = 1; k < 4; k++) {
13603 GemmMicrokernelTester()
13604 .mr(3)
13605 .nr(8)
13606 .kr(1)
13607 .sr(4)
13608 .m(3)
13609 .n(8)
13610 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013611 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013612 }
13613 }
13614
13615 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_lt_4_strided_a) {
13616 TEST_REQUIRES_X86_SSE;
13617 for (size_t k = 1; k < 4; k++) {
13618 GemmMicrokernelTester()
13619 .mr(3)
13620 .nr(8)
13621 .kr(1)
13622 .sr(4)
13623 .m(3)
13624 .n(8)
13625 .k(k)
13626 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013627 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013628 }
13629 }
13630
13631 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_lt_4_subtile) {
13632 TEST_REQUIRES_X86_SSE;
13633 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013634 for (uint32_t n = 1; n <= 8; n++) {
13635 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013636 GemmMicrokernelTester()
13637 .mr(3)
13638 .nr(8)
13639 .kr(1)
13640 .sr(4)
13641 .m(m)
13642 .n(n)
13643 .k(k)
13644 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013645 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013646 }
13647 }
13648 }
13649 }
13650
13651 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_gt_4) {
13652 TEST_REQUIRES_X86_SSE;
13653 for (size_t k = 5; k < 8; k++) {
13654 GemmMicrokernelTester()
13655 .mr(3)
13656 .nr(8)
13657 .kr(1)
13658 .sr(4)
13659 .m(3)
13660 .n(8)
13661 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013662 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013663 }
13664 }
13665
13666 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_gt_4_strided_a) {
13667 TEST_REQUIRES_X86_SSE;
13668 for (size_t k = 5; k < 8; k++) {
13669 GemmMicrokernelTester()
13670 .mr(3)
13671 .nr(8)
13672 .kr(1)
13673 .sr(4)
13674 .m(3)
13675 .n(8)
13676 .k(k)
13677 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013678 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013679 }
13680 }
13681
13682 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_gt_4_subtile) {
13683 TEST_REQUIRES_X86_SSE;
13684 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013685 for (uint32_t n = 1; n <= 8; n++) {
13686 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013687 GemmMicrokernelTester()
13688 .mr(3)
13689 .nr(8)
13690 .kr(1)
13691 .sr(4)
13692 .m(m)
13693 .n(n)
13694 .k(k)
13695 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013696 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013697 }
13698 }
13699 }
13700 }
13701
13702 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_div_4) {
13703 TEST_REQUIRES_X86_SSE;
13704 for (size_t k = 8; k <= 40; k += 4) {
13705 GemmMicrokernelTester()
13706 .mr(3)
13707 .nr(8)
13708 .kr(1)
13709 .sr(4)
13710 .m(3)
13711 .n(8)
13712 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013713 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013714 }
13715 }
13716
13717 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_div_4_strided_a) {
13718 TEST_REQUIRES_X86_SSE;
13719 for (size_t k = 8; k <= 40; k += 4) {
13720 GemmMicrokernelTester()
13721 .mr(3)
13722 .nr(8)
13723 .kr(1)
13724 .sr(4)
13725 .m(3)
13726 .n(8)
13727 .k(k)
13728 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013729 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013730 }
13731 }
13732
13733 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, k_div_4_subtile) {
13734 TEST_REQUIRES_X86_SSE;
13735 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013736 for (uint32_t n = 1; n <= 8; n++) {
13737 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013738 GemmMicrokernelTester()
13739 .mr(3)
13740 .nr(8)
13741 .kr(1)
13742 .sr(4)
13743 .m(m)
13744 .n(n)
13745 .k(k)
13746 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013747 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013748 }
13749 }
13750 }
13751 }
13752
13753 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_gt_8) {
13754 TEST_REQUIRES_X86_SSE;
13755 for (uint32_t n = 9; n < 16; n++) {
13756 for (size_t k = 1; k <= 20; k += 5) {
13757 GemmMicrokernelTester()
13758 .mr(3)
13759 .nr(8)
13760 .kr(1)
13761 .sr(4)
13762 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013763 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080013764 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013765 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013766 }
13767 }
13768 }
13769
13770 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_gt_8_strided_cn) {
13771 TEST_REQUIRES_X86_SSE;
13772 for (uint32_t n = 9; n < 16; n++) {
13773 for (size_t k = 1; k <= 20; k += 5) {
13774 GemmMicrokernelTester()
13775 .mr(3)
13776 .nr(8)
13777 .kr(1)
13778 .sr(4)
13779 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013780 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080013781 .k(k)
13782 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013783 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013784 }
13785 }
13786 }
13787
13788 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_gt_8_strided_a) {
13789 TEST_REQUIRES_X86_SSE;
13790 for (uint32_t n = 9; n < 16; n++) {
13791 for (size_t k = 1; k <= 20; k += 5) {
13792 GemmMicrokernelTester()
13793 .mr(3)
13794 .nr(8)
13795 .kr(1)
13796 .sr(4)
13797 .m(3)
13798 .n(n)
13799 .k(k)
13800 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013801 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013802 }
13803 }
13804 }
13805
13806 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_gt_8_subtile) {
13807 TEST_REQUIRES_X86_SSE;
13808 for (uint32_t n = 9; n < 16; n++) {
13809 for (size_t k = 1; k <= 20; k += 5) {
13810 for (uint32_t m = 1; m <= 3; m++) {
13811 GemmMicrokernelTester()
13812 .mr(3)
13813 .nr(8)
13814 .kr(1)
13815 .sr(4)
13816 .m(m)
13817 .n(n)
13818 .k(k)
13819 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013820 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013821 }
13822 }
13823 }
13824 }
13825
13826 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_div_8) {
13827 TEST_REQUIRES_X86_SSE;
13828 for (uint32_t n = 16; n <= 24; n += 8) {
13829 for (size_t k = 1; k <= 20; k += 5) {
13830 GemmMicrokernelTester()
13831 .mr(3)
13832 .nr(8)
13833 .kr(1)
13834 .sr(4)
13835 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013836 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080013837 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013838 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013839 }
13840 }
13841 }
13842
13843 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_div_8_strided_cn) {
13844 TEST_REQUIRES_X86_SSE;
13845 for (uint32_t n = 16; n <= 24; n += 8) {
13846 for (size_t k = 1; k <= 20; k += 5) {
13847 GemmMicrokernelTester()
13848 .mr(3)
13849 .nr(8)
13850 .kr(1)
13851 .sr(4)
13852 .m(3)
13853 .n(n)
13854 .k(k)
13855 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013856 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013857 }
13858 }
13859 }
13860
13861 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_div_8_strided_a) {
13862 TEST_REQUIRES_X86_SSE;
13863 for (uint32_t n = 16; n <= 24; n += 8) {
13864 for (size_t k = 1; k <= 20; k += 5) {
13865 GemmMicrokernelTester()
13866 .mr(3)
13867 .nr(8)
13868 .kr(1)
13869 .sr(4)
13870 .m(3)
13871 .n(n)
13872 .k(k)
13873 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013874 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013875 }
13876 }
13877 }
13878
13879 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, n_div_8_subtile) {
13880 TEST_REQUIRES_X86_SSE;
13881 for (uint32_t n = 16; n <= 24; n += 8) {
13882 for (size_t k = 1; k <= 20; k += 5) {
13883 for (uint32_t m = 1; m <= 3; m++) {
13884 GemmMicrokernelTester()
13885 .mr(3)
13886 .nr(8)
13887 .kr(1)
13888 .sr(4)
13889 .m(m)
13890 .n(n)
13891 .k(k)
13892 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013893 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013894 }
13895 }
13896 }
13897 }
13898
13899 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, strided_cm_subtile) {
13900 TEST_REQUIRES_X86_SSE;
13901 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013902 for (uint32_t n = 1; n <= 8; n++) {
13903 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013904 GemmMicrokernelTester()
13905 .mr(3)
13906 .nr(8)
13907 .kr(1)
13908 .sr(4)
13909 .m(m)
13910 .n(n)
13911 .k(k)
13912 .cm_stride(11)
13913 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013914 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013915 }
13916 }
13917 }
13918 }
13919
13920 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, qmin) {
13921 TEST_REQUIRES_X86_SSE;
13922 GemmMicrokernelTester()
13923 .mr(3)
13924 .nr(8)
13925 .kr(1)
13926 .sr(4)
13927 .m(3)
13928 .n(8)
13929 .k(4)
13930 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013931 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013932 }
13933
13934 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, qmax) {
13935 TEST_REQUIRES_X86_SSE;
13936 GemmMicrokernelTester()
13937 .mr(3)
13938 .nr(8)
13939 .kr(1)
13940 .sr(4)
13941 .m(3)
13942 .n(8)
13943 .k(4)
13944 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013945 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013946 }
13947
13948 TEST(F32_GEMMINC_MINMAX_3X8S4__SSE, strided_cm) {
13949 TEST_REQUIRES_X86_SSE;
13950 GemmMicrokernelTester()
13951 .mr(3)
13952 .nr(8)
13953 .kr(1)
13954 .sr(4)
13955 .m(3)
13956 .n(8)
13957 .k(4)
13958 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013959 .Test(xnn_f32_gemminc_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013960 }
13961#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13962
13963
13964#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080013965 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_eq_4) {
13966 TEST_REQUIRES_X86_SSE2;
13967 GemmMicrokernelTester()
13968 .mr(3)
13969 .nr(8)
13970 .kr(1)
13971 .sr(1)
13972 .m(3)
13973 .n(8)
13974 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013975 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013976 }
13977
13978 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, strided_cn) {
13979 TEST_REQUIRES_X86_SSE2;
13980 GemmMicrokernelTester()
13981 .mr(3)
13982 .nr(8)
13983 .kr(1)
13984 .sr(1)
13985 .m(3)
13986 .n(8)
13987 .k(4)
13988 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013989 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013990 }
13991
13992 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_eq_4_strided_a) {
13993 TEST_REQUIRES_X86_SSE2;
13994 GemmMicrokernelTester()
13995 .mr(3)
13996 .nr(8)
13997 .kr(1)
13998 .sr(1)
13999 .m(3)
14000 .n(8)
14001 .k(4)
14002 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014003 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014004 }
14005
14006 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_eq_4_subtile) {
14007 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014008 for (uint32_t n = 1; n <= 8; n++) {
14009 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014010 GemmMicrokernelTester()
14011 .mr(3)
14012 .nr(8)
14013 .kr(1)
14014 .sr(1)
14015 .m(m)
14016 .n(n)
14017 .k(4)
14018 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014019 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014020 }
14021 }
14022 }
14023
14024 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_eq_4_subtile_m) {
14025 TEST_REQUIRES_X86_SSE2;
14026 for (uint32_t m = 1; m <= 3; m++) {
14027 GemmMicrokernelTester()
14028 .mr(3)
14029 .nr(8)
14030 .kr(1)
14031 .sr(1)
14032 .m(m)
14033 .n(8)
14034 .k(4)
14035 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014036 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014037 }
14038 }
14039
14040 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_eq_4_subtile_n) {
14041 TEST_REQUIRES_X86_SSE2;
14042 for (uint32_t n = 1; n <= 8; n++) {
14043 GemmMicrokernelTester()
14044 .mr(3)
14045 .nr(8)
14046 .kr(1)
14047 .sr(1)
14048 .m(3)
14049 .n(n)
14050 .k(4)
14051 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014052 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014053 }
14054 }
14055
14056 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_lt_4) {
14057 TEST_REQUIRES_X86_SSE2;
14058 for (size_t k = 1; k < 4; k++) {
14059 GemmMicrokernelTester()
14060 .mr(3)
14061 .nr(8)
14062 .kr(1)
14063 .sr(1)
14064 .m(3)
14065 .n(8)
14066 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014067 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014068 }
14069 }
14070
14071 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_lt_4_strided_a) {
14072 TEST_REQUIRES_X86_SSE2;
14073 for (size_t k = 1; k < 4; k++) {
14074 GemmMicrokernelTester()
14075 .mr(3)
14076 .nr(8)
14077 .kr(1)
14078 .sr(1)
14079 .m(3)
14080 .n(8)
14081 .k(k)
14082 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014083 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014084 }
14085 }
14086
14087 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_lt_4_subtile) {
14088 TEST_REQUIRES_X86_SSE2;
14089 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014090 for (uint32_t n = 1; n <= 8; n++) {
14091 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014092 GemmMicrokernelTester()
14093 .mr(3)
14094 .nr(8)
14095 .kr(1)
14096 .sr(1)
14097 .m(m)
14098 .n(n)
14099 .k(k)
14100 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014101 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014102 }
14103 }
14104 }
14105 }
14106
14107 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_gt_4) {
14108 TEST_REQUIRES_X86_SSE2;
14109 for (size_t k = 5; k < 8; k++) {
14110 GemmMicrokernelTester()
14111 .mr(3)
14112 .nr(8)
14113 .kr(1)
14114 .sr(1)
14115 .m(3)
14116 .n(8)
14117 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014118 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014119 }
14120 }
14121
14122 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_gt_4_strided_a) {
14123 TEST_REQUIRES_X86_SSE2;
14124 for (size_t k = 5; k < 8; k++) {
14125 GemmMicrokernelTester()
14126 .mr(3)
14127 .nr(8)
14128 .kr(1)
14129 .sr(1)
14130 .m(3)
14131 .n(8)
14132 .k(k)
14133 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014134 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014135 }
14136 }
14137
14138 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_gt_4_subtile) {
14139 TEST_REQUIRES_X86_SSE2;
14140 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014141 for (uint32_t n = 1; n <= 8; n++) {
14142 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014143 GemmMicrokernelTester()
14144 .mr(3)
14145 .nr(8)
14146 .kr(1)
14147 .sr(1)
14148 .m(m)
14149 .n(n)
14150 .k(k)
14151 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014152 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014153 }
14154 }
14155 }
14156 }
14157
14158 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_div_4) {
14159 TEST_REQUIRES_X86_SSE2;
14160 for (size_t k = 8; k <= 40; k += 4) {
14161 GemmMicrokernelTester()
14162 .mr(3)
14163 .nr(8)
14164 .kr(1)
14165 .sr(1)
14166 .m(3)
14167 .n(8)
14168 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014169 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014170 }
14171 }
14172
14173 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_div_4_strided_a) {
14174 TEST_REQUIRES_X86_SSE2;
14175 for (size_t k = 8; k <= 40; k += 4) {
14176 GemmMicrokernelTester()
14177 .mr(3)
14178 .nr(8)
14179 .kr(1)
14180 .sr(1)
14181 .m(3)
14182 .n(8)
14183 .k(k)
14184 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014185 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014186 }
14187 }
14188
14189 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, k_div_4_subtile) {
14190 TEST_REQUIRES_X86_SSE2;
14191 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014192 for (uint32_t n = 1; n <= 8; n++) {
14193 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014194 GemmMicrokernelTester()
14195 .mr(3)
14196 .nr(8)
14197 .kr(1)
14198 .sr(1)
14199 .m(m)
14200 .n(n)
14201 .k(k)
14202 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014203 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014204 }
14205 }
14206 }
14207 }
14208
14209 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_gt_8) {
14210 TEST_REQUIRES_X86_SSE2;
14211 for (uint32_t n = 9; n < 16; n++) {
14212 for (size_t k = 1; k <= 20; k += 5) {
14213 GemmMicrokernelTester()
14214 .mr(3)
14215 .nr(8)
14216 .kr(1)
14217 .sr(1)
14218 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014219 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014220 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014221 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014222 }
14223 }
14224 }
14225
14226 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_gt_8_strided_cn) {
14227 TEST_REQUIRES_X86_SSE2;
14228 for (uint32_t n = 9; n < 16; n++) {
14229 for (size_t k = 1; k <= 20; k += 5) {
14230 GemmMicrokernelTester()
14231 .mr(3)
14232 .nr(8)
14233 .kr(1)
14234 .sr(1)
14235 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014236 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014237 .k(k)
14238 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014239 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014240 }
14241 }
14242 }
14243
14244 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_gt_8_strided_a) {
14245 TEST_REQUIRES_X86_SSE2;
14246 for (uint32_t n = 9; n < 16; n++) {
14247 for (size_t k = 1; k <= 20; k += 5) {
14248 GemmMicrokernelTester()
14249 .mr(3)
14250 .nr(8)
14251 .kr(1)
14252 .sr(1)
14253 .m(3)
14254 .n(n)
14255 .k(k)
14256 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014257 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014258 }
14259 }
14260 }
14261
14262 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_gt_8_subtile) {
14263 TEST_REQUIRES_X86_SSE2;
14264 for (uint32_t n = 9; n < 16; n++) {
14265 for (size_t k = 1; k <= 20; k += 5) {
14266 for (uint32_t m = 1; m <= 3; m++) {
14267 GemmMicrokernelTester()
14268 .mr(3)
14269 .nr(8)
14270 .kr(1)
14271 .sr(1)
14272 .m(m)
14273 .n(n)
14274 .k(k)
14275 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014276 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014277 }
14278 }
14279 }
14280 }
14281
14282 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_div_8) {
14283 TEST_REQUIRES_X86_SSE2;
14284 for (uint32_t n = 16; n <= 24; n += 8) {
14285 for (size_t k = 1; k <= 20; k += 5) {
14286 GemmMicrokernelTester()
14287 .mr(3)
14288 .nr(8)
14289 .kr(1)
14290 .sr(1)
14291 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014292 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014293 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014294 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014295 }
14296 }
14297 }
14298
14299 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_div_8_strided_cn) {
14300 TEST_REQUIRES_X86_SSE2;
14301 for (uint32_t n = 16; n <= 24; n += 8) {
14302 for (size_t k = 1; k <= 20; k += 5) {
14303 GemmMicrokernelTester()
14304 .mr(3)
14305 .nr(8)
14306 .kr(1)
14307 .sr(1)
14308 .m(3)
14309 .n(n)
14310 .k(k)
14311 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014312 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014313 }
14314 }
14315 }
14316
14317 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_div_8_strided_a) {
14318 TEST_REQUIRES_X86_SSE2;
14319 for (uint32_t n = 16; n <= 24; n += 8) {
14320 for (size_t k = 1; k <= 20; k += 5) {
14321 GemmMicrokernelTester()
14322 .mr(3)
14323 .nr(8)
14324 .kr(1)
14325 .sr(1)
14326 .m(3)
14327 .n(n)
14328 .k(k)
14329 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014330 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014331 }
14332 }
14333 }
14334
14335 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, n_div_8_subtile) {
14336 TEST_REQUIRES_X86_SSE2;
14337 for (uint32_t n = 16; n <= 24; n += 8) {
14338 for (size_t k = 1; k <= 20; k += 5) {
14339 for (uint32_t m = 1; m <= 3; m++) {
14340 GemmMicrokernelTester()
14341 .mr(3)
14342 .nr(8)
14343 .kr(1)
14344 .sr(1)
14345 .m(m)
14346 .n(n)
14347 .k(k)
14348 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014349 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014350 }
14351 }
14352 }
14353 }
14354
14355 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, strided_cm_subtile) {
14356 TEST_REQUIRES_X86_SSE2;
14357 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014358 for (uint32_t n = 1; n <= 8; n++) {
14359 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014360 GemmMicrokernelTester()
14361 .mr(3)
14362 .nr(8)
14363 .kr(1)
14364 .sr(1)
14365 .m(m)
14366 .n(n)
14367 .k(k)
14368 .cm_stride(11)
14369 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014370 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014371 }
14372 }
14373 }
14374 }
14375
14376 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, qmin) {
14377 TEST_REQUIRES_X86_SSE2;
14378 GemmMicrokernelTester()
14379 .mr(3)
14380 .nr(8)
14381 .kr(1)
14382 .sr(1)
14383 .m(3)
14384 .n(8)
14385 .k(4)
14386 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014387 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014388 }
14389
14390 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, qmax) {
14391 TEST_REQUIRES_X86_SSE2;
14392 GemmMicrokernelTester()
14393 .mr(3)
14394 .nr(8)
14395 .kr(1)
14396 .sr(1)
14397 .m(3)
14398 .n(8)
14399 .k(4)
14400 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014401 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014402 }
14403
14404 TEST(F32_GEMMINC_MINMAX_3X8__SSE2_DUP, strided_cm) {
14405 TEST_REQUIRES_X86_SSE2;
14406 GemmMicrokernelTester()
14407 .mr(3)
14408 .nr(8)
14409 .kr(1)
14410 .sr(1)
14411 .m(3)
14412 .n(8)
14413 .k(4)
14414 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014415 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014416 }
14417#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14418
14419
14420#if XNN_ARCH_X86 || XNN_ARCH_X86_64
14421 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_eq_4) {
14422 TEST_REQUIRES_X86_SSE2;
14423 GemmMicrokernelTester()
14424 .mr(4)
14425 .nr(8)
14426 .kr(1)
14427 .sr(1)
14428 .m(4)
14429 .n(8)
14430 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014431 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014432 }
14433
14434 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, strided_cn) {
14435 TEST_REQUIRES_X86_SSE2;
14436 GemmMicrokernelTester()
14437 .mr(4)
14438 .nr(8)
14439 .kr(1)
14440 .sr(1)
14441 .m(4)
14442 .n(8)
14443 .k(4)
14444 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014445 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014446 }
14447
14448 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_eq_4_strided_a) {
14449 TEST_REQUIRES_X86_SSE2;
14450 GemmMicrokernelTester()
14451 .mr(4)
14452 .nr(8)
14453 .kr(1)
14454 .sr(1)
14455 .m(4)
14456 .n(8)
14457 .k(4)
14458 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014459 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014460 }
14461
14462 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_eq_4_subtile) {
14463 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014464 for (uint32_t n = 1; n <= 8; n++) {
14465 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014466 GemmMicrokernelTester()
14467 .mr(4)
14468 .nr(8)
14469 .kr(1)
14470 .sr(1)
14471 .m(m)
14472 .n(n)
14473 .k(4)
14474 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014475 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014476 }
14477 }
14478 }
14479
14480 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_eq_4_subtile_m) {
14481 TEST_REQUIRES_X86_SSE2;
14482 for (uint32_t m = 1; m <= 4; m++) {
14483 GemmMicrokernelTester()
14484 .mr(4)
14485 .nr(8)
14486 .kr(1)
14487 .sr(1)
14488 .m(m)
14489 .n(8)
14490 .k(4)
14491 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014492 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014493 }
14494 }
14495
14496 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_eq_4_subtile_n) {
14497 TEST_REQUIRES_X86_SSE2;
14498 for (uint32_t n = 1; n <= 8; n++) {
14499 GemmMicrokernelTester()
14500 .mr(4)
14501 .nr(8)
14502 .kr(1)
14503 .sr(1)
14504 .m(4)
14505 .n(n)
14506 .k(4)
14507 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014508 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014509 }
14510 }
14511
14512 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_lt_4) {
14513 TEST_REQUIRES_X86_SSE2;
14514 for (size_t k = 1; k < 4; k++) {
14515 GemmMicrokernelTester()
14516 .mr(4)
14517 .nr(8)
14518 .kr(1)
14519 .sr(1)
14520 .m(4)
14521 .n(8)
14522 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014523 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014524 }
14525 }
14526
14527 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_lt_4_strided_a) {
14528 TEST_REQUIRES_X86_SSE2;
14529 for (size_t k = 1; k < 4; k++) {
14530 GemmMicrokernelTester()
14531 .mr(4)
14532 .nr(8)
14533 .kr(1)
14534 .sr(1)
14535 .m(4)
14536 .n(8)
14537 .k(k)
14538 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014539 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014540 }
14541 }
14542
14543 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_lt_4_subtile) {
14544 TEST_REQUIRES_X86_SSE2;
14545 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014546 for (uint32_t n = 1; n <= 8; n++) {
14547 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014548 GemmMicrokernelTester()
14549 .mr(4)
14550 .nr(8)
14551 .kr(1)
14552 .sr(1)
14553 .m(m)
14554 .n(n)
14555 .k(k)
14556 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014557 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014558 }
14559 }
14560 }
14561 }
14562
14563 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_gt_4) {
14564 TEST_REQUIRES_X86_SSE2;
14565 for (size_t k = 5; k < 8; k++) {
14566 GemmMicrokernelTester()
14567 .mr(4)
14568 .nr(8)
14569 .kr(1)
14570 .sr(1)
14571 .m(4)
14572 .n(8)
14573 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014574 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014575 }
14576 }
14577
14578 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_gt_4_strided_a) {
14579 TEST_REQUIRES_X86_SSE2;
14580 for (size_t k = 5; k < 8; k++) {
14581 GemmMicrokernelTester()
14582 .mr(4)
14583 .nr(8)
14584 .kr(1)
14585 .sr(1)
14586 .m(4)
14587 .n(8)
14588 .k(k)
14589 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014590 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014591 }
14592 }
14593
14594 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_gt_4_subtile) {
14595 TEST_REQUIRES_X86_SSE2;
14596 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014597 for (uint32_t n = 1; n <= 8; n++) {
14598 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014599 GemmMicrokernelTester()
14600 .mr(4)
14601 .nr(8)
14602 .kr(1)
14603 .sr(1)
14604 .m(m)
14605 .n(n)
14606 .k(k)
14607 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014608 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014609 }
14610 }
14611 }
14612 }
14613
14614 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_div_4) {
14615 TEST_REQUIRES_X86_SSE2;
14616 for (size_t k = 8; k <= 40; k += 4) {
14617 GemmMicrokernelTester()
14618 .mr(4)
14619 .nr(8)
14620 .kr(1)
14621 .sr(1)
14622 .m(4)
14623 .n(8)
14624 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014625 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014626 }
14627 }
14628
14629 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_div_4_strided_a) {
14630 TEST_REQUIRES_X86_SSE2;
14631 for (size_t k = 8; k <= 40; k += 4) {
14632 GemmMicrokernelTester()
14633 .mr(4)
14634 .nr(8)
14635 .kr(1)
14636 .sr(1)
14637 .m(4)
14638 .n(8)
14639 .k(k)
14640 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014641 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014642 }
14643 }
14644
14645 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, k_div_4_subtile) {
14646 TEST_REQUIRES_X86_SSE2;
14647 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014648 for (uint32_t n = 1; n <= 8; n++) {
14649 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014650 GemmMicrokernelTester()
14651 .mr(4)
14652 .nr(8)
14653 .kr(1)
14654 .sr(1)
14655 .m(m)
14656 .n(n)
14657 .k(k)
14658 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014659 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014660 }
14661 }
14662 }
14663 }
14664
14665 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_gt_8) {
14666 TEST_REQUIRES_X86_SSE2;
14667 for (uint32_t n = 9; n < 16; n++) {
14668 for (size_t k = 1; k <= 20; k += 5) {
14669 GemmMicrokernelTester()
14670 .mr(4)
14671 .nr(8)
14672 .kr(1)
14673 .sr(1)
14674 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014675 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014676 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014677 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014678 }
14679 }
14680 }
14681
14682 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_gt_8_strided_cn) {
14683 TEST_REQUIRES_X86_SSE2;
14684 for (uint32_t n = 9; n < 16; n++) {
14685 for (size_t k = 1; k <= 20; k += 5) {
14686 GemmMicrokernelTester()
14687 .mr(4)
14688 .nr(8)
14689 .kr(1)
14690 .sr(1)
14691 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014692 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014693 .k(k)
14694 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014695 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014696 }
14697 }
14698 }
14699
14700 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_gt_8_strided_a) {
14701 TEST_REQUIRES_X86_SSE2;
14702 for (uint32_t n = 9; n < 16; n++) {
14703 for (size_t k = 1; k <= 20; k += 5) {
14704 GemmMicrokernelTester()
14705 .mr(4)
14706 .nr(8)
14707 .kr(1)
14708 .sr(1)
14709 .m(4)
14710 .n(n)
14711 .k(k)
14712 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014713 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014714 }
14715 }
14716 }
14717
14718 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_gt_8_subtile) {
14719 TEST_REQUIRES_X86_SSE2;
14720 for (uint32_t n = 9; n < 16; n++) {
14721 for (size_t k = 1; k <= 20; k += 5) {
14722 for (uint32_t m = 1; m <= 4; m++) {
14723 GemmMicrokernelTester()
14724 .mr(4)
14725 .nr(8)
14726 .kr(1)
14727 .sr(1)
14728 .m(m)
14729 .n(n)
14730 .k(k)
14731 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014732 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014733 }
14734 }
14735 }
14736 }
14737
14738 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_div_8) {
14739 TEST_REQUIRES_X86_SSE2;
14740 for (uint32_t n = 16; n <= 24; n += 8) {
14741 for (size_t k = 1; k <= 20; k += 5) {
14742 GemmMicrokernelTester()
14743 .mr(4)
14744 .nr(8)
14745 .kr(1)
14746 .sr(1)
14747 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014748 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014749 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014750 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014751 }
14752 }
14753 }
14754
14755 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_div_8_strided_cn) {
14756 TEST_REQUIRES_X86_SSE2;
14757 for (uint32_t n = 16; n <= 24; n += 8) {
14758 for (size_t k = 1; k <= 20; k += 5) {
14759 GemmMicrokernelTester()
14760 .mr(4)
14761 .nr(8)
14762 .kr(1)
14763 .sr(1)
14764 .m(4)
14765 .n(n)
14766 .k(k)
14767 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014768 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014769 }
14770 }
14771 }
14772
14773 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_div_8_strided_a) {
14774 TEST_REQUIRES_X86_SSE2;
14775 for (uint32_t n = 16; n <= 24; n += 8) {
14776 for (size_t k = 1; k <= 20; k += 5) {
14777 GemmMicrokernelTester()
14778 .mr(4)
14779 .nr(8)
14780 .kr(1)
14781 .sr(1)
14782 .m(4)
14783 .n(n)
14784 .k(k)
14785 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014786 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014787 }
14788 }
14789 }
14790
14791 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, n_div_8_subtile) {
14792 TEST_REQUIRES_X86_SSE2;
14793 for (uint32_t n = 16; n <= 24; n += 8) {
14794 for (size_t k = 1; k <= 20; k += 5) {
14795 for (uint32_t m = 1; m <= 4; m++) {
14796 GemmMicrokernelTester()
14797 .mr(4)
14798 .nr(8)
14799 .kr(1)
14800 .sr(1)
14801 .m(m)
14802 .n(n)
14803 .k(k)
14804 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014805 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014806 }
14807 }
14808 }
14809 }
14810
14811 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, strided_cm_subtile) {
14812 TEST_REQUIRES_X86_SSE2;
14813 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014814 for (uint32_t n = 1; n <= 8; n++) {
14815 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014816 GemmMicrokernelTester()
14817 .mr(4)
14818 .nr(8)
14819 .kr(1)
14820 .sr(1)
14821 .m(m)
14822 .n(n)
14823 .k(k)
14824 .cm_stride(11)
14825 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014826 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014827 }
14828 }
14829 }
14830 }
14831
14832 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, qmin) {
14833 TEST_REQUIRES_X86_SSE2;
14834 GemmMicrokernelTester()
14835 .mr(4)
14836 .nr(8)
14837 .kr(1)
14838 .sr(1)
14839 .m(4)
14840 .n(8)
14841 .k(4)
14842 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014843 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014844 }
14845
14846 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, qmax) {
14847 TEST_REQUIRES_X86_SSE2;
14848 GemmMicrokernelTester()
14849 .mr(4)
14850 .nr(8)
14851 .kr(1)
14852 .sr(1)
14853 .m(4)
14854 .n(8)
14855 .k(4)
14856 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014857 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014858 }
14859
14860 TEST(F32_GEMMINC_MINMAX_4X8__SSE2_DUP, strided_cm) {
14861 TEST_REQUIRES_X86_SSE2;
14862 GemmMicrokernelTester()
14863 .mr(4)
14864 .nr(8)
14865 .kr(1)
14866 .sr(1)
14867 .m(4)
14868 .n(8)
14869 .k(4)
14870 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014871 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014872 }
14873#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14874
14875
14876#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070014877 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014878 TEST_REQUIRES_X86_AVX;
14879 GemmMicrokernelTester()
14880 .mr(1)
14881 .nr(8)
14882 .kr(1)
14883 .sr(1)
14884 .m(1)
14885 .n(8)
14886 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014887 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014888 }
14889
Marat Dukhande06f492020-04-09 00:19:31 -070014890 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014891 TEST_REQUIRES_X86_AVX;
14892 GemmMicrokernelTester()
14893 .mr(1)
14894 .nr(8)
14895 .kr(1)
14896 .sr(1)
14897 .m(1)
14898 .n(8)
14899 .k(1)
14900 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014901 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014902 }
14903
Marat Dukhande06f492020-04-09 00:19:31 -070014904 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014905 TEST_REQUIRES_X86_AVX;
14906 GemmMicrokernelTester()
14907 .mr(1)
14908 .nr(8)
14909 .kr(1)
14910 .sr(1)
14911 .m(1)
14912 .n(8)
14913 .k(1)
14914 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014915 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014916 }
14917
Marat Dukhande06f492020-04-09 00:19:31 -070014918 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014919 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014920 for (uint32_t n = 1; n <= 8; n++) {
14921 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014922 GemmMicrokernelTester()
14923 .mr(1)
14924 .nr(8)
14925 .kr(1)
14926 .sr(1)
14927 .m(m)
14928 .n(n)
14929 .k(1)
14930 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014931 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014932 }
14933 }
14934 }
14935
Marat Dukhande06f492020-04-09 00:19:31 -070014936 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014937 TEST_REQUIRES_X86_AVX;
14938 for (uint32_t m = 1; m <= 1; m++) {
14939 GemmMicrokernelTester()
14940 .mr(1)
14941 .nr(8)
14942 .kr(1)
14943 .sr(1)
14944 .m(m)
14945 .n(8)
14946 .k(1)
14947 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014948 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014949 }
14950 }
14951
Marat Dukhande06f492020-04-09 00:19:31 -070014952 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014953 TEST_REQUIRES_X86_AVX;
14954 for (uint32_t n = 1; n <= 8; n++) {
14955 GemmMicrokernelTester()
14956 .mr(1)
14957 .nr(8)
14958 .kr(1)
14959 .sr(1)
14960 .m(1)
14961 .n(n)
14962 .k(1)
14963 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014964 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014965 }
14966 }
14967
Marat Dukhande06f492020-04-09 00:19:31 -070014968 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014969 TEST_REQUIRES_X86_AVX;
14970 for (size_t k = 2; k < 10; k++) {
14971 GemmMicrokernelTester()
14972 .mr(1)
14973 .nr(8)
14974 .kr(1)
14975 .sr(1)
14976 .m(1)
14977 .n(8)
14978 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014979 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014980 }
14981 }
14982
Marat Dukhande06f492020-04-09 00:19:31 -070014983 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014984 TEST_REQUIRES_X86_AVX;
14985 for (size_t k = 2; k < 10; k++) {
14986 GemmMicrokernelTester()
14987 .mr(1)
14988 .nr(8)
14989 .kr(1)
14990 .sr(1)
14991 .m(1)
14992 .n(8)
14993 .k(k)
14994 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014995 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014996 }
14997 }
14998
Marat Dukhande06f492020-04-09 00:19:31 -070014999 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015000 TEST_REQUIRES_X86_AVX;
15001 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015002 for (uint32_t n = 1; n <= 8; n++) {
15003 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015004 GemmMicrokernelTester()
15005 .mr(1)
15006 .nr(8)
15007 .kr(1)
15008 .sr(1)
15009 .m(m)
15010 .n(n)
15011 .k(k)
15012 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015013 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015014 }
15015 }
15016 }
15017 }
15018
Marat Dukhande06f492020-04-09 00:19:31 -070015019 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015020 TEST_REQUIRES_X86_AVX;
15021 for (uint32_t n = 9; n < 16; n++) {
15022 for (size_t k = 1; k <= 5; k += 2) {
15023 GemmMicrokernelTester()
15024 .mr(1)
15025 .nr(8)
15026 .kr(1)
15027 .sr(1)
15028 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015029 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070015030 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015031 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015032 }
15033 }
15034 }
15035
Marat Dukhande06f492020-04-09 00:19:31 -070015036 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015037 TEST_REQUIRES_X86_AVX;
15038 for (uint32_t n = 9; n < 16; n++) {
15039 for (size_t k = 1; k <= 5; k += 2) {
15040 GemmMicrokernelTester()
15041 .mr(1)
15042 .nr(8)
15043 .kr(1)
15044 .sr(1)
15045 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015046 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070015047 .k(k)
15048 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015049 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015050 }
15051 }
15052 }
15053
Marat Dukhande06f492020-04-09 00:19:31 -070015054 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015055 TEST_REQUIRES_X86_AVX;
15056 for (uint32_t n = 9; n < 16; n++) {
15057 for (size_t k = 1; k <= 5; k += 2) {
15058 GemmMicrokernelTester()
15059 .mr(1)
15060 .nr(8)
15061 .kr(1)
15062 .sr(1)
15063 .m(1)
15064 .n(n)
15065 .k(k)
15066 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015067 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015068 }
15069 }
15070 }
15071
Marat Dukhande06f492020-04-09 00:19:31 -070015072 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015073 TEST_REQUIRES_X86_AVX;
15074 for (uint32_t n = 9; n < 16; n++) {
15075 for (size_t k = 1; k <= 5; k += 2) {
15076 for (uint32_t m = 1; m <= 1; m++) {
15077 GemmMicrokernelTester()
15078 .mr(1)
15079 .nr(8)
15080 .kr(1)
15081 .sr(1)
15082 .m(m)
15083 .n(n)
15084 .k(k)
15085 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015086 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015087 }
15088 }
15089 }
15090 }
15091
Marat Dukhande06f492020-04-09 00:19:31 -070015092 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015093 TEST_REQUIRES_X86_AVX;
15094 for (uint32_t n = 16; n <= 24; n += 8) {
15095 for (size_t k = 1; k <= 5; k += 2) {
15096 GemmMicrokernelTester()
15097 .mr(1)
15098 .nr(8)
15099 .kr(1)
15100 .sr(1)
15101 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015102 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070015103 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015104 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015105 }
15106 }
15107 }
15108
Marat Dukhande06f492020-04-09 00:19:31 -070015109 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015110 TEST_REQUIRES_X86_AVX;
15111 for (uint32_t n = 16; n <= 24; n += 8) {
15112 for (size_t k = 1; k <= 5; k += 2) {
15113 GemmMicrokernelTester()
15114 .mr(1)
15115 .nr(8)
15116 .kr(1)
15117 .sr(1)
15118 .m(1)
15119 .n(n)
15120 .k(k)
15121 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015122 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015123 }
15124 }
15125 }
15126
Marat Dukhande06f492020-04-09 00:19:31 -070015127 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015128 TEST_REQUIRES_X86_AVX;
15129 for (uint32_t n = 16; n <= 24; n += 8) {
15130 for (size_t k = 1; k <= 5; k += 2) {
15131 GemmMicrokernelTester()
15132 .mr(1)
15133 .nr(8)
15134 .kr(1)
15135 .sr(1)
15136 .m(1)
15137 .n(n)
15138 .k(k)
15139 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015140 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015141 }
15142 }
15143 }
15144
Marat Dukhande06f492020-04-09 00:19:31 -070015145 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015146 TEST_REQUIRES_X86_AVX;
15147 for (uint32_t n = 16; n <= 24; n += 8) {
15148 for (size_t k = 1; k <= 5; k += 2) {
15149 for (uint32_t m = 1; m <= 1; m++) {
15150 GemmMicrokernelTester()
15151 .mr(1)
15152 .nr(8)
15153 .kr(1)
15154 .sr(1)
15155 .m(m)
15156 .n(n)
15157 .k(k)
15158 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015159 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015160 }
15161 }
15162 }
15163 }
15164
Marat Dukhande06f492020-04-09 00:19:31 -070015165 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015166 TEST_REQUIRES_X86_AVX;
15167 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015168 for (uint32_t n = 1; n <= 8; n++) {
15169 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015170 GemmMicrokernelTester()
15171 .mr(1)
15172 .nr(8)
15173 .kr(1)
15174 .sr(1)
15175 .m(m)
15176 .n(n)
15177 .k(k)
15178 .cm_stride(11)
15179 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015180 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015181 }
15182 }
15183 }
15184 }
15185
Marat Dukhande06f492020-04-09 00:19:31 -070015186 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015187 TEST_REQUIRES_X86_AVX;
15188 GemmMicrokernelTester()
15189 .mr(1)
15190 .nr(8)
15191 .kr(1)
15192 .sr(1)
15193 .m(1)
15194 .n(8)
15195 .k(1)
15196 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015197 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015198 }
15199
Marat Dukhande06f492020-04-09 00:19:31 -070015200 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015201 TEST_REQUIRES_X86_AVX;
15202 GemmMicrokernelTester()
15203 .mr(1)
15204 .nr(8)
15205 .kr(1)
15206 .sr(1)
15207 .m(1)
15208 .n(8)
15209 .k(1)
15210 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015211 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015212 }
15213
Marat Dukhande06f492020-04-09 00:19:31 -070015214 TEST(F32_GEMMINC_MINMAX_1X8__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015215 TEST_REQUIRES_X86_AVX;
15216 GemmMicrokernelTester()
15217 .mr(1)
15218 .nr(8)
15219 .kr(1)
15220 .sr(1)
15221 .m(1)
15222 .n(8)
15223 .k(1)
15224 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015225 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015226 }
15227#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15228
15229
15230#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070015231 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015232 TEST_REQUIRES_X86_AVX;
15233 GemmMicrokernelTester()
15234 .mr(5)
15235 .nr(8)
15236 .kr(1)
15237 .sr(1)
15238 .m(5)
15239 .n(8)
15240 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015241 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015242 }
15243
Marat Dukhande06f492020-04-09 00:19:31 -070015244 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015245 TEST_REQUIRES_X86_AVX;
15246 GemmMicrokernelTester()
15247 .mr(5)
15248 .nr(8)
15249 .kr(1)
15250 .sr(1)
15251 .m(5)
15252 .n(8)
15253 .k(1)
15254 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015255 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015256 }
15257
Marat Dukhande06f492020-04-09 00:19:31 -070015258 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015259 TEST_REQUIRES_X86_AVX;
15260 GemmMicrokernelTester()
15261 .mr(5)
15262 .nr(8)
15263 .kr(1)
15264 .sr(1)
15265 .m(5)
15266 .n(8)
15267 .k(1)
15268 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015269 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015270 }
15271
Marat Dukhande06f492020-04-09 00:19:31 -070015272 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015273 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015274 for (uint32_t n = 1; n <= 8; n++) {
15275 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015276 GemmMicrokernelTester()
15277 .mr(5)
15278 .nr(8)
15279 .kr(1)
15280 .sr(1)
15281 .m(m)
15282 .n(n)
15283 .k(1)
15284 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015285 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015286 }
15287 }
15288 }
15289
Marat Dukhande06f492020-04-09 00:19:31 -070015290 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015291 TEST_REQUIRES_X86_AVX;
15292 for (uint32_t m = 1; m <= 5; m++) {
15293 GemmMicrokernelTester()
15294 .mr(5)
15295 .nr(8)
15296 .kr(1)
15297 .sr(1)
15298 .m(m)
15299 .n(8)
15300 .k(1)
15301 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015302 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015303 }
15304 }
15305
Marat Dukhande06f492020-04-09 00:19:31 -070015306 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015307 TEST_REQUIRES_X86_AVX;
15308 for (uint32_t n = 1; n <= 8; n++) {
15309 GemmMicrokernelTester()
15310 .mr(5)
15311 .nr(8)
15312 .kr(1)
15313 .sr(1)
15314 .m(5)
15315 .n(n)
15316 .k(1)
15317 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015318 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015319 }
15320 }
15321
Marat Dukhande06f492020-04-09 00:19:31 -070015322 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015323 TEST_REQUIRES_X86_AVX;
15324 for (size_t k = 2; k < 10; k++) {
15325 GemmMicrokernelTester()
15326 .mr(5)
15327 .nr(8)
15328 .kr(1)
15329 .sr(1)
15330 .m(5)
15331 .n(8)
15332 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015333 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015334 }
15335 }
15336
Marat Dukhande06f492020-04-09 00:19:31 -070015337 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015338 TEST_REQUIRES_X86_AVX;
15339 for (size_t k = 2; k < 10; k++) {
15340 GemmMicrokernelTester()
15341 .mr(5)
15342 .nr(8)
15343 .kr(1)
15344 .sr(1)
15345 .m(5)
15346 .n(8)
15347 .k(k)
15348 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015349 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015350 }
15351 }
15352
Marat Dukhande06f492020-04-09 00:19:31 -070015353 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015354 TEST_REQUIRES_X86_AVX;
15355 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015356 for (uint32_t n = 1; n <= 8; n++) {
15357 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015358 GemmMicrokernelTester()
15359 .mr(5)
15360 .nr(8)
15361 .kr(1)
15362 .sr(1)
15363 .m(m)
15364 .n(n)
15365 .k(k)
15366 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015367 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015368 }
15369 }
15370 }
15371 }
15372
Marat Dukhande06f492020-04-09 00:19:31 -070015373 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015374 TEST_REQUIRES_X86_AVX;
15375 for (uint32_t n = 9; n < 16; n++) {
15376 for (size_t k = 1; k <= 5; k += 2) {
15377 GemmMicrokernelTester()
15378 .mr(5)
15379 .nr(8)
15380 .kr(1)
15381 .sr(1)
15382 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015383 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070015384 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015385 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015386 }
15387 }
15388 }
15389
Marat Dukhande06f492020-04-09 00:19:31 -070015390 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015391 TEST_REQUIRES_X86_AVX;
15392 for (uint32_t n = 9; n < 16; n++) {
15393 for (size_t k = 1; k <= 5; k += 2) {
15394 GemmMicrokernelTester()
15395 .mr(5)
15396 .nr(8)
15397 .kr(1)
15398 .sr(1)
15399 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015400 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070015401 .k(k)
15402 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015403 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015404 }
15405 }
15406 }
15407
Marat Dukhande06f492020-04-09 00:19:31 -070015408 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015409 TEST_REQUIRES_X86_AVX;
15410 for (uint32_t n = 9; n < 16; n++) {
15411 for (size_t k = 1; k <= 5; k += 2) {
15412 GemmMicrokernelTester()
15413 .mr(5)
15414 .nr(8)
15415 .kr(1)
15416 .sr(1)
15417 .m(5)
15418 .n(n)
15419 .k(k)
15420 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015421 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015422 }
15423 }
15424 }
15425
Marat Dukhande06f492020-04-09 00:19:31 -070015426 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015427 TEST_REQUIRES_X86_AVX;
15428 for (uint32_t n = 9; n < 16; n++) {
15429 for (size_t k = 1; k <= 5; k += 2) {
15430 for (uint32_t m = 1; m <= 5; m++) {
15431 GemmMicrokernelTester()
15432 .mr(5)
15433 .nr(8)
15434 .kr(1)
15435 .sr(1)
15436 .m(m)
15437 .n(n)
15438 .k(k)
15439 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015440 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015441 }
15442 }
15443 }
15444 }
15445
Marat Dukhande06f492020-04-09 00:19:31 -070015446 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015447 TEST_REQUIRES_X86_AVX;
15448 for (uint32_t n = 16; n <= 24; n += 8) {
15449 for (size_t k = 1; k <= 5; k += 2) {
15450 GemmMicrokernelTester()
15451 .mr(5)
15452 .nr(8)
15453 .kr(1)
15454 .sr(1)
15455 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015456 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070015457 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015458 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015459 }
15460 }
15461 }
15462
Marat Dukhande06f492020-04-09 00:19:31 -070015463 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015464 TEST_REQUIRES_X86_AVX;
15465 for (uint32_t n = 16; n <= 24; n += 8) {
15466 for (size_t k = 1; k <= 5; k += 2) {
15467 GemmMicrokernelTester()
15468 .mr(5)
15469 .nr(8)
15470 .kr(1)
15471 .sr(1)
15472 .m(5)
15473 .n(n)
15474 .k(k)
15475 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015476 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015477 }
15478 }
15479 }
15480
Marat Dukhande06f492020-04-09 00:19:31 -070015481 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015482 TEST_REQUIRES_X86_AVX;
15483 for (uint32_t n = 16; n <= 24; n += 8) {
15484 for (size_t k = 1; k <= 5; k += 2) {
15485 GemmMicrokernelTester()
15486 .mr(5)
15487 .nr(8)
15488 .kr(1)
15489 .sr(1)
15490 .m(5)
15491 .n(n)
15492 .k(k)
15493 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015494 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015495 }
15496 }
15497 }
15498
Marat Dukhande06f492020-04-09 00:19:31 -070015499 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015500 TEST_REQUIRES_X86_AVX;
15501 for (uint32_t n = 16; n <= 24; n += 8) {
15502 for (size_t k = 1; k <= 5; k += 2) {
15503 for (uint32_t m = 1; m <= 5; m++) {
15504 GemmMicrokernelTester()
15505 .mr(5)
15506 .nr(8)
15507 .kr(1)
15508 .sr(1)
15509 .m(m)
15510 .n(n)
15511 .k(k)
15512 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015513 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015514 }
15515 }
15516 }
15517 }
15518
Marat Dukhande06f492020-04-09 00:19:31 -070015519 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015520 TEST_REQUIRES_X86_AVX;
15521 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015522 for (uint32_t n = 1; n <= 8; n++) {
15523 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015524 GemmMicrokernelTester()
15525 .mr(5)
15526 .nr(8)
15527 .kr(1)
15528 .sr(1)
15529 .m(m)
15530 .n(n)
15531 .k(k)
15532 .cm_stride(11)
15533 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015534 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015535 }
15536 }
15537 }
15538 }
15539
Marat Dukhande06f492020-04-09 00:19:31 -070015540 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015541 TEST_REQUIRES_X86_AVX;
15542 GemmMicrokernelTester()
15543 .mr(5)
15544 .nr(8)
15545 .kr(1)
15546 .sr(1)
15547 .m(5)
15548 .n(8)
15549 .k(1)
15550 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015551 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015552 }
15553
Marat Dukhande06f492020-04-09 00:19:31 -070015554 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015555 TEST_REQUIRES_X86_AVX;
15556 GemmMicrokernelTester()
15557 .mr(5)
15558 .nr(8)
15559 .kr(1)
15560 .sr(1)
15561 .m(5)
15562 .n(8)
15563 .k(1)
15564 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015565 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015566 }
15567
Marat Dukhande06f492020-04-09 00:19:31 -070015568 TEST(F32_GEMMINC_MINMAX_5X8__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015569 TEST_REQUIRES_X86_AVX;
15570 GemmMicrokernelTester()
15571 .mr(5)
15572 .nr(8)
15573 .kr(1)
15574 .sr(1)
15575 .m(5)
15576 .n(8)
15577 .k(1)
15578 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015579 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015580 }
15581#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15582
15583
15584#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070015585 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015586 TEST_REQUIRES_X86_AVX;
15587 GemmMicrokernelTester()
15588 .mr(6)
15589 .nr(8)
15590 .kr(1)
15591 .sr(1)
15592 .m(6)
15593 .n(8)
15594 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015595 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015596 }
15597
Marat Dukhande06f492020-04-09 00:19:31 -070015598 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015599 TEST_REQUIRES_X86_AVX;
15600 GemmMicrokernelTester()
15601 .mr(6)
15602 .nr(8)
15603 .kr(1)
15604 .sr(1)
15605 .m(6)
15606 .n(8)
15607 .k(1)
15608 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015609 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015610 }
15611
Marat Dukhande06f492020-04-09 00:19:31 -070015612 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015613 TEST_REQUIRES_X86_AVX;
15614 GemmMicrokernelTester()
15615 .mr(6)
15616 .nr(8)
15617 .kr(1)
15618 .sr(1)
15619 .m(6)
15620 .n(8)
15621 .k(1)
15622 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015623 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015624 }
15625
Marat Dukhande06f492020-04-09 00:19:31 -070015626 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015627 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015628 for (uint32_t n = 1; n <= 8; n++) {
15629 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015630 GemmMicrokernelTester()
15631 .mr(6)
15632 .nr(8)
15633 .kr(1)
15634 .sr(1)
15635 .m(m)
15636 .n(n)
15637 .k(1)
15638 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015639 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015640 }
15641 }
15642 }
15643
Marat Dukhande06f492020-04-09 00:19:31 -070015644 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015645 TEST_REQUIRES_X86_AVX;
15646 for (uint32_t m = 1; m <= 6; m++) {
15647 GemmMicrokernelTester()
15648 .mr(6)
15649 .nr(8)
15650 .kr(1)
15651 .sr(1)
15652 .m(m)
15653 .n(8)
15654 .k(1)
15655 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015656 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015657 }
15658 }
15659
Marat Dukhande06f492020-04-09 00:19:31 -070015660 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015661 TEST_REQUIRES_X86_AVX;
15662 for (uint32_t n = 1; n <= 8; n++) {
15663 GemmMicrokernelTester()
15664 .mr(6)
15665 .nr(8)
15666 .kr(1)
15667 .sr(1)
15668 .m(6)
15669 .n(n)
15670 .k(1)
15671 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015672 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015673 }
15674 }
15675
Marat Dukhande06f492020-04-09 00:19:31 -070015676 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015677 TEST_REQUIRES_X86_AVX;
15678 for (size_t k = 2; k < 10; k++) {
15679 GemmMicrokernelTester()
15680 .mr(6)
15681 .nr(8)
15682 .kr(1)
15683 .sr(1)
15684 .m(6)
15685 .n(8)
15686 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015687 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015688 }
15689 }
15690
Marat Dukhande06f492020-04-09 00:19:31 -070015691 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015692 TEST_REQUIRES_X86_AVX;
15693 for (size_t k = 2; k < 10; k++) {
15694 GemmMicrokernelTester()
15695 .mr(6)
15696 .nr(8)
15697 .kr(1)
15698 .sr(1)
15699 .m(6)
15700 .n(8)
15701 .k(k)
15702 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015703 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015704 }
15705 }
15706
Marat Dukhande06f492020-04-09 00:19:31 -070015707 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015708 TEST_REQUIRES_X86_AVX;
15709 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015710 for (uint32_t n = 1; n <= 8; n++) {
15711 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015712 GemmMicrokernelTester()
15713 .mr(6)
15714 .nr(8)
15715 .kr(1)
15716 .sr(1)
15717 .m(m)
15718 .n(n)
15719 .k(k)
15720 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015721 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015722 }
15723 }
15724 }
15725 }
15726
Marat Dukhande06f492020-04-09 00:19:31 -070015727 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015728 TEST_REQUIRES_X86_AVX;
15729 for (uint32_t n = 9; n < 16; n++) {
15730 for (size_t k = 1; k <= 5; k += 2) {
15731 GemmMicrokernelTester()
15732 .mr(6)
15733 .nr(8)
15734 .kr(1)
15735 .sr(1)
15736 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015737 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070015738 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015739 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015740 }
15741 }
15742 }
15743
Marat Dukhande06f492020-04-09 00:19:31 -070015744 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015745 TEST_REQUIRES_X86_AVX;
15746 for (uint32_t n = 9; n < 16; n++) {
15747 for (size_t k = 1; k <= 5; k += 2) {
15748 GemmMicrokernelTester()
15749 .mr(6)
15750 .nr(8)
15751 .kr(1)
15752 .sr(1)
15753 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015754 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070015755 .k(k)
15756 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015757 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015758 }
15759 }
15760 }
15761
Marat Dukhande06f492020-04-09 00:19:31 -070015762 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015763 TEST_REQUIRES_X86_AVX;
15764 for (uint32_t n = 9; n < 16; n++) {
15765 for (size_t k = 1; k <= 5; k += 2) {
15766 GemmMicrokernelTester()
15767 .mr(6)
15768 .nr(8)
15769 .kr(1)
15770 .sr(1)
15771 .m(6)
15772 .n(n)
15773 .k(k)
15774 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015775 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015776 }
15777 }
15778 }
15779
Marat Dukhande06f492020-04-09 00:19:31 -070015780 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015781 TEST_REQUIRES_X86_AVX;
15782 for (uint32_t n = 9; n < 16; n++) {
15783 for (size_t k = 1; k <= 5; k += 2) {
15784 for (uint32_t m = 1; m <= 6; m++) {
15785 GemmMicrokernelTester()
15786 .mr(6)
15787 .nr(8)
15788 .kr(1)
15789 .sr(1)
15790 .m(m)
15791 .n(n)
15792 .k(k)
15793 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015794 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015795 }
15796 }
15797 }
15798 }
15799
Marat Dukhande06f492020-04-09 00:19:31 -070015800 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015801 TEST_REQUIRES_X86_AVX;
15802 for (uint32_t n = 16; n <= 24; n += 8) {
15803 for (size_t k = 1; k <= 5; k += 2) {
15804 GemmMicrokernelTester()
15805 .mr(6)
15806 .nr(8)
15807 .kr(1)
15808 .sr(1)
15809 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015810 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070015811 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015812 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015813 }
15814 }
15815 }
15816
Marat Dukhande06f492020-04-09 00:19:31 -070015817 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015818 TEST_REQUIRES_X86_AVX;
15819 for (uint32_t n = 16; n <= 24; n += 8) {
15820 for (size_t k = 1; k <= 5; k += 2) {
15821 GemmMicrokernelTester()
15822 .mr(6)
15823 .nr(8)
15824 .kr(1)
15825 .sr(1)
15826 .m(6)
15827 .n(n)
15828 .k(k)
15829 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015830 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015831 }
15832 }
15833 }
15834
Marat Dukhande06f492020-04-09 00:19:31 -070015835 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015836 TEST_REQUIRES_X86_AVX;
15837 for (uint32_t n = 16; n <= 24; n += 8) {
15838 for (size_t k = 1; k <= 5; k += 2) {
15839 GemmMicrokernelTester()
15840 .mr(6)
15841 .nr(8)
15842 .kr(1)
15843 .sr(1)
15844 .m(6)
15845 .n(n)
15846 .k(k)
15847 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015848 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015849 }
15850 }
15851 }
15852
Marat Dukhande06f492020-04-09 00:19:31 -070015853 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015854 TEST_REQUIRES_X86_AVX;
15855 for (uint32_t n = 16; n <= 24; n += 8) {
15856 for (size_t k = 1; k <= 5; k += 2) {
15857 for (uint32_t m = 1; m <= 6; m++) {
15858 GemmMicrokernelTester()
15859 .mr(6)
15860 .nr(8)
15861 .kr(1)
15862 .sr(1)
15863 .m(m)
15864 .n(n)
15865 .k(k)
15866 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015867 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015868 }
15869 }
15870 }
15871 }
15872
Marat Dukhande06f492020-04-09 00:19:31 -070015873 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015874 TEST_REQUIRES_X86_AVX;
15875 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015876 for (uint32_t n = 1; n <= 8; n++) {
15877 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015878 GemmMicrokernelTester()
15879 .mr(6)
15880 .nr(8)
15881 .kr(1)
15882 .sr(1)
15883 .m(m)
15884 .n(n)
15885 .k(k)
15886 .cm_stride(11)
15887 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015888 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015889 }
15890 }
15891 }
15892 }
15893
Marat Dukhande06f492020-04-09 00:19:31 -070015894 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015895 TEST_REQUIRES_X86_AVX;
15896 GemmMicrokernelTester()
15897 .mr(6)
15898 .nr(8)
15899 .kr(1)
15900 .sr(1)
15901 .m(6)
15902 .n(8)
15903 .k(1)
15904 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015905 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015906 }
15907
Marat Dukhande06f492020-04-09 00:19:31 -070015908 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015909 TEST_REQUIRES_X86_AVX;
15910 GemmMicrokernelTester()
15911 .mr(6)
15912 .nr(8)
15913 .kr(1)
15914 .sr(1)
15915 .m(6)
15916 .n(8)
15917 .k(1)
15918 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015919 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015920 }
15921
Marat Dukhande06f492020-04-09 00:19:31 -070015922 TEST(F32_GEMMINC_MINMAX_6X8__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015923 TEST_REQUIRES_X86_AVX;
15924 GemmMicrokernelTester()
15925 .mr(6)
15926 .nr(8)
15927 .kr(1)
15928 .sr(1)
15929 .m(6)
15930 .n(8)
15931 .k(1)
15932 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015933 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015934 }
15935#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15936
15937
15938#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070015939 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015940 TEST_REQUIRES_X86_AVX;
15941 GemmMicrokernelTester()
15942 .mr(1)
15943 .nr(16)
15944 .kr(1)
15945 .sr(1)
15946 .m(1)
15947 .n(16)
15948 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015949 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015950 }
15951
Marat Dukhande06f492020-04-09 00:19:31 -070015952 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015953 TEST_REQUIRES_X86_AVX;
15954 GemmMicrokernelTester()
15955 .mr(1)
15956 .nr(16)
15957 .kr(1)
15958 .sr(1)
15959 .m(1)
15960 .n(16)
15961 .k(1)
15962 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015963 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015964 }
15965
Marat Dukhande06f492020-04-09 00:19:31 -070015966 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015967 TEST_REQUIRES_X86_AVX;
15968 GemmMicrokernelTester()
15969 .mr(1)
15970 .nr(16)
15971 .kr(1)
15972 .sr(1)
15973 .m(1)
15974 .n(16)
15975 .k(1)
15976 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015977 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015978 }
15979
Marat Dukhande06f492020-04-09 00:19:31 -070015980 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015981 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015982 for (uint32_t n = 1; n <= 16; n++) {
15983 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015984 GemmMicrokernelTester()
15985 .mr(1)
15986 .nr(16)
15987 .kr(1)
15988 .sr(1)
15989 .m(m)
15990 .n(n)
15991 .k(1)
15992 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015993 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070015994 }
15995 }
15996 }
15997
Marat Dukhande06f492020-04-09 00:19:31 -070015998 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070015999 TEST_REQUIRES_X86_AVX;
16000 for (uint32_t m = 1; m <= 1; m++) {
16001 GemmMicrokernelTester()
16002 .mr(1)
16003 .nr(16)
16004 .kr(1)
16005 .sr(1)
16006 .m(m)
16007 .n(16)
16008 .k(1)
16009 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016010 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016011 }
16012 }
16013
Marat Dukhande06f492020-04-09 00:19:31 -070016014 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016015 TEST_REQUIRES_X86_AVX;
16016 for (uint32_t n = 1; n <= 16; n++) {
16017 GemmMicrokernelTester()
16018 .mr(1)
16019 .nr(16)
16020 .kr(1)
16021 .sr(1)
16022 .m(1)
16023 .n(n)
16024 .k(1)
16025 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016026 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016027 }
16028 }
16029
Marat Dukhande06f492020-04-09 00:19:31 -070016030 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016031 TEST_REQUIRES_X86_AVX;
16032 for (size_t k = 2; k < 10; k++) {
16033 GemmMicrokernelTester()
16034 .mr(1)
16035 .nr(16)
16036 .kr(1)
16037 .sr(1)
16038 .m(1)
16039 .n(16)
16040 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016041 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016042 }
16043 }
16044
Marat Dukhande06f492020-04-09 00:19:31 -070016045 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016046 TEST_REQUIRES_X86_AVX;
16047 for (size_t k = 2; k < 10; k++) {
16048 GemmMicrokernelTester()
16049 .mr(1)
16050 .nr(16)
16051 .kr(1)
16052 .sr(1)
16053 .m(1)
16054 .n(16)
16055 .k(k)
16056 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016057 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016058 }
16059 }
16060
Marat Dukhande06f492020-04-09 00:19:31 -070016061 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016062 TEST_REQUIRES_X86_AVX;
16063 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016064 for (uint32_t n = 1; n <= 16; n++) {
16065 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016066 GemmMicrokernelTester()
16067 .mr(1)
16068 .nr(16)
16069 .kr(1)
16070 .sr(1)
16071 .m(m)
16072 .n(n)
16073 .k(k)
16074 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016075 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016076 }
16077 }
16078 }
16079 }
16080
Marat Dukhande06f492020-04-09 00:19:31 -070016081 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016082 TEST_REQUIRES_X86_AVX;
16083 for (uint32_t n = 17; n < 32; n++) {
16084 for (size_t k = 1; k <= 5; k += 2) {
16085 GemmMicrokernelTester()
16086 .mr(1)
16087 .nr(16)
16088 .kr(1)
16089 .sr(1)
16090 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016091 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016092 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016093 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016094 }
16095 }
16096 }
16097
Marat Dukhande06f492020-04-09 00:19:31 -070016098 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016099 TEST_REQUIRES_X86_AVX;
16100 for (uint32_t n = 17; n < 32; n++) {
16101 for (size_t k = 1; k <= 5; k += 2) {
16102 GemmMicrokernelTester()
16103 .mr(1)
16104 .nr(16)
16105 .kr(1)
16106 .sr(1)
16107 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016108 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016109 .k(k)
16110 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016111 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016112 }
16113 }
16114 }
16115
Marat Dukhande06f492020-04-09 00:19:31 -070016116 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016117 TEST_REQUIRES_X86_AVX;
16118 for (uint32_t n = 17; n < 32; n++) {
16119 for (size_t k = 1; k <= 5; k += 2) {
16120 GemmMicrokernelTester()
16121 .mr(1)
16122 .nr(16)
16123 .kr(1)
16124 .sr(1)
16125 .m(1)
16126 .n(n)
16127 .k(k)
16128 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016129 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016130 }
16131 }
16132 }
16133
Marat Dukhande06f492020-04-09 00:19:31 -070016134 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016135 TEST_REQUIRES_X86_AVX;
16136 for (uint32_t n = 17; n < 32; n++) {
16137 for (size_t k = 1; k <= 5; k += 2) {
16138 for (uint32_t m = 1; m <= 1; m++) {
16139 GemmMicrokernelTester()
16140 .mr(1)
16141 .nr(16)
16142 .kr(1)
16143 .sr(1)
16144 .m(m)
16145 .n(n)
16146 .k(k)
16147 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016148 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016149 }
16150 }
16151 }
16152 }
16153
Marat Dukhande06f492020-04-09 00:19:31 -070016154 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016155 TEST_REQUIRES_X86_AVX;
16156 for (uint32_t n = 32; n <= 48; n += 16) {
16157 for (size_t k = 1; k <= 5; k += 2) {
16158 GemmMicrokernelTester()
16159 .mr(1)
16160 .nr(16)
16161 .kr(1)
16162 .sr(1)
16163 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016164 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016165 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016166 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016167 }
16168 }
16169 }
16170
Marat Dukhande06f492020-04-09 00:19:31 -070016171 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016172 TEST_REQUIRES_X86_AVX;
16173 for (uint32_t n = 32; n <= 48; n += 16) {
16174 for (size_t k = 1; k <= 5; k += 2) {
16175 GemmMicrokernelTester()
16176 .mr(1)
16177 .nr(16)
16178 .kr(1)
16179 .sr(1)
16180 .m(1)
16181 .n(n)
16182 .k(k)
16183 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016184 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016185 }
16186 }
16187 }
16188
Marat Dukhande06f492020-04-09 00:19:31 -070016189 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016190 TEST_REQUIRES_X86_AVX;
16191 for (uint32_t n = 32; n <= 48; n += 16) {
16192 for (size_t k = 1; k <= 5; k += 2) {
16193 GemmMicrokernelTester()
16194 .mr(1)
16195 .nr(16)
16196 .kr(1)
16197 .sr(1)
16198 .m(1)
16199 .n(n)
16200 .k(k)
16201 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016202 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016203 }
16204 }
16205 }
16206
Marat Dukhande06f492020-04-09 00:19:31 -070016207 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016208 TEST_REQUIRES_X86_AVX;
16209 for (uint32_t n = 32; n <= 48; n += 16) {
16210 for (size_t k = 1; k <= 5; k += 2) {
16211 for (uint32_t m = 1; m <= 1; m++) {
16212 GemmMicrokernelTester()
16213 .mr(1)
16214 .nr(16)
16215 .kr(1)
16216 .sr(1)
16217 .m(m)
16218 .n(n)
16219 .k(k)
16220 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016221 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016222 }
16223 }
16224 }
16225 }
16226
Marat Dukhande06f492020-04-09 00:19:31 -070016227 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016228 TEST_REQUIRES_X86_AVX;
16229 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016230 for (uint32_t n = 1; n <= 16; n++) {
16231 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016232 GemmMicrokernelTester()
16233 .mr(1)
16234 .nr(16)
16235 .kr(1)
16236 .sr(1)
16237 .m(m)
16238 .n(n)
16239 .k(k)
16240 .cm_stride(19)
16241 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016242 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016243 }
16244 }
16245 }
16246 }
16247
Marat Dukhande06f492020-04-09 00:19:31 -070016248 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016249 TEST_REQUIRES_X86_AVX;
16250 GemmMicrokernelTester()
16251 .mr(1)
16252 .nr(16)
16253 .kr(1)
16254 .sr(1)
16255 .m(1)
16256 .n(16)
16257 .k(1)
16258 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016259 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016260 }
16261
Marat Dukhande06f492020-04-09 00:19:31 -070016262 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016263 TEST_REQUIRES_X86_AVX;
16264 GemmMicrokernelTester()
16265 .mr(1)
16266 .nr(16)
16267 .kr(1)
16268 .sr(1)
16269 .m(1)
16270 .n(16)
16271 .k(1)
16272 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016273 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016274 }
16275
Marat Dukhande06f492020-04-09 00:19:31 -070016276 TEST(F32_GEMMINC_MINMAX_1X16__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016277 TEST_REQUIRES_X86_AVX;
16278 GemmMicrokernelTester()
16279 .mr(1)
16280 .nr(16)
16281 .kr(1)
16282 .sr(1)
16283 .m(1)
16284 .n(16)
16285 .k(1)
16286 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016287 .Test(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016288 }
16289#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16290
16291
16292#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070016293 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016294 TEST_REQUIRES_X86_AVX;
16295 GemmMicrokernelTester()
16296 .mr(4)
16297 .nr(16)
16298 .kr(1)
16299 .sr(1)
16300 .m(4)
16301 .n(16)
16302 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016303 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016304 }
16305
Marat Dukhande06f492020-04-09 00:19:31 -070016306 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016307 TEST_REQUIRES_X86_AVX;
16308 GemmMicrokernelTester()
16309 .mr(4)
16310 .nr(16)
16311 .kr(1)
16312 .sr(1)
16313 .m(4)
16314 .n(16)
16315 .k(1)
16316 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016317 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016318 }
16319
Marat Dukhande06f492020-04-09 00:19:31 -070016320 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016321 TEST_REQUIRES_X86_AVX;
16322 GemmMicrokernelTester()
16323 .mr(4)
16324 .nr(16)
16325 .kr(1)
16326 .sr(1)
16327 .m(4)
16328 .n(16)
16329 .k(1)
16330 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016331 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016332 }
16333
Marat Dukhande06f492020-04-09 00:19:31 -070016334 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016335 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016336 for (uint32_t n = 1; n <= 16; n++) {
16337 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016338 GemmMicrokernelTester()
16339 .mr(4)
16340 .nr(16)
16341 .kr(1)
16342 .sr(1)
16343 .m(m)
16344 .n(n)
16345 .k(1)
16346 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016347 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016348 }
16349 }
16350 }
16351
Marat Dukhande06f492020-04-09 00:19:31 -070016352 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016353 TEST_REQUIRES_X86_AVX;
16354 for (uint32_t m = 1; m <= 4; m++) {
16355 GemmMicrokernelTester()
16356 .mr(4)
16357 .nr(16)
16358 .kr(1)
16359 .sr(1)
16360 .m(m)
16361 .n(16)
16362 .k(1)
16363 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016364 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016365 }
16366 }
16367
Marat Dukhande06f492020-04-09 00:19:31 -070016368 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016369 TEST_REQUIRES_X86_AVX;
16370 for (uint32_t n = 1; n <= 16; n++) {
16371 GemmMicrokernelTester()
16372 .mr(4)
16373 .nr(16)
16374 .kr(1)
16375 .sr(1)
16376 .m(4)
16377 .n(n)
16378 .k(1)
16379 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016380 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016381 }
16382 }
16383
Marat Dukhande06f492020-04-09 00:19:31 -070016384 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016385 TEST_REQUIRES_X86_AVX;
16386 for (size_t k = 2; k < 10; k++) {
16387 GemmMicrokernelTester()
16388 .mr(4)
16389 .nr(16)
16390 .kr(1)
16391 .sr(1)
16392 .m(4)
16393 .n(16)
16394 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016395 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016396 }
16397 }
16398
Marat Dukhande06f492020-04-09 00:19:31 -070016399 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016400 TEST_REQUIRES_X86_AVX;
16401 for (size_t k = 2; k < 10; k++) {
16402 GemmMicrokernelTester()
16403 .mr(4)
16404 .nr(16)
16405 .kr(1)
16406 .sr(1)
16407 .m(4)
16408 .n(16)
16409 .k(k)
16410 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016411 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016412 }
16413 }
16414
Marat Dukhande06f492020-04-09 00:19:31 -070016415 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016416 TEST_REQUIRES_X86_AVX;
16417 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016418 for (uint32_t n = 1; n <= 16; n++) {
16419 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016420 GemmMicrokernelTester()
16421 .mr(4)
16422 .nr(16)
16423 .kr(1)
16424 .sr(1)
16425 .m(m)
16426 .n(n)
16427 .k(k)
16428 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016429 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016430 }
16431 }
16432 }
16433 }
16434
Marat Dukhande06f492020-04-09 00:19:31 -070016435 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016436 TEST_REQUIRES_X86_AVX;
16437 for (uint32_t n = 17; n < 32; n++) {
16438 for (size_t k = 1; k <= 5; k += 2) {
16439 GemmMicrokernelTester()
16440 .mr(4)
16441 .nr(16)
16442 .kr(1)
16443 .sr(1)
16444 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016445 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016446 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016447 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016448 }
16449 }
16450 }
16451
Marat Dukhande06f492020-04-09 00:19:31 -070016452 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016453 TEST_REQUIRES_X86_AVX;
16454 for (uint32_t n = 17; n < 32; n++) {
16455 for (size_t k = 1; k <= 5; k += 2) {
16456 GemmMicrokernelTester()
16457 .mr(4)
16458 .nr(16)
16459 .kr(1)
16460 .sr(1)
16461 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016462 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016463 .k(k)
16464 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016465 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016466 }
16467 }
16468 }
16469
Marat Dukhande06f492020-04-09 00:19:31 -070016470 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016471 TEST_REQUIRES_X86_AVX;
16472 for (uint32_t n = 17; n < 32; n++) {
16473 for (size_t k = 1; k <= 5; k += 2) {
16474 GemmMicrokernelTester()
16475 .mr(4)
16476 .nr(16)
16477 .kr(1)
16478 .sr(1)
16479 .m(4)
16480 .n(n)
16481 .k(k)
16482 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016483 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016484 }
16485 }
16486 }
16487
Marat Dukhande06f492020-04-09 00:19:31 -070016488 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016489 TEST_REQUIRES_X86_AVX;
16490 for (uint32_t n = 17; n < 32; n++) {
16491 for (size_t k = 1; k <= 5; k += 2) {
16492 for (uint32_t m = 1; m <= 4; m++) {
16493 GemmMicrokernelTester()
16494 .mr(4)
16495 .nr(16)
16496 .kr(1)
16497 .sr(1)
16498 .m(m)
16499 .n(n)
16500 .k(k)
16501 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016502 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016503 }
16504 }
16505 }
16506 }
16507
Marat Dukhande06f492020-04-09 00:19:31 -070016508 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016509 TEST_REQUIRES_X86_AVX;
16510 for (uint32_t n = 32; n <= 48; n += 16) {
16511 for (size_t k = 1; k <= 5; k += 2) {
16512 GemmMicrokernelTester()
16513 .mr(4)
16514 .nr(16)
16515 .kr(1)
16516 .sr(1)
16517 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016518 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016519 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016520 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016521 }
16522 }
16523 }
16524
Marat Dukhande06f492020-04-09 00:19:31 -070016525 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016526 TEST_REQUIRES_X86_AVX;
16527 for (uint32_t n = 32; n <= 48; n += 16) {
16528 for (size_t k = 1; k <= 5; k += 2) {
16529 GemmMicrokernelTester()
16530 .mr(4)
16531 .nr(16)
16532 .kr(1)
16533 .sr(1)
16534 .m(4)
16535 .n(n)
16536 .k(k)
16537 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016538 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016539 }
16540 }
16541 }
16542
Marat Dukhande06f492020-04-09 00:19:31 -070016543 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016544 TEST_REQUIRES_X86_AVX;
16545 for (uint32_t n = 32; n <= 48; n += 16) {
16546 for (size_t k = 1; k <= 5; k += 2) {
16547 GemmMicrokernelTester()
16548 .mr(4)
16549 .nr(16)
16550 .kr(1)
16551 .sr(1)
16552 .m(4)
16553 .n(n)
16554 .k(k)
16555 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016556 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016557 }
16558 }
16559 }
16560
Marat Dukhande06f492020-04-09 00:19:31 -070016561 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016562 TEST_REQUIRES_X86_AVX;
16563 for (uint32_t n = 32; n <= 48; n += 16) {
16564 for (size_t k = 1; k <= 5; k += 2) {
16565 for (uint32_t m = 1; m <= 4; m++) {
16566 GemmMicrokernelTester()
16567 .mr(4)
16568 .nr(16)
16569 .kr(1)
16570 .sr(1)
16571 .m(m)
16572 .n(n)
16573 .k(k)
16574 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016575 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016576 }
16577 }
16578 }
16579 }
16580
Marat Dukhande06f492020-04-09 00:19:31 -070016581 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016582 TEST_REQUIRES_X86_AVX;
16583 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016584 for (uint32_t n = 1; n <= 16; n++) {
16585 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016586 GemmMicrokernelTester()
16587 .mr(4)
16588 .nr(16)
16589 .kr(1)
16590 .sr(1)
16591 .m(m)
16592 .n(n)
16593 .k(k)
16594 .cm_stride(19)
16595 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016596 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016597 }
16598 }
16599 }
16600 }
16601
Marat Dukhande06f492020-04-09 00:19:31 -070016602 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016603 TEST_REQUIRES_X86_AVX;
16604 GemmMicrokernelTester()
16605 .mr(4)
16606 .nr(16)
16607 .kr(1)
16608 .sr(1)
16609 .m(4)
16610 .n(16)
16611 .k(1)
16612 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016613 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016614 }
16615
Marat Dukhande06f492020-04-09 00:19:31 -070016616 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016617 TEST_REQUIRES_X86_AVX;
16618 GemmMicrokernelTester()
16619 .mr(4)
16620 .nr(16)
16621 .kr(1)
16622 .sr(1)
16623 .m(4)
16624 .n(16)
16625 .k(1)
16626 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016627 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016628 }
16629
Marat Dukhande06f492020-04-09 00:19:31 -070016630 TEST(F32_GEMMINC_MINMAX_4X16__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016631 TEST_REQUIRES_X86_AVX;
16632 GemmMicrokernelTester()
16633 .mr(4)
16634 .nr(16)
16635 .kr(1)
16636 .sr(1)
16637 .m(4)
16638 .n(16)
16639 .k(1)
16640 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016641 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016642 }
16643#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16644
16645
16646#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070016647 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016648 TEST_REQUIRES_X86_AVX;
16649 GemmMicrokernelTester()
16650 .mr(5)
16651 .nr(16)
16652 .kr(1)
16653 .sr(1)
16654 .m(5)
16655 .n(16)
16656 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016657 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016658 }
16659
Marat Dukhande06f492020-04-09 00:19:31 -070016660 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016661 TEST_REQUIRES_X86_AVX;
16662 GemmMicrokernelTester()
16663 .mr(5)
16664 .nr(16)
16665 .kr(1)
16666 .sr(1)
16667 .m(5)
16668 .n(16)
16669 .k(1)
16670 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016671 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016672 }
16673
Marat Dukhande06f492020-04-09 00:19:31 -070016674 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016675 TEST_REQUIRES_X86_AVX;
16676 GemmMicrokernelTester()
16677 .mr(5)
16678 .nr(16)
16679 .kr(1)
16680 .sr(1)
16681 .m(5)
16682 .n(16)
16683 .k(1)
16684 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016685 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016686 }
16687
Marat Dukhande06f492020-04-09 00:19:31 -070016688 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016689 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016690 for (uint32_t n = 1; n <= 16; n++) {
16691 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016692 GemmMicrokernelTester()
16693 .mr(5)
16694 .nr(16)
16695 .kr(1)
16696 .sr(1)
16697 .m(m)
16698 .n(n)
16699 .k(1)
16700 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016701 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016702 }
16703 }
16704 }
16705
Marat Dukhande06f492020-04-09 00:19:31 -070016706 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016707 TEST_REQUIRES_X86_AVX;
16708 for (uint32_t m = 1; m <= 5; m++) {
16709 GemmMicrokernelTester()
16710 .mr(5)
16711 .nr(16)
16712 .kr(1)
16713 .sr(1)
16714 .m(m)
16715 .n(16)
16716 .k(1)
16717 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016718 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016719 }
16720 }
16721
Marat Dukhande06f492020-04-09 00:19:31 -070016722 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016723 TEST_REQUIRES_X86_AVX;
16724 for (uint32_t n = 1; n <= 16; n++) {
16725 GemmMicrokernelTester()
16726 .mr(5)
16727 .nr(16)
16728 .kr(1)
16729 .sr(1)
16730 .m(5)
16731 .n(n)
16732 .k(1)
16733 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016734 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016735 }
16736 }
16737
Marat Dukhande06f492020-04-09 00:19:31 -070016738 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016739 TEST_REQUIRES_X86_AVX;
16740 for (size_t k = 2; k < 10; k++) {
16741 GemmMicrokernelTester()
16742 .mr(5)
16743 .nr(16)
16744 .kr(1)
16745 .sr(1)
16746 .m(5)
16747 .n(16)
16748 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016749 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016750 }
16751 }
16752
Marat Dukhande06f492020-04-09 00:19:31 -070016753 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016754 TEST_REQUIRES_X86_AVX;
16755 for (size_t k = 2; k < 10; k++) {
16756 GemmMicrokernelTester()
16757 .mr(5)
16758 .nr(16)
16759 .kr(1)
16760 .sr(1)
16761 .m(5)
16762 .n(16)
16763 .k(k)
16764 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016765 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016766 }
16767 }
16768
Marat Dukhande06f492020-04-09 00:19:31 -070016769 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016770 TEST_REQUIRES_X86_AVX;
16771 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016772 for (uint32_t n = 1; n <= 16; n++) {
16773 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016774 GemmMicrokernelTester()
16775 .mr(5)
16776 .nr(16)
16777 .kr(1)
16778 .sr(1)
16779 .m(m)
16780 .n(n)
16781 .k(k)
16782 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016783 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016784 }
16785 }
16786 }
16787 }
16788
Marat Dukhande06f492020-04-09 00:19:31 -070016789 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016790 TEST_REQUIRES_X86_AVX;
16791 for (uint32_t n = 17; n < 32; n++) {
16792 for (size_t k = 1; k <= 5; k += 2) {
16793 GemmMicrokernelTester()
16794 .mr(5)
16795 .nr(16)
16796 .kr(1)
16797 .sr(1)
16798 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016799 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016800 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016801 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016802 }
16803 }
16804 }
16805
Marat Dukhande06f492020-04-09 00:19:31 -070016806 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016807 TEST_REQUIRES_X86_AVX;
16808 for (uint32_t n = 17; n < 32; n++) {
16809 for (size_t k = 1; k <= 5; k += 2) {
16810 GemmMicrokernelTester()
16811 .mr(5)
16812 .nr(16)
16813 .kr(1)
16814 .sr(1)
16815 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016816 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016817 .k(k)
16818 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016819 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016820 }
16821 }
16822 }
16823
Marat Dukhande06f492020-04-09 00:19:31 -070016824 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016825 TEST_REQUIRES_X86_AVX;
16826 for (uint32_t n = 17; n < 32; n++) {
16827 for (size_t k = 1; k <= 5; k += 2) {
16828 GemmMicrokernelTester()
16829 .mr(5)
16830 .nr(16)
16831 .kr(1)
16832 .sr(1)
16833 .m(5)
16834 .n(n)
16835 .k(k)
16836 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016837 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016838 }
16839 }
16840 }
16841
Marat Dukhande06f492020-04-09 00:19:31 -070016842 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016843 TEST_REQUIRES_X86_AVX;
16844 for (uint32_t n = 17; n < 32; n++) {
16845 for (size_t k = 1; k <= 5; k += 2) {
16846 for (uint32_t m = 1; m <= 5; m++) {
16847 GemmMicrokernelTester()
16848 .mr(5)
16849 .nr(16)
16850 .kr(1)
16851 .sr(1)
16852 .m(m)
16853 .n(n)
16854 .k(k)
16855 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016856 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016857 }
16858 }
16859 }
16860 }
16861
Marat Dukhande06f492020-04-09 00:19:31 -070016862 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016863 TEST_REQUIRES_X86_AVX;
16864 for (uint32_t n = 32; n <= 48; n += 16) {
16865 for (size_t k = 1; k <= 5; k += 2) {
16866 GemmMicrokernelTester()
16867 .mr(5)
16868 .nr(16)
16869 .kr(1)
16870 .sr(1)
16871 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016872 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070016873 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016874 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016875 }
16876 }
16877 }
16878
Marat Dukhande06f492020-04-09 00:19:31 -070016879 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016880 TEST_REQUIRES_X86_AVX;
16881 for (uint32_t n = 32; n <= 48; n += 16) {
16882 for (size_t k = 1; k <= 5; k += 2) {
16883 GemmMicrokernelTester()
16884 .mr(5)
16885 .nr(16)
16886 .kr(1)
16887 .sr(1)
16888 .m(5)
16889 .n(n)
16890 .k(k)
16891 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016892 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016893 }
16894 }
16895 }
16896
Marat Dukhande06f492020-04-09 00:19:31 -070016897 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016898 TEST_REQUIRES_X86_AVX;
16899 for (uint32_t n = 32; n <= 48; n += 16) {
16900 for (size_t k = 1; k <= 5; k += 2) {
16901 GemmMicrokernelTester()
16902 .mr(5)
16903 .nr(16)
16904 .kr(1)
16905 .sr(1)
16906 .m(5)
16907 .n(n)
16908 .k(k)
16909 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016910 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016911 }
16912 }
16913 }
16914
Marat Dukhande06f492020-04-09 00:19:31 -070016915 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016916 TEST_REQUIRES_X86_AVX;
16917 for (uint32_t n = 32; n <= 48; n += 16) {
16918 for (size_t k = 1; k <= 5; k += 2) {
16919 for (uint32_t m = 1; m <= 5; m++) {
16920 GemmMicrokernelTester()
16921 .mr(5)
16922 .nr(16)
16923 .kr(1)
16924 .sr(1)
16925 .m(m)
16926 .n(n)
16927 .k(k)
16928 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016929 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016930 }
16931 }
16932 }
16933 }
16934
Marat Dukhande06f492020-04-09 00:19:31 -070016935 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016936 TEST_REQUIRES_X86_AVX;
16937 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016938 for (uint32_t n = 1; n <= 16; n++) {
16939 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016940 GemmMicrokernelTester()
16941 .mr(5)
16942 .nr(16)
16943 .kr(1)
16944 .sr(1)
16945 .m(m)
16946 .n(n)
16947 .k(k)
16948 .cm_stride(19)
16949 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016950 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016951 }
16952 }
16953 }
16954 }
16955
Marat Dukhande06f492020-04-09 00:19:31 -070016956 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016957 TEST_REQUIRES_X86_AVX;
16958 GemmMicrokernelTester()
16959 .mr(5)
16960 .nr(16)
16961 .kr(1)
16962 .sr(1)
16963 .m(5)
16964 .n(16)
16965 .k(1)
16966 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016967 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016968 }
16969
Marat Dukhande06f492020-04-09 00:19:31 -070016970 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016971 TEST_REQUIRES_X86_AVX;
16972 GemmMicrokernelTester()
16973 .mr(5)
16974 .nr(16)
16975 .kr(1)
16976 .sr(1)
16977 .m(5)
16978 .n(16)
16979 .k(1)
16980 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016981 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016982 }
16983
Marat Dukhande06f492020-04-09 00:19:31 -070016984 TEST(F32_GEMMINC_MINMAX_5X16__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016985 TEST_REQUIRES_X86_AVX;
16986 GemmMicrokernelTester()
16987 .mr(5)
16988 .nr(16)
16989 .kr(1)
16990 .sr(1)
16991 .m(5)
16992 .n(16)
16993 .k(1)
16994 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016995 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016996 }
16997#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16998
16999
17000#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070017001 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017002 TEST_REQUIRES_X86_FMA3;
17003 GemmMicrokernelTester()
17004 .mr(6)
17005 .nr(8)
17006 .kr(1)
17007 .sr(1)
17008 .m(6)
17009 .n(8)
17010 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017011 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017012 }
17013
Marat Dukhande06f492020-04-09 00:19:31 -070017014 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017015 TEST_REQUIRES_X86_FMA3;
17016 GemmMicrokernelTester()
17017 .mr(6)
17018 .nr(8)
17019 .kr(1)
17020 .sr(1)
17021 .m(6)
17022 .n(8)
17023 .k(1)
17024 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017025 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017026 }
17027
Marat Dukhande06f492020-04-09 00:19:31 -070017028 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017029 TEST_REQUIRES_X86_FMA3;
17030 GemmMicrokernelTester()
17031 .mr(6)
17032 .nr(8)
17033 .kr(1)
17034 .sr(1)
17035 .m(6)
17036 .n(8)
17037 .k(1)
17038 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017039 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017040 }
17041
Marat Dukhande06f492020-04-09 00:19:31 -070017042 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017043 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017044 for (uint32_t n = 1; n <= 8; n++) {
17045 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017046 GemmMicrokernelTester()
17047 .mr(6)
17048 .nr(8)
17049 .kr(1)
17050 .sr(1)
17051 .m(m)
17052 .n(n)
17053 .k(1)
17054 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017055 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017056 }
17057 }
17058 }
17059
Marat Dukhande06f492020-04-09 00:19:31 -070017060 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017061 TEST_REQUIRES_X86_FMA3;
17062 for (uint32_t m = 1; m <= 6; m++) {
17063 GemmMicrokernelTester()
17064 .mr(6)
17065 .nr(8)
17066 .kr(1)
17067 .sr(1)
17068 .m(m)
17069 .n(8)
17070 .k(1)
17071 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017072 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017073 }
17074 }
17075
Marat Dukhande06f492020-04-09 00:19:31 -070017076 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017077 TEST_REQUIRES_X86_FMA3;
17078 for (uint32_t n = 1; n <= 8; n++) {
17079 GemmMicrokernelTester()
17080 .mr(6)
17081 .nr(8)
17082 .kr(1)
17083 .sr(1)
17084 .m(6)
17085 .n(n)
17086 .k(1)
17087 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017088 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017089 }
17090 }
17091
Marat Dukhande06f492020-04-09 00:19:31 -070017092 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017093 TEST_REQUIRES_X86_FMA3;
17094 for (size_t k = 2; k < 10; k++) {
17095 GemmMicrokernelTester()
17096 .mr(6)
17097 .nr(8)
17098 .kr(1)
17099 .sr(1)
17100 .m(6)
17101 .n(8)
17102 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017103 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017104 }
17105 }
17106
Marat Dukhande06f492020-04-09 00:19:31 -070017107 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017108 TEST_REQUIRES_X86_FMA3;
17109 for (size_t k = 2; k < 10; k++) {
17110 GemmMicrokernelTester()
17111 .mr(6)
17112 .nr(8)
17113 .kr(1)
17114 .sr(1)
17115 .m(6)
17116 .n(8)
17117 .k(k)
17118 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017119 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017120 }
17121 }
17122
Marat Dukhande06f492020-04-09 00:19:31 -070017123 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017124 TEST_REQUIRES_X86_FMA3;
17125 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017126 for (uint32_t n = 1; n <= 8; n++) {
17127 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017128 GemmMicrokernelTester()
17129 .mr(6)
17130 .nr(8)
17131 .kr(1)
17132 .sr(1)
17133 .m(m)
17134 .n(n)
17135 .k(k)
17136 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017137 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017138 }
17139 }
17140 }
17141 }
17142
Marat Dukhande06f492020-04-09 00:19:31 -070017143 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017144 TEST_REQUIRES_X86_FMA3;
17145 for (uint32_t n = 9; n < 16; n++) {
17146 for (size_t k = 1; k <= 5; k += 2) {
17147 GemmMicrokernelTester()
17148 .mr(6)
17149 .nr(8)
17150 .kr(1)
17151 .sr(1)
17152 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017153 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017154 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017155 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017156 }
17157 }
17158 }
17159
Marat Dukhande06f492020-04-09 00:19:31 -070017160 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017161 TEST_REQUIRES_X86_FMA3;
17162 for (uint32_t n = 9; n < 16; n++) {
17163 for (size_t k = 1; k <= 5; k += 2) {
17164 GemmMicrokernelTester()
17165 .mr(6)
17166 .nr(8)
17167 .kr(1)
17168 .sr(1)
17169 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017170 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017171 .k(k)
17172 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017173 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017174 }
17175 }
17176 }
17177
Marat Dukhande06f492020-04-09 00:19:31 -070017178 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017179 TEST_REQUIRES_X86_FMA3;
17180 for (uint32_t n = 9; n < 16; n++) {
17181 for (size_t k = 1; k <= 5; k += 2) {
17182 GemmMicrokernelTester()
17183 .mr(6)
17184 .nr(8)
17185 .kr(1)
17186 .sr(1)
17187 .m(6)
17188 .n(n)
17189 .k(k)
17190 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017191 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017192 }
17193 }
17194 }
17195
Marat Dukhande06f492020-04-09 00:19:31 -070017196 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017197 TEST_REQUIRES_X86_FMA3;
17198 for (uint32_t n = 9; n < 16; n++) {
17199 for (size_t k = 1; k <= 5; k += 2) {
17200 for (uint32_t m = 1; m <= 6; m++) {
17201 GemmMicrokernelTester()
17202 .mr(6)
17203 .nr(8)
17204 .kr(1)
17205 .sr(1)
17206 .m(m)
17207 .n(n)
17208 .k(k)
17209 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017210 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017211 }
17212 }
17213 }
17214 }
17215
Marat Dukhande06f492020-04-09 00:19:31 -070017216 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017217 TEST_REQUIRES_X86_FMA3;
17218 for (uint32_t n = 16; n <= 24; n += 8) {
17219 for (size_t k = 1; k <= 5; k += 2) {
17220 GemmMicrokernelTester()
17221 .mr(6)
17222 .nr(8)
17223 .kr(1)
17224 .sr(1)
17225 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017226 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017227 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017228 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017229 }
17230 }
17231 }
17232
Marat Dukhande06f492020-04-09 00:19:31 -070017233 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017234 TEST_REQUIRES_X86_FMA3;
17235 for (uint32_t n = 16; n <= 24; n += 8) {
17236 for (size_t k = 1; k <= 5; k += 2) {
17237 GemmMicrokernelTester()
17238 .mr(6)
17239 .nr(8)
17240 .kr(1)
17241 .sr(1)
17242 .m(6)
17243 .n(n)
17244 .k(k)
17245 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017246 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017247 }
17248 }
17249 }
17250
Marat Dukhande06f492020-04-09 00:19:31 -070017251 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017252 TEST_REQUIRES_X86_FMA3;
17253 for (uint32_t n = 16; n <= 24; n += 8) {
17254 for (size_t k = 1; k <= 5; k += 2) {
17255 GemmMicrokernelTester()
17256 .mr(6)
17257 .nr(8)
17258 .kr(1)
17259 .sr(1)
17260 .m(6)
17261 .n(n)
17262 .k(k)
17263 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017264 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017265 }
17266 }
17267 }
17268
Marat Dukhande06f492020-04-09 00:19:31 -070017269 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017270 TEST_REQUIRES_X86_FMA3;
17271 for (uint32_t n = 16; n <= 24; n += 8) {
17272 for (size_t k = 1; k <= 5; k += 2) {
17273 for (uint32_t m = 1; m <= 6; m++) {
17274 GemmMicrokernelTester()
17275 .mr(6)
17276 .nr(8)
17277 .kr(1)
17278 .sr(1)
17279 .m(m)
17280 .n(n)
17281 .k(k)
17282 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017283 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017284 }
17285 }
17286 }
17287 }
17288
Marat Dukhande06f492020-04-09 00:19:31 -070017289 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017290 TEST_REQUIRES_X86_FMA3;
17291 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017292 for (uint32_t n = 1; n <= 8; n++) {
17293 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017294 GemmMicrokernelTester()
17295 .mr(6)
17296 .nr(8)
17297 .kr(1)
17298 .sr(1)
17299 .m(m)
17300 .n(n)
17301 .k(k)
17302 .cm_stride(11)
17303 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017304 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017305 }
17306 }
17307 }
17308 }
17309
Marat Dukhande06f492020-04-09 00:19:31 -070017310 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017311 TEST_REQUIRES_X86_FMA3;
17312 GemmMicrokernelTester()
17313 .mr(6)
17314 .nr(8)
17315 .kr(1)
17316 .sr(1)
17317 .m(6)
17318 .n(8)
17319 .k(1)
17320 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017321 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017322 }
17323
Marat Dukhande06f492020-04-09 00:19:31 -070017324 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017325 TEST_REQUIRES_X86_FMA3;
17326 GemmMicrokernelTester()
17327 .mr(6)
17328 .nr(8)
17329 .kr(1)
17330 .sr(1)
17331 .m(6)
17332 .n(8)
17333 .k(1)
17334 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017335 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017336 }
17337
Marat Dukhande06f492020-04-09 00:19:31 -070017338 TEST(F32_GEMMINC_MINMAX_6X8__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017339 TEST_REQUIRES_X86_FMA3;
17340 GemmMicrokernelTester()
17341 .mr(6)
17342 .nr(8)
17343 .kr(1)
17344 .sr(1)
17345 .m(6)
17346 .n(8)
17347 .k(1)
17348 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017349 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017350 }
17351#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17352
17353
17354#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070017355 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017356 TEST_REQUIRES_X86_FMA3;
17357 GemmMicrokernelTester()
17358 .mr(7)
17359 .nr(8)
17360 .kr(1)
17361 .sr(1)
17362 .m(7)
17363 .n(8)
17364 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017365 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017366 }
17367
Marat Dukhande06f492020-04-09 00:19:31 -070017368 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017369 TEST_REQUIRES_X86_FMA3;
17370 GemmMicrokernelTester()
17371 .mr(7)
17372 .nr(8)
17373 .kr(1)
17374 .sr(1)
17375 .m(7)
17376 .n(8)
17377 .k(1)
17378 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017379 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017380 }
17381
Marat Dukhande06f492020-04-09 00:19:31 -070017382 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017383 TEST_REQUIRES_X86_FMA3;
17384 GemmMicrokernelTester()
17385 .mr(7)
17386 .nr(8)
17387 .kr(1)
17388 .sr(1)
17389 .m(7)
17390 .n(8)
17391 .k(1)
17392 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017393 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017394 }
17395
Marat Dukhande06f492020-04-09 00:19:31 -070017396 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017397 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017398 for (uint32_t n = 1; n <= 8; n++) {
17399 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017400 GemmMicrokernelTester()
17401 .mr(7)
17402 .nr(8)
17403 .kr(1)
17404 .sr(1)
17405 .m(m)
17406 .n(n)
17407 .k(1)
17408 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017409 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017410 }
17411 }
17412 }
17413
Marat Dukhande06f492020-04-09 00:19:31 -070017414 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017415 TEST_REQUIRES_X86_FMA3;
17416 for (uint32_t m = 1; m <= 7; m++) {
17417 GemmMicrokernelTester()
17418 .mr(7)
17419 .nr(8)
17420 .kr(1)
17421 .sr(1)
17422 .m(m)
17423 .n(8)
17424 .k(1)
17425 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017426 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017427 }
17428 }
17429
Marat Dukhande06f492020-04-09 00:19:31 -070017430 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017431 TEST_REQUIRES_X86_FMA3;
17432 for (uint32_t n = 1; n <= 8; n++) {
17433 GemmMicrokernelTester()
17434 .mr(7)
17435 .nr(8)
17436 .kr(1)
17437 .sr(1)
17438 .m(7)
17439 .n(n)
17440 .k(1)
17441 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017442 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017443 }
17444 }
17445
Marat Dukhande06f492020-04-09 00:19:31 -070017446 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017447 TEST_REQUIRES_X86_FMA3;
17448 for (size_t k = 2; k < 10; k++) {
17449 GemmMicrokernelTester()
17450 .mr(7)
17451 .nr(8)
17452 .kr(1)
17453 .sr(1)
17454 .m(7)
17455 .n(8)
17456 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017457 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017458 }
17459 }
17460
Marat Dukhande06f492020-04-09 00:19:31 -070017461 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017462 TEST_REQUIRES_X86_FMA3;
17463 for (size_t k = 2; k < 10; k++) {
17464 GemmMicrokernelTester()
17465 .mr(7)
17466 .nr(8)
17467 .kr(1)
17468 .sr(1)
17469 .m(7)
17470 .n(8)
17471 .k(k)
17472 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017473 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017474 }
17475 }
17476
Marat Dukhande06f492020-04-09 00:19:31 -070017477 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017478 TEST_REQUIRES_X86_FMA3;
17479 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017480 for (uint32_t n = 1; n <= 8; n++) {
17481 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017482 GemmMicrokernelTester()
17483 .mr(7)
17484 .nr(8)
17485 .kr(1)
17486 .sr(1)
17487 .m(m)
17488 .n(n)
17489 .k(k)
17490 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017491 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017492 }
17493 }
17494 }
17495 }
17496
Marat Dukhande06f492020-04-09 00:19:31 -070017497 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017498 TEST_REQUIRES_X86_FMA3;
17499 for (uint32_t n = 9; n < 16; n++) {
17500 for (size_t k = 1; k <= 5; k += 2) {
17501 GemmMicrokernelTester()
17502 .mr(7)
17503 .nr(8)
17504 .kr(1)
17505 .sr(1)
17506 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017507 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017508 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017509 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017510 }
17511 }
17512 }
17513
Marat Dukhande06f492020-04-09 00:19:31 -070017514 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017515 TEST_REQUIRES_X86_FMA3;
17516 for (uint32_t n = 9; n < 16; n++) {
17517 for (size_t k = 1; k <= 5; k += 2) {
17518 GemmMicrokernelTester()
17519 .mr(7)
17520 .nr(8)
17521 .kr(1)
17522 .sr(1)
17523 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017524 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017525 .k(k)
17526 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017527 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017528 }
17529 }
17530 }
17531
Marat Dukhande06f492020-04-09 00:19:31 -070017532 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017533 TEST_REQUIRES_X86_FMA3;
17534 for (uint32_t n = 9; n < 16; n++) {
17535 for (size_t k = 1; k <= 5; k += 2) {
17536 GemmMicrokernelTester()
17537 .mr(7)
17538 .nr(8)
17539 .kr(1)
17540 .sr(1)
17541 .m(7)
17542 .n(n)
17543 .k(k)
17544 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017545 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017546 }
17547 }
17548 }
17549
Marat Dukhande06f492020-04-09 00:19:31 -070017550 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017551 TEST_REQUIRES_X86_FMA3;
17552 for (uint32_t n = 9; n < 16; n++) {
17553 for (size_t k = 1; k <= 5; k += 2) {
17554 for (uint32_t m = 1; m <= 7; m++) {
17555 GemmMicrokernelTester()
17556 .mr(7)
17557 .nr(8)
17558 .kr(1)
17559 .sr(1)
17560 .m(m)
17561 .n(n)
17562 .k(k)
17563 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017564 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017565 }
17566 }
17567 }
17568 }
17569
Marat Dukhande06f492020-04-09 00:19:31 -070017570 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017571 TEST_REQUIRES_X86_FMA3;
17572 for (uint32_t n = 16; n <= 24; n += 8) {
17573 for (size_t k = 1; k <= 5; k += 2) {
17574 GemmMicrokernelTester()
17575 .mr(7)
17576 .nr(8)
17577 .kr(1)
17578 .sr(1)
17579 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017580 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017581 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017582 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017583 }
17584 }
17585 }
17586
Marat Dukhande06f492020-04-09 00:19:31 -070017587 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017588 TEST_REQUIRES_X86_FMA3;
17589 for (uint32_t n = 16; n <= 24; n += 8) {
17590 for (size_t k = 1; k <= 5; k += 2) {
17591 GemmMicrokernelTester()
17592 .mr(7)
17593 .nr(8)
17594 .kr(1)
17595 .sr(1)
17596 .m(7)
17597 .n(n)
17598 .k(k)
17599 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017600 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017601 }
17602 }
17603 }
17604
Marat Dukhande06f492020-04-09 00:19:31 -070017605 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017606 TEST_REQUIRES_X86_FMA3;
17607 for (uint32_t n = 16; n <= 24; n += 8) {
17608 for (size_t k = 1; k <= 5; k += 2) {
17609 GemmMicrokernelTester()
17610 .mr(7)
17611 .nr(8)
17612 .kr(1)
17613 .sr(1)
17614 .m(7)
17615 .n(n)
17616 .k(k)
17617 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017618 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017619 }
17620 }
17621 }
17622
Marat Dukhande06f492020-04-09 00:19:31 -070017623 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017624 TEST_REQUIRES_X86_FMA3;
17625 for (uint32_t n = 16; n <= 24; n += 8) {
17626 for (size_t k = 1; k <= 5; k += 2) {
17627 for (uint32_t m = 1; m <= 7; m++) {
17628 GemmMicrokernelTester()
17629 .mr(7)
17630 .nr(8)
17631 .kr(1)
17632 .sr(1)
17633 .m(m)
17634 .n(n)
17635 .k(k)
17636 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017637 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017638 }
17639 }
17640 }
17641 }
17642
Marat Dukhande06f492020-04-09 00:19:31 -070017643 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017644 TEST_REQUIRES_X86_FMA3;
17645 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017646 for (uint32_t n = 1; n <= 8; n++) {
17647 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017648 GemmMicrokernelTester()
17649 .mr(7)
17650 .nr(8)
17651 .kr(1)
17652 .sr(1)
17653 .m(m)
17654 .n(n)
17655 .k(k)
17656 .cm_stride(11)
17657 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017658 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017659 }
17660 }
17661 }
17662 }
17663
Marat Dukhande06f492020-04-09 00:19:31 -070017664 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017665 TEST_REQUIRES_X86_FMA3;
17666 GemmMicrokernelTester()
17667 .mr(7)
17668 .nr(8)
17669 .kr(1)
17670 .sr(1)
17671 .m(7)
17672 .n(8)
17673 .k(1)
17674 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017675 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017676 }
17677
Marat Dukhande06f492020-04-09 00:19:31 -070017678 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017679 TEST_REQUIRES_X86_FMA3;
17680 GemmMicrokernelTester()
17681 .mr(7)
17682 .nr(8)
17683 .kr(1)
17684 .sr(1)
17685 .m(7)
17686 .n(8)
17687 .k(1)
17688 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017689 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017690 }
17691
Marat Dukhande06f492020-04-09 00:19:31 -070017692 TEST(F32_GEMMINC_MINMAX_7X8__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017693 TEST_REQUIRES_X86_FMA3;
17694 GemmMicrokernelTester()
17695 .mr(7)
17696 .nr(8)
17697 .kr(1)
17698 .sr(1)
17699 .m(7)
17700 .n(8)
17701 .k(1)
17702 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017703 .Test(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017704 }
17705#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17706
17707
17708#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070017709 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017710 TEST_REQUIRES_X86_FMA3;
17711 GemmMicrokernelTester()
17712 .mr(8)
17713 .nr(8)
17714 .kr(1)
17715 .sr(1)
17716 .m(8)
17717 .n(8)
17718 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017719 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017720 }
17721
Marat Dukhande06f492020-04-09 00:19:31 -070017722 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017723 TEST_REQUIRES_X86_FMA3;
17724 GemmMicrokernelTester()
17725 .mr(8)
17726 .nr(8)
17727 .kr(1)
17728 .sr(1)
17729 .m(8)
17730 .n(8)
17731 .k(1)
17732 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017733 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017734 }
17735
Marat Dukhande06f492020-04-09 00:19:31 -070017736 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017737 TEST_REQUIRES_X86_FMA3;
17738 GemmMicrokernelTester()
17739 .mr(8)
17740 .nr(8)
17741 .kr(1)
17742 .sr(1)
17743 .m(8)
17744 .n(8)
17745 .k(1)
17746 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017747 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017748 }
17749
Marat Dukhande06f492020-04-09 00:19:31 -070017750 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017751 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017752 for (uint32_t n = 1; n <= 8; n++) {
17753 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017754 GemmMicrokernelTester()
17755 .mr(8)
17756 .nr(8)
17757 .kr(1)
17758 .sr(1)
17759 .m(m)
17760 .n(n)
17761 .k(1)
17762 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017763 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017764 }
17765 }
17766 }
17767
Marat Dukhande06f492020-04-09 00:19:31 -070017768 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017769 TEST_REQUIRES_X86_FMA3;
17770 for (uint32_t m = 1; m <= 8; m++) {
17771 GemmMicrokernelTester()
17772 .mr(8)
17773 .nr(8)
17774 .kr(1)
17775 .sr(1)
17776 .m(m)
17777 .n(8)
17778 .k(1)
17779 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017780 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017781 }
17782 }
17783
Marat Dukhande06f492020-04-09 00:19:31 -070017784 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017785 TEST_REQUIRES_X86_FMA3;
17786 for (uint32_t n = 1; n <= 8; n++) {
17787 GemmMicrokernelTester()
17788 .mr(8)
17789 .nr(8)
17790 .kr(1)
17791 .sr(1)
17792 .m(8)
17793 .n(n)
17794 .k(1)
17795 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017796 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017797 }
17798 }
17799
Marat Dukhande06f492020-04-09 00:19:31 -070017800 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017801 TEST_REQUIRES_X86_FMA3;
17802 for (size_t k = 2; k < 10; k++) {
17803 GemmMicrokernelTester()
17804 .mr(8)
17805 .nr(8)
17806 .kr(1)
17807 .sr(1)
17808 .m(8)
17809 .n(8)
17810 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017811 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017812 }
17813 }
17814
Marat Dukhande06f492020-04-09 00:19:31 -070017815 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017816 TEST_REQUIRES_X86_FMA3;
17817 for (size_t k = 2; k < 10; k++) {
17818 GemmMicrokernelTester()
17819 .mr(8)
17820 .nr(8)
17821 .kr(1)
17822 .sr(1)
17823 .m(8)
17824 .n(8)
17825 .k(k)
17826 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017827 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017828 }
17829 }
17830
Marat Dukhande06f492020-04-09 00:19:31 -070017831 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017832 TEST_REQUIRES_X86_FMA3;
17833 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017834 for (uint32_t n = 1; n <= 8; n++) {
17835 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017836 GemmMicrokernelTester()
17837 .mr(8)
17838 .nr(8)
17839 .kr(1)
17840 .sr(1)
17841 .m(m)
17842 .n(n)
17843 .k(k)
17844 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017845 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017846 }
17847 }
17848 }
17849 }
17850
Marat Dukhande06f492020-04-09 00:19:31 -070017851 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017852 TEST_REQUIRES_X86_FMA3;
17853 for (uint32_t n = 9; n < 16; n++) {
17854 for (size_t k = 1; k <= 5; k += 2) {
17855 GemmMicrokernelTester()
17856 .mr(8)
17857 .nr(8)
17858 .kr(1)
17859 .sr(1)
17860 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017861 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017862 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017863 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017864 }
17865 }
17866 }
17867
Marat Dukhande06f492020-04-09 00:19:31 -070017868 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017869 TEST_REQUIRES_X86_FMA3;
17870 for (uint32_t n = 9; n < 16; n++) {
17871 for (size_t k = 1; k <= 5; k += 2) {
17872 GemmMicrokernelTester()
17873 .mr(8)
17874 .nr(8)
17875 .kr(1)
17876 .sr(1)
17877 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017878 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017879 .k(k)
17880 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017881 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017882 }
17883 }
17884 }
17885
Marat Dukhande06f492020-04-09 00:19:31 -070017886 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017887 TEST_REQUIRES_X86_FMA3;
17888 for (uint32_t n = 9; n < 16; n++) {
17889 for (size_t k = 1; k <= 5; k += 2) {
17890 GemmMicrokernelTester()
17891 .mr(8)
17892 .nr(8)
17893 .kr(1)
17894 .sr(1)
17895 .m(8)
17896 .n(n)
17897 .k(k)
17898 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017899 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017900 }
17901 }
17902 }
17903
Marat Dukhande06f492020-04-09 00:19:31 -070017904 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017905 TEST_REQUIRES_X86_FMA3;
17906 for (uint32_t n = 9; n < 16; n++) {
17907 for (size_t k = 1; k <= 5; k += 2) {
17908 for (uint32_t m = 1; m <= 8; m++) {
17909 GemmMicrokernelTester()
17910 .mr(8)
17911 .nr(8)
17912 .kr(1)
17913 .sr(1)
17914 .m(m)
17915 .n(n)
17916 .k(k)
17917 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017918 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017919 }
17920 }
17921 }
17922 }
17923
Marat Dukhande06f492020-04-09 00:19:31 -070017924 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017925 TEST_REQUIRES_X86_FMA3;
17926 for (uint32_t n = 16; n <= 24; n += 8) {
17927 for (size_t k = 1; k <= 5; k += 2) {
17928 GemmMicrokernelTester()
17929 .mr(8)
17930 .nr(8)
17931 .kr(1)
17932 .sr(1)
17933 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017934 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017935 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017936 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017937 }
17938 }
17939 }
17940
Marat Dukhande06f492020-04-09 00:19:31 -070017941 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017942 TEST_REQUIRES_X86_FMA3;
17943 for (uint32_t n = 16; n <= 24; n += 8) {
17944 for (size_t k = 1; k <= 5; k += 2) {
17945 GemmMicrokernelTester()
17946 .mr(8)
17947 .nr(8)
17948 .kr(1)
17949 .sr(1)
17950 .m(8)
17951 .n(n)
17952 .k(k)
17953 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017954 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017955 }
17956 }
17957 }
17958
Marat Dukhande06f492020-04-09 00:19:31 -070017959 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017960 TEST_REQUIRES_X86_FMA3;
17961 for (uint32_t n = 16; n <= 24; n += 8) {
17962 for (size_t k = 1; k <= 5; k += 2) {
17963 GemmMicrokernelTester()
17964 .mr(8)
17965 .nr(8)
17966 .kr(1)
17967 .sr(1)
17968 .m(8)
17969 .n(n)
17970 .k(k)
17971 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017972 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017973 }
17974 }
17975 }
17976
Marat Dukhande06f492020-04-09 00:19:31 -070017977 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017978 TEST_REQUIRES_X86_FMA3;
17979 for (uint32_t n = 16; n <= 24; n += 8) {
17980 for (size_t k = 1; k <= 5; k += 2) {
17981 for (uint32_t m = 1; m <= 8; m++) {
17982 GemmMicrokernelTester()
17983 .mr(8)
17984 .nr(8)
17985 .kr(1)
17986 .sr(1)
17987 .m(m)
17988 .n(n)
17989 .k(k)
17990 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017991 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017992 }
17993 }
17994 }
17995 }
17996
Marat Dukhande06f492020-04-09 00:19:31 -070017997 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017998 TEST_REQUIRES_X86_FMA3;
17999 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018000 for (uint32_t n = 1; n <= 8; n++) {
18001 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018002 GemmMicrokernelTester()
18003 .mr(8)
18004 .nr(8)
18005 .kr(1)
18006 .sr(1)
18007 .m(m)
18008 .n(n)
18009 .k(k)
18010 .cm_stride(11)
18011 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018012 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018013 }
18014 }
18015 }
18016 }
18017
Marat Dukhande06f492020-04-09 00:19:31 -070018018 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018019 TEST_REQUIRES_X86_FMA3;
18020 GemmMicrokernelTester()
18021 .mr(8)
18022 .nr(8)
18023 .kr(1)
18024 .sr(1)
18025 .m(8)
18026 .n(8)
18027 .k(1)
18028 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018029 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018030 }
18031
Marat Dukhande06f492020-04-09 00:19:31 -070018032 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018033 TEST_REQUIRES_X86_FMA3;
18034 GemmMicrokernelTester()
18035 .mr(8)
18036 .nr(8)
18037 .kr(1)
18038 .sr(1)
18039 .m(8)
18040 .n(8)
18041 .k(1)
18042 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018043 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018044 }
18045
Marat Dukhande06f492020-04-09 00:19:31 -070018046 TEST(F32_GEMMINC_MINMAX_8X8__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018047 TEST_REQUIRES_X86_FMA3;
18048 GemmMicrokernelTester()
18049 .mr(8)
18050 .nr(8)
18051 .kr(1)
18052 .sr(1)
18053 .m(8)
18054 .n(8)
18055 .k(1)
18056 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018057 .Test(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018058 }
18059#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18060
18061
18062#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070018063 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018064 TEST_REQUIRES_X86_FMA3;
18065 GemmMicrokernelTester()
18066 .mr(4)
18067 .nr(16)
18068 .kr(1)
18069 .sr(1)
18070 .m(4)
18071 .n(16)
18072 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018073 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018074 }
18075
Marat Dukhande06f492020-04-09 00:19:31 -070018076 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018077 TEST_REQUIRES_X86_FMA3;
18078 GemmMicrokernelTester()
18079 .mr(4)
18080 .nr(16)
18081 .kr(1)
18082 .sr(1)
18083 .m(4)
18084 .n(16)
18085 .k(1)
18086 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018087 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018088 }
18089
Marat Dukhande06f492020-04-09 00:19:31 -070018090 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018091 TEST_REQUIRES_X86_FMA3;
18092 GemmMicrokernelTester()
18093 .mr(4)
18094 .nr(16)
18095 .kr(1)
18096 .sr(1)
18097 .m(4)
18098 .n(16)
18099 .k(1)
18100 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018101 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018102 }
18103
Marat Dukhande06f492020-04-09 00:19:31 -070018104 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018105 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018106 for (uint32_t n = 1; n <= 16; n++) {
18107 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018108 GemmMicrokernelTester()
18109 .mr(4)
18110 .nr(16)
18111 .kr(1)
18112 .sr(1)
18113 .m(m)
18114 .n(n)
18115 .k(1)
18116 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018117 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018118 }
18119 }
18120 }
18121
Marat Dukhande06f492020-04-09 00:19:31 -070018122 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018123 TEST_REQUIRES_X86_FMA3;
18124 for (uint32_t m = 1; m <= 4; m++) {
18125 GemmMicrokernelTester()
18126 .mr(4)
18127 .nr(16)
18128 .kr(1)
18129 .sr(1)
18130 .m(m)
18131 .n(16)
18132 .k(1)
18133 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018134 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018135 }
18136 }
18137
Marat Dukhande06f492020-04-09 00:19:31 -070018138 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018139 TEST_REQUIRES_X86_FMA3;
18140 for (uint32_t n = 1; n <= 16; n++) {
18141 GemmMicrokernelTester()
18142 .mr(4)
18143 .nr(16)
18144 .kr(1)
18145 .sr(1)
18146 .m(4)
18147 .n(n)
18148 .k(1)
18149 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018150 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018151 }
18152 }
18153
Marat Dukhande06f492020-04-09 00:19:31 -070018154 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018155 TEST_REQUIRES_X86_FMA3;
18156 for (size_t k = 2; k < 10; k++) {
18157 GemmMicrokernelTester()
18158 .mr(4)
18159 .nr(16)
18160 .kr(1)
18161 .sr(1)
18162 .m(4)
18163 .n(16)
18164 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018165 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018166 }
18167 }
18168
Marat Dukhande06f492020-04-09 00:19:31 -070018169 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018170 TEST_REQUIRES_X86_FMA3;
18171 for (size_t k = 2; k < 10; k++) {
18172 GemmMicrokernelTester()
18173 .mr(4)
18174 .nr(16)
18175 .kr(1)
18176 .sr(1)
18177 .m(4)
18178 .n(16)
18179 .k(k)
18180 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018181 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018182 }
18183 }
18184
Marat Dukhande06f492020-04-09 00:19:31 -070018185 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018186 TEST_REQUIRES_X86_FMA3;
18187 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018188 for (uint32_t n = 1; n <= 16; n++) {
18189 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018190 GemmMicrokernelTester()
18191 .mr(4)
18192 .nr(16)
18193 .kr(1)
18194 .sr(1)
18195 .m(m)
18196 .n(n)
18197 .k(k)
18198 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018199 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018200 }
18201 }
18202 }
18203 }
18204
Marat Dukhande06f492020-04-09 00:19:31 -070018205 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018206 TEST_REQUIRES_X86_FMA3;
18207 for (uint32_t n = 17; n < 32; n++) {
18208 for (size_t k = 1; k <= 5; k += 2) {
18209 GemmMicrokernelTester()
18210 .mr(4)
18211 .nr(16)
18212 .kr(1)
18213 .sr(1)
18214 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018215 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018216 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018217 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018218 }
18219 }
18220 }
18221
Marat Dukhande06f492020-04-09 00:19:31 -070018222 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018223 TEST_REQUIRES_X86_FMA3;
18224 for (uint32_t n = 17; n < 32; n++) {
18225 for (size_t k = 1; k <= 5; k += 2) {
18226 GemmMicrokernelTester()
18227 .mr(4)
18228 .nr(16)
18229 .kr(1)
18230 .sr(1)
18231 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018232 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018233 .k(k)
18234 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018235 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018236 }
18237 }
18238 }
18239
Marat Dukhande06f492020-04-09 00:19:31 -070018240 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018241 TEST_REQUIRES_X86_FMA3;
18242 for (uint32_t n = 17; n < 32; n++) {
18243 for (size_t k = 1; k <= 5; k += 2) {
18244 GemmMicrokernelTester()
18245 .mr(4)
18246 .nr(16)
18247 .kr(1)
18248 .sr(1)
18249 .m(4)
18250 .n(n)
18251 .k(k)
18252 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018253 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018254 }
18255 }
18256 }
18257
Marat Dukhande06f492020-04-09 00:19:31 -070018258 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018259 TEST_REQUIRES_X86_FMA3;
18260 for (uint32_t n = 17; n < 32; n++) {
18261 for (size_t k = 1; k <= 5; k += 2) {
18262 for (uint32_t m = 1; m <= 4; m++) {
18263 GemmMicrokernelTester()
18264 .mr(4)
18265 .nr(16)
18266 .kr(1)
18267 .sr(1)
18268 .m(m)
18269 .n(n)
18270 .k(k)
18271 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018272 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018273 }
18274 }
18275 }
18276 }
18277
Marat Dukhande06f492020-04-09 00:19:31 -070018278 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018279 TEST_REQUIRES_X86_FMA3;
18280 for (uint32_t n = 32; n <= 48; n += 16) {
18281 for (size_t k = 1; k <= 5; k += 2) {
18282 GemmMicrokernelTester()
18283 .mr(4)
18284 .nr(16)
18285 .kr(1)
18286 .sr(1)
18287 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018288 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018289 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018290 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018291 }
18292 }
18293 }
18294
Marat Dukhande06f492020-04-09 00:19:31 -070018295 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018296 TEST_REQUIRES_X86_FMA3;
18297 for (uint32_t n = 32; n <= 48; n += 16) {
18298 for (size_t k = 1; k <= 5; k += 2) {
18299 GemmMicrokernelTester()
18300 .mr(4)
18301 .nr(16)
18302 .kr(1)
18303 .sr(1)
18304 .m(4)
18305 .n(n)
18306 .k(k)
18307 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018308 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018309 }
18310 }
18311 }
18312
Marat Dukhande06f492020-04-09 00:19:31 -070018313 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018314 TEST_REQUIRES_X86_FMA3;
18315 for (uint32_t n = 32; n <= 48; n += 16) {
18316 for (size_t k = 1; k <= 5; k += 2) {
18317 GemmMicrokernelTester()
18318 .mr(4)
18319 .nr(16)
18320 .kr(1)
18321 .sr(1)
18322 .m(4)
18323 .n(n)
18324 .k(k)
18325 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018326 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018327 }
18328 }
18329 }
18330
Marat Dukhande06f492020-04-09 00:19:31 -070018331 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018332 TEST_REQUIRES_X86_FMA3;
18333 for (uint32_t n = 32; n <= 48; n += 16) {
18334 for (size_t k = 1; k <= 5; k += 2) {
18335 for (uint32_t m = 1; m <= 4; m++) {
18336 GemmMicrokernelTester()
18337 .mr(4)
18338 .nr(16)
18339 .kr(1)
18340 .sr(1)
18341 .m(m)
18342 .n(n)
18343 .k(k)
18344 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018345 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018346 }
18347 }
18348 }
18349 }
18350
Marat Dukhande06f492020-04-09 00:19:31 -070018351 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018352 TEST_REQUIRES_X86_FMA3;
18353 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018354 for (uint32_t n = 1; n <= 16; n++) {
18355 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018356 GemmMicrokernelTester()
18357 .mr(4)
18358 .nr(16)
18359 .kr(1)
18360 .sr(1)
18361 .m(m)
18362 .n(n)
18363 .k(k)
18364 .cm_stride(19)
18365 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018366 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018367 }
18368 }
18369 }
18370 }
18371
Marat Dukhande06f492020-04-09 00:19:31 -070018372 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018373 TEST_REQUIRES_X86_FMA3;
18374 GemmMicrokernelTester()
18375 .mr(4)
18376 .nr(16)
18377 .kr(1)
18378 .sr(1)
18379 .m(4)
18380 .n(16)
18381 .k(1)
18382 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018383 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018384 }
18385
Marat Dukhande06f492020-04-09 00:19:31 -070018386 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018387 TEST_REQUIRES_X86_FMA3;
18388 GemmMicrokernelTester()
18389 .mr(4)
18390 .nr(16)
18391 .kr(1)
18392 .sr(1)
18393 .m(4)
18394 .n(16)
18395 .k(1)
18396 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018397 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018398 }
18399
Marat Dukhande06f492020-04-09 00:19:31 -070018400 TEST(F32_GEMMINC_MINMAX_4X16__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018401 TEST_REQUIRES_X86_FMA3;
18402 GemmMicrokernelTester()
18403 .mr(4)
18404 .nr(16)
18405 .kr(1)
18406 .sr(1)
18407 .m(4)
18408 .n(16)
18409 .k(1)
18410 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018411 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018412 }
18413#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18414
18415
18416#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070018417 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018418 TEST_REQUIRES_X86_FMA3;
18419 GemmMicrokernelTester()
18420 .mr(5)
18421 .nr(16)
18422 .kr(1)
18423 .sr(1)
18424 .m(5)
18425 .n(16)
18426 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018427 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018428 }
18429
Marat Dukhande06f492020-04-09 00:19:31 -070018430 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018431 TEST_REQUIRES_X86_FMA3;
18432 GemmMicrokernelTester()
18433 .mr(5)
18434 .nr(16)
18435 .kr(1)
18436 .sr(1)
18437 .m(5)
18438 .n(16)
18439 .k(1)
18440 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018441 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018442 }
18443
Marat Dukhande06f492020-04-09 00:19:31 -070018444 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018445 TEST_REQUIRES_X86_FMA3;
18446 GemmMicrokernelTester()
18447 .mr(5)
18448 .nr(16)
18449 .kr(1)
18450 .sr(1)
18451 .m(5)
18452 .n(16)
18453 .k(1)
18454 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018455 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018456 }
18457
Marat Dukhande06f492020-04-09 00:19:31 -070018458 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018459 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018460 for (uint32_t n = 1; n <= 16; n++) {
18461 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018462 GemmMicrokernelTester()
18463 .mr(5)
18464 .nr(16)
18465 .kr(1)
18466 .sr(1)
18467 .m(m)
18468 .n(n)
18469 .k(1)
18470 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018471 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018472 }
18473 }
18474 }
18475
Marat Dukhande06f492020-04-09 00:19:31 -070018476 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018477 TEST_REQUIRES_X86_FMA3;
18478 for (uint32_t m = 1; m <= 5; m++) {
18479 GemmMicrokernelTester()
18480 .mr(5)
18481 .nr(16)
18482 .kr(1)
18483 .sr(1)
18484 .m(m)
18485 .n(16)
18486 .k(1)
18487 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018488 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018489 }
18490 }
18491
Marat Dukhande06f492020-04-09 00:19:31 -070018492 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018493 TEST_REQUIRES_X86_FMA3;
18494 for (uint32_t n = 1; n <= 16; n++) {
18495 GemmMicrokernelTester()
18496 .mr(5)
18497 .nr(16)
18498 .kr(1)
18499 .sr(1)
18500 .m(5)
18501 .n(n)
18502 .k(1)
18503 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018504 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018505 }
18506 }
18507
Marat Dukhande06f492020-04-09 00:19:31 -070018508 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018509 TEST_REQUIRES_X86_FMA3;
18510 for (size_t k = 2; k < 10; k++) {
18511 GemmMicrokernelTester()
18512 .mr(5)
18513 .nr(16)
18514 .kr(1)
18515 .sr(1)
18516 .m(5)
18517 .n(16)
18518 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018519 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018520 }
18521 }
18522
Marat Dukhande06f492020-04-09 00:19:31 -070018523 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018524 TEST_REQUIRES_X86_FMA3;
18525 for (size_t k = 2; k < 10; k++) {
18526 GemmMicrokernelTester()
18527 .mr(5)
18528 .nr(16)
18529 .kr(1)
18530 .sr(1)
18531 .m(5)
18532 .n(16)
18533 .k(k)
18534 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018535 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018536 }
18537 }
18538
Marat Dukhande06f492020-04-09 00:19:31 -070018539 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018540 TEST_REQUIRES_X86_FMA3;
18541 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018542 for (uint32_t n = 1; n <= 16; n++) {
18543 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018544 GemmMicrokernelTester()
18545 .mr(5)
18546 .nr(16)
18547 .kr(1)
18548 .sr(1)
18549 .m(m)
18550 .n(n)
18551 .k(k)
18552 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018553 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018554 }
18555 }
18556 }
18557 }
18558
Marat Dukhande06f492020-04-09 00:19:31 -070018559 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018560 TEST_REQUIRES_X86_FMA3;
18561 for (uint32_t n = 17; n < 32; n++) {
18562 for (size_t k = 1; k <= 5; k += 2) {
18563 GemmMicrokernelTester()
18564 .mr(5)
18565 .nr(16)
18566 .kr(1)
18567 .sr(1)
18568 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018569 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018570 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018571 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018572 }
18573 }
18574 }
18575
Marat Dukhande06f492020-04-09 00:19:31 -070018576 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018577 TEST_REQUIRES_X86_FMA3;
18578 for (uint32_t n = 17; n < 32; n++) {
18579 for (size_t k = 1; k <= 5; k += 2) {
18580 GemmMicrokernelTester()
18581 .mr(5)
18582 .nr(16)
18583 .kr(1)
18584 .sr(1)
18585 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018586 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018587 .k(k)
18588 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018589 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018590 }
18591 }
18592 }
18593
Marat Dukhande06f492020-04-09 00:19:31 -070018594 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018595 TEST_REQUIRES_X86_FMA3;
18596 for (uint32_t n = 17; n < 32; n++) {
18597 for (size_t k = 1; k <= 5; k += 2) {
18598 GemmMicrokernelTester()
18599 .mr(5)
18600 .nr(16)
18601 .kr(1)
18602 .sr(1)
18603 .m(5)
18604 .n(n)
18605 .k(k)
18606 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018607 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018608 }
18609 }
18610 }
18611
Marat Dukhande06f492020-04-09 00:19:31 -070018612 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018613 TEST_REQUIRES_X86_FMA3;
18614 for (uint32_t n = 17; n < 32; n++) {
18615 for (size_t k = 1; k <= 5; k += 2) {
18616 for (uint32_t m = 1; m <= 5; m++) {
18617 GemmMicrokernelTester()
18618 .mr(5)
18619 .nr(16)
18620 .kr(1)
18621 .sr(1)
18622 .m(m)
18623 .n(n)
18624 .k(k)
18625 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018626 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018627 }
18628 }
18629 }
18630 }
18631
Marat Dukhande06f492020-04-09 00:19:31 -070018632 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018633 TEST_REQUIRES_X86_FMA3;
18634 for (uint32_t n = 32; n <= 48; n += 16) {
18635 for (size_t k = 1; k <= 5; k += 2) {
18636 GemmMicrokernelTester()
18637 .mr(5)
18638 .nr(16)
18639 .kr(1)
18640 .sr(1)
18641 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018642 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018643 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018644 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018645 }
18646 }
18647 }
18648
Marat Dukhande06f492020-04-09 00:19:31 -070018649 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018650 TEST_REQUIRES_X86_FMA3;
18651 for (uint32_t n = 32; n <= 48; n += 16) {
18652 for (size_t k = 1; k <= 5; k += 2) {
18653 GemmMicrokernelTester()
18654 .mr(5)
18655 .nr(16)
18656 .kr(1)
18657 .sr(1)
18658 .m(5)
18659 .n(n)
18660 .k(k)
18661 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018662 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018663 }
18664 }
18665 }
18666
Marat Dukhande06f492020-04-09 00:19:31 -070018667 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018668 TEST_REQUIRES_X86_FMA3;
18669 for (uint32_t n = 32; n <= 48; n += 16) {
18670 for (size_t k = 1; k <= 5; k += 2) {
18671 GemmMicrokernelTester()
18672 .mr(5)
18673 .nr(16)
18674 .kr(1)
18675 .sr(1)
18676 .m(5)
18677 .n(n)
18678 .k(k)
18679 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018680 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018681 }
18682 }
18683 }
18684
Marat Dukhande06f492020-04-09 00:19:31 -070018685 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018686 TEST_REQUIRES_X86_FMA3;
18687 for (uint32_t n = 32; n <= 48; n += 16) {
18688 for (size_t k = 1; k <= 5; k += 2) {
18689 for (uint32_t m = 1; m <= 5; m++) {
18690 GemmMicrokernelTester()
18691 .mr(5)
18692 .nr(16)
18693 .kr(1)
18694 .sr(1)
18695 .m(m)
18696 .n(n)
18697 .k(k)
18698 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018699 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018700 }
18701 }
18702 }
18703 }
18704
Marat Dukhande06f492020-04-09 00:19:31 -070018705 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018706 TEST_REQUIRES_X86_FMA3;
18707 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018708 for (uint32_t n = 1; n <= 16; n++) {
18709 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018710 GemmMicrokernelTester()
18711 .mr(5)
18712 .nr(16)
18713 .kr(1)
18714 .sr(1)
18715 .m(m)
18716 .n(n)
18717 .k(k)
18718 .cm_stride(19)
18719 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018720 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018721 }
18722 }
18723 }
18724 }
18725
Marat Dukhande06f492020-04-09 00:19:31 -070018726 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018727 TEST_REQUIRES_X86_FMA3;
18728 GemmMicrokernelTester()
18729 .mr(5)
18730 .nr(16)
18731 .kr(1)
18732 .sr(1)
18733 .m(5)
18734 .n(16)
18735 .k(1)
18736 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018737 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018738 }
18739
Marat Dukhande06f492020-04-09 00:19:31 -070018740 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018741 TEST_REQUIRES_X86_FMA3;
18742 GemmMicrokernelTester()
18743 .mr(5)
18744 .nr(16)
18745 .kr(1)
18746 .sr(1)
18747 .m(5)
18748 .n(16)
18749 .k(1)
18750 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018751 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018752 }
18753
Marat Dukhande06f492020-04-09 00:19:31 -070018754 TEST(F32_GEMMINC_MINMAX_5X16__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018755 TEST_REQUIRES_X86_FMA3;
18756 GemmMicrokernelTester()
18757 .mr(5)
18758 .nr(16)
18759 .kr(1)
18760 .sr(1)
18761 .m(5)
18762 .n(16)
18763 .k(1)
18764 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018765 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018766 }
18767#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18768
18769
18770#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070018771 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018772 TEST_REQUIRES_X86_FMA3;
18773 GemmMicrokernelTester()
18774 .mr(1)
18775 .nr(16)
18776 .kr(1)
18777 .sr(4)
18778 .m(1)
18779 .n(16)
18780 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018781 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018782 }
18783
Marat Dukhande06f492020-04-09 00:19:31 -070018784 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018785 TEST_REQUIRES_X86_FMA3;
18786 GemmMicrokernelTester()
18787 .mr(1)
18788 .nr(16)
18789 .kr(1)
18790 .sr(4)
18791 .m(1)
18792 .n(16)
18793 .k(4)
18794 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018795 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018796 }
18797
Marat Dukhande06f492020-04-09 00:19:31 -070018798 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018799 TEST_REQUIRES_X86_FMA3;
18800 GemmMicrokernelTester()
18801 .mr(1)
18802 .nr(16)
18803 .kr(1)
18804 .sr(4)
18805 .m(1)
18806 .n(16)
18807 .k(4)
18808 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018809 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018810 }
18811
Marat Dukhande06f492020-04-09 00:19:31 -070018812 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018813 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018814 for (uint32_t n = 1; n <= 16; n++) {
18815 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018816 GemmMicrokernelTester()
18817 .mr(1)
18818 .nr(16)
18819 .kr(1)
18820 .sr(4)
18821 .m(m)
18822 .n(n)
18823 .k(4)
18824 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018825 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018826 }
18827 }
18828 }
18829
Marat Dukhande06f492020-04-09 00:19:31 -070018830 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018831 TEST_REQUIRES_X86_FMA3;
18832 for (uint32_t m = 1; m <= 1; m++) {
18833 GemmMicrokernelTester()
18834 .mr(1)
18835 .nr(16)
18836 .kr(1)
18837 .sr(4)
18838 .m(m)
18839 .n(16)
18840 .k(4)
18841 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018842 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018843 }
18844 }
18845
Marat Dukhande06f492020-04-09 00:19:31 -070018846 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018847 TEST_REQUIRES_X86_FMA3;
18848 for (uint32_t n = 1; n <= 16; n++) {
18849 GemmMicrokernelTester()
18850 .mr(1)
18851 .nr(16)
18852 .kr(1)
18853 .sr(4)
18854 .m(1)
18855 .n(n)
18856 .k(4)
18857 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018858 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018859 }
18860 }
18861
Marat Dukhande06f492020-04-09 00:19:31 -070018862 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018863 TEST_REQUIRES_X86_FMA3;
18864 for (size_t k = 1; k < 4; k++) {
18865 GemmMicrokernelTester()
18866 .mr(1)
18867 .nr(16)
18868 .kr(1)
18869 .sr(4)
18870 .m(1)
18871 .n(16)
18872 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018873 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018874 }
18875 }
18876
Marat Dukhande06f492020-04-09 00:19:31 -070018877 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018878 TEST_REQUIRES_X86_FMA3;
18879 for (size_t k = 1; k < 4; k++) {
18880 GemmMicrokernelTester()
18881 .mr(1)
18882 .nr(16)
18883 .kr(1)
18884 .sr(4)
18885 .m(1)
18886 .n(16)
18887 .k(k)
18888 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018889 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018890 }
18891 }
18892
Marat Dukhande06f492020-04-09 00:19:31 -070018893 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018894 TEST_REQUIRES_X86_FMA3;
18895 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018896 for (uint32_t n = 1; n <= 16; n++) {
18897 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018898 GemmMicrokernelTester()
18899 .mr(1)
18900 .nr(16)
18901 .kr(1)
18902 .sr(4)
18903 .m(m)
18904 .n(n)
18905 .k(k)
18906 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018907 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018908 }
18909 }
18910 }
18911 }
18912
Marat Dukhande06f492020-04-09 00:19:31 -070018913 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018914 TEST_REQUIRES_X86_FMA3;
18915 for (size_t k = 5; k < 8; k++) {
18916 GemmMicrokernelTester()
18917 .mr(1)
18918 .nr(16)
18919 .kr(1)
18920 .sr(4)
18921 .m(1)
18922 .n(16)
18923 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018924 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018925 }
18926 }
18927
Marat Dukhande06f492020-04-09 00:19:31 -070018928 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018929 TEST_REQUIRES_X86_FMA3;
18930 for (size_t k = 5; k < 8; k++) {
18931 GemmMicrokernelTester()
18932 .mr(1)
18933 .nr(16)
18934 .kr(1)
18935 .sr(4)
18936 .m(1)
18937 .n(16)
18938 .k(k)
18939 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018940 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018941 }
18942 }
18943
Marat Dukhande06f492020-04-09 00:19:31 -070018944 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018945 TEST_REQUIRES_X86_FMA3;
18946 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018947 for (uint32_t n = 1; n <= 16; n++) {
18948 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018949 GemmMicrokernelTester()
18950 .mr(1)
18951 .nr(16)
18952 .kr(1)
18953 .sr(4)
18954 .m(m)
18955 .n(n)
18956 .k(k)
18957 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018958 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018959 }
18960 }
18961 }
18962 }
18963
Marat Dukhande06f492020-04-09 00:19:31 -070018964 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018965 TEST_REQUIRES_X86_FMA3;
18966 for (size_t k = 8; k <= 40; k += 4) {
18967 GemmMicrokernelTester()
18968 .mr(1)
18969 .nr(16)
18970 .kr(1)
18971 .sr(4)
18972 .m(1)
18973 .n(16)
18974 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018975 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018976 }
18977 }
18978
Marat Dukhande06f492020-04-09 00:19:31 -070018979 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018980 TEST_REQUIRES_X86_FMA3;
18981 for (size_t k = 8; k <= 40; k += 4) {
18982 GemmMicrokernelTester()
18983 .mr(1)
18984 .nr(16)
18985 .kr(1)
18986 .sr(4)
18987 .m(1)
18988 .n(16)
18989 .k(k)
18990 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018991 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018992 }
18993 }
18994
Marat Dukhande06f492020-04-09 00:19:31 -070018995 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018996 TEST_REQUIRES_X86_FMA3;
18997 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018998 for (uint32_t n = 1; n <= 16; n++) {
18999 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019000 GemmMicrokernelTester()
19001 .mr(1)
19002 .nr(16)
19003 .kr(1)
19004 .sr(4)
19005 .m(m)
19006 .n(n)
19007 .k(k)
19008 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019009 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019010 }
19011 }
19012 }
19013 }
19014
Marat Dukhande06f492020-04-09 00:19:31 -070019015 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019016 TEST_REQUIRES_X86_FMA3;
19017 for (uint32_t n = 17; n < 32; n++) {
19018 for (size_t k = 1; k <= 20; k += 5) {
19019 GemmMicrokernelTester()
19020 .mr(1)
19021 .nr(16)
19022 .kr(1)
19023 .sr(4)
19024 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019025 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019026 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019027 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019028 }
19029 }
19030 }
19031
Marat Dukhande06f492020-04-09 00:19:31 -070019032 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019033 TEST_REQUIRES_X86_FMA3;
19034 for (uint32_t n = 17; n < 32; n++) {
19035 for (size_t k = 1; k <= 20; k += 5) {
19036 GemmMicrokernelTester()
19037 .mr(1)
19038 .nr(16)
19039 .kr(1)
19040 .sr(4)
19041 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019042 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019043 .k(k)
19044 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019045 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019046 }
19047 }
19048 }
19049
Marat Dukhande06f492020-04-09 00:19:31 -070019050 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019051 TEST_REQUIRES_X86_FMA3;
19052 for (uint32_t n = 17; n < 32; n++) {
19053 for (size_t k = 1; k <= 20; k += 5) {
19054 GemmMicrokernelTester()
19055 .mr(1)
19056 .nr(16)
19057 .kr(1)
19058 .sr(4)
19059 .m(1)
19060 .n(n)
19061 .k(k)
19062 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019063 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019064 }
19065 }
19066 }
19067
Marat Dukhande06f492020-04-09 00:19:31 -070019068 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019069 TEST_REQUIRES_X86_FMA3;
19070 for (uint32_t n = 17; n < 32; n++) {
19071 for (size_t k = 1; k <= 20; k += 5) {
19072 for (uint32_t m = 1; m <= 1; m++) {
19073 GemmMicrokernelTester()
19074 .mr(1)
19075 .nr(16)
19076 .kr(1)
19077 .sr(4)
19078 .m(m)
19079 .n(n)
19080 .k(k)
19081 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019082 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019083 }
19084 }
19085 }
19086 }
19087
Marat Dukhande06f492020-04-09 00:19:31 -070019088 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019089 TEST_REQUIRES_X86_FMA3;
19090 for (uint32_t n = 32; n <= 48; n += 16) {
19091 for (size_t k = 1; k <= 20; k += 5) {
19092 GemmMicrokernelTester()
19093 .mr(1)
19094 .nr(16)
19095 .kr(1)
19096 .sr(4)
19097 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019098 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019099 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019100 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019101 }
19102 }
19103 }
19104
Marat Dukhande06f492020-04-09 00:19:31 -070019105 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019106 TEST_REQUIRES_X86_FMA3;
19107 for (uint32_t n = 32; n <= 48; n += 16) {
19108 for (size_t k = 1; k <= 20; k += 5) {
19109 GemmMicrokernelTester()
19110 .mr(1)
19111 .nr(16)
19112 .kr(1)
19113 .sr(4)
19114 .m(1)
19115 .n(n)
19116 .k(k)
19117 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019118 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019119 }
19120 }
19121 }
19122
Marat Dukhande06f492020-04-09 00:19:31 -070019123 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019124 TEST_REQUIRES_X86_FMA3;
19125 for (uint32_t n = 32; n <= 48; n += 16) {
19126 for (size_t k = 1; k <= 20; k += 5) {
19127 GemmMicrokernelTester()
19128 .mr(1)
19129 .nr(16)
19130 .kr(1)
19131 .sr(4)
19132 .m(1)
19133 .n(n)
19134 .k(k)
19135 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019136 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019137 }
19138 }
19139 }
19140
Marat Dukhande06f492020-04-09 00:19:31 -070019141 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019142 TEST_REQUIRES_X86_FMA3;
19143 for (uint32_t n = 32; n <= 48; n += 16) {
19144 for (size_t k = 1; k <= 20; k += 5) {
19145 for (uint32_t m = 1; m <= 1; m++) {
19146 GemmMicrokernelTester()
19147 .mr(1)
19148 .nr(16)
19149 .kr(1)
19150 .sr(4)
19151 .m(m)
19152 .n(n)
19153 .k(k)
19154 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019155 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019156 }
19157 }
19158 }
19159 }
19160
Marat Dukhande06f492020-04-09 00:19:31 -070019161 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019162 TEST_REQUIRES_X86_FMA3;
19163 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019164 for (uint32_t n = 1; n <= 16; n++) {
19165 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019166 GemmMicrokernelTester()
19167 .mr(1)
19168 .nr(16)
19169 .kr(1)
19170 .sr(4)
19171 .m(m)
19172 .n(n)
19173 .k(k)
19174 .cm_stride(19)
19175 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019176 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019177 }
19178 }
19179 }
19180 }
19181
Marat Dukhande06f492020-04-09 00:19:31 -070019182 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019183 TEST_REQUIRES_X86_FMA3;
19184 GemmMicrokernelTester()
19185 .mr(1)
19186 .nr(16)
19187 .kr(1)
19188 .sr(4)
19189 .m(1)
19190 .n(16)
19191 .k(4)
19192 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019193 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019194 }
19195
Marat Dukhande06f492020-04-09 00:19:31 -070019196 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019197 TEST_REQUIRES_X86_FMA3;
19198 GemmMicrokernelTester()
19199 .mr(1)
19200 .nr(16)
19201 .kr(1)
19202 .sr(4)
19203 .m(1)
19204 .n(16)
19205 .k(4)
19206 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019207 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019208 }
19209
Marat Dukhande06f492020-04-09 00:19:31 -070019210 TEST(F32_GEMMINC_MINMAX_1X16S4__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019211 TEST_REQUIRES_X86_FMA3;
19212 GemmMicrokernelTester()
19213 .mr(1)
19214 .nr(16)
19215 .kr(1)
19216 .sr(4)
19217 .m(1)
19218 .n(16)
19219 .k(4)
19220 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019221 .Test(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019222 }
19223#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19224
19225
19226#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070019227 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019228 TEST_REQUIRES_X86_FMA3;
19229 GemmMicrokernelTester()
19230 .mr(3)
19231 .nr(16)
19232 .kr(1)
19233 .sr(4)
19234 .m(3)
19235 .n(16)
19236 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019237 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019238 }
19239
Marat Dukhande06f492020-04-09 00:19:31 -070019240 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019241 TEST_REQUIRES_X86_FMA3;
19242 GemmMicrokernelTester()
19243 .mr(3)
19244 .nr(16)
19245 .kr(1)
19246 .sr(4)
19247 .m(3)
19248 .n(16)
19249 .k(4)
19250 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019251 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019252 }
19253
Marat Dukhande06f492020-04-09 00:19:31 -070019254 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019255 TEST_REQUIRES_X86_FMA3;
19256 GemmMicrokernelTester()
19257 .mr(3)
19258 .nr(16)
19259 .kr(1)
19260 .sr(4)
19261 .m(3)
19262 .n(16)
19263 .k(4)
19264 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019265 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019266 }
19267
Marat Dukhande06f492020-04-09 00:19:31 -070019268 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019269 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019270 for (uint32_t n = 1; n <= 16; n++) {
19271 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019272 GemmMicrokernelTester()
19273 .mr(3)
19274 .nr(16)
19275 .kr(1)
19276 .sr(4)
19277 .m(m)
19278 .n(n)
19279 .k(4)
19280 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019281 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019282 }
19283 }
19284 }
19285
Marat Dukhande06f492020-04-09 00:19:31 -070019286 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019287 TEST_REQUIRES_X86_FMA3;
19288 for (uint32_t m = 1; m <= 3; m++) {
19289 GemmMicrokernelTester()
19290 .mr(3)
19291 .nr(16)
19292 .kr(1)
19293 .sr(4)
19294 .m(m)
19295 .n(16)
19296 .k(4)
19297 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019298 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019299 }
19300 }
19301
Marat Dukhande06f492020-04-09 00:19:31 -070019302 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019303 TEST_REQUIRES_X86_FMA3;
19304 for (uint32_t n = 1; n <= 16; n++) {
19305 GemmMicrokernelTester()
19306 .mr(3)
19307 .nr(16)
19308 .kr(1)
19309 .sr(4)
19310 .m(3)
19311 .n(n)
19312 .k(4)
19313 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019314 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019315 }
19316 }
19317
Marat Dukhande06f492020-04-09 00:19:31 -070019318 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019319 TEST_REQUIRES_X86_FMA3;
19320 for (size_t k = 1; k < 4; k++) {
19321 GemmMicrokernelTester()
19322 .mr(3)
19323 .nr(16)
19324 .kr(1)
19325 .sr(4)
19326 .m(3)
19327 .n(16)
19328 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019329 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019330 }
19331 }
19332
Marat Dukhande06f492020-04-09 00:19:31 -070019333 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019334 TEST_REQUIRES_X86_FMA3;
19335 for (size_t k = 1; k < 4; k++) {
19336 GemmMicrokernelTester()
19337 .mr(3)
19338 .nr(16)
19339 .kr(1)
19340 .sr(4)
19341 .m(3)
19342 .n(16)
19343 .k(k)
19344 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019345 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019346 }
19347 }
19348
Marat Dukhande06f492020-04-09 00:19:31 -070019349 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019350 TEST_REQUIRES_X86_FMA3;
19351 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019352 for (uint32_t n = 1; n <= 16; n++) {
19353 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019354 GemmMicrokernelTester()
19355 .mr(3)
19356 .nr(16)
19357 .kr(1)
19358 .sr(4)
19359 .m(m)
19360 .n(n)
19361 .k(k)
19362 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019363 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019364 }
19365 }
19366 }
19367 }
19368
Marat Dukhande06f492020-04-09 00:19:31 -070019369 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019370 TEST_REQUIRES_X86_FMA3;
19371 for (size_t k = 5; k < 8; k++) {
19372 GemmMicrokernelTester()
19373 .mr(3)
19374 .nr(16)
19375 .kr(1)
19376 .sr(4)
19377 .m(3)
19378 .n(16)
19379 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019380 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019381 }
19382 }
19383
Marat Dukhande06f492020-04-09 00:19:31 -070019384 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019385 TEST_REQUIRES_X86_FMA3;
19386 for (size_t k = 5; k < 8; k++) {
19387 GemmMicrokernelTester()
19388 .mr(3)
19389 .nr(16)
19390 .kr(1)
19391 .sr(4)
19392 .m(3)
19393 .n(16)
19394 .k(k)
19395 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019396 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019397 }
19398 }
19399
Marat Dukhande06f492020-04-09 00:19:31 -070019400 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019401 TEST_REQUIRES_X86_FMA3;
19402 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019403 for (uint32_t n = 1; n <= 16; n++) {
19404 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019405 GemmMicrokernelTester()
19406 .mr(3)
19407 .nr(16)
19408 .kr(1)
19409 .sr(4)
19410 .m(m)
19411 .n(n)
19412 .k(k)
19413 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019414 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019415 }
19416 }
19417 }
19418 }
19419
Marat Dukhande06f492020-04-09 00:19:31 -070019420 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019421 TEST_REQUIRES_X86_FMA3;
19422 for (size_t k = 8; k <= 40; k += 4) {
19423 GemmMicrokernelTester()
19424 .mr(3)
19425 .nr(16)
19426 .kr(1)
19427 .sr(4)
19428 .m(3)
19429 .n(16)
19430 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019431 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019432 }
19433 }
19434
Marat Dukhande06f492020-04-09 00:19:31 -070019435 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019436 TEST_REQUIRES_X86_FMA3;
19437 for (size_t k = 8; k <= 40; k += 4) {
19438 GemmMicrokernelTester()
19439 .mr(3)
19440 .nr(16)
19441 .kr(1)
19442 .sr(4)
19443 .m(3)
19444 .n(16)
19445 .k(k)
19446 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019447 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019448 }
19449 }
19450
Marat Dukhande06f492020-04-09 00:19:31 -070019451 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019452 TEST_REQUIRES_X86_FMA3;
19453 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019454 for (uint32_t n = 1; n <= 16; n++) {
19455 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019456 GemmMicrokernelTester()
19457 .mr(3)
19458 .nr(16)
19459 .kr(1)
19460 .sr(4)
19461 .m(m)
19462 .n(n)
19463 .k(k)
19464 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019465 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019466 }
19467 }
19468 }
19469 }
19470
Marat Dukhande06f492020-04-09 00:19:31 -070019471 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019472 TEST_REQUIRES_X86_FMA3;
19473 for (uint32_t n = 17; n < 32; n++) {
19474 for (size_t k = 1; k <= 20; k += 5) {
19475 GemmMicrokernelTester()
19476 .mr(3)
19477 .nr(16)
19478 .kr(1)
19479 .sr(4)
19480 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019481 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019482 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019483 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019484 }
19485 }
19486 }
19487
Marat Dukhande06f492020-04-09 00:19:31 -070019488 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019489 TEST_REQUIRES_X86_FMA3;
19490 for (uint32_t n = 17; n < 32; n++) {
19491 for (size_t k = 1; k <= 20; k += 5) {
19492 GemmMicrokernelTester()
19493 .mr(3)
19494 .nr(16)
19495 .kr(1)
19496 .sr(4)
19497 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019498 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019499 .k(k)
19500 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019501 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019502 }
19503 }
19504 }
19505
Marat Dukhande06f492020-04-09 00:19:31 -070019506 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019507 TEST_REQUIRES_X86_FMA3;
19508 for (uint32_t n = 17; n < 32; n++) {
19509 for (size_t k = 1; k <= 20; k += 5) {
19510 GemmMicrokernelTester()
19511 .mr(3)
19512 .nr(16)
19513 .kr(1)
19514 .sr(4)
19515 .m(3)
19516 .n(n)
19517 .k(k)
19518 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019519 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019520 }
19521 }
19522 }
19523
Marat Dukhande06f492020-04-09 00:19:31 -070019524 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019525 TEST_REQUIRES_X86_FMA3;
19526 for (uint32_t n = 17; n < 32; n++) {
19527 for (size_t k = 1; k <= 20; k += 5) {
19528 for (uint32_t m = 1; m <= 3; m++) {
19529 GemmMicrokernelTester()
19530 .mr(3)
19531 .nr(16)
19532 .kr(1)
19533 .sr(4)
19534 .m(m)
19535 .n(n)
19536 .k(k)
19537 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019538 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019539 }
19540 }
19541 }
19542 }
19543
Marat Dukhande06f492020-04-09 00:19:31 -070019544 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019545 TEST_REQUIRES_X86_FMA3;
19546 for (uint32_t n = 32; n <= 48; n += 16) {
19547 for (size_t k = 1; k <= 20; k += 5) {
19548 GemmMicrokernelTester()
19549 .mr(3)
19550 .nr(16)
19551 .kr(1)
19552 .sr(4)
19553 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019554 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019555 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019556 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019557 }
19558 }
19559 }
19560
Marat Dukhande06f492020-04-09 00:19:31 -070019561 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019562 TEST_REQUIRES_X86_FMA3;
19563 for (uint32_t n = 32; n <= 48; n += 16) {
19564 for (size_t k = 1; k <= 20; k += 5) {
19565 GemmMicrokernelTester()
19566 .mr(3)
19567 .nr(16)
19568 .kr(1)
19569 .sr(4)
19570 .m(3)
19571 .n(n)
19572 .k(k)
19573 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019574 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019575 }
19576 }
19577 }
19578
Marat Dukhande06f492020-04-09 00:19:31 -070019579 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019580 TEST_REQUIRES_X86_FMA3;
19581 for (uint32_t n = 32; n <= 48; n += 16) {
19582 for (size_t k = 1; k <= 20; k += 5) {
19583 GemmMicrokernelTester()
19584 .mr(3)
19585 .nr(16)
19586 .kr(1)
19587 .sr(4)
19588 .m(3)
19589 .n(n)
19590 .k(k)
19591 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019592 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019593 }
19594 }
19595 }
19596
Marat Dukhande06f492020-04-09 00:19:31 -070019597 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019598 TEST_REQUIRES_X86_FMA3;
19599 for (uint32_t n = 32; n <= 48; n += 16) {
19600 for (size_t k = 1; k <= 20; k += 5) {
19601 for (uint32_t m = 1; m <= 3; m++) {
19602 GemmMicrokernelTester()
19603 .mr(3)
19604 .nr(16)
19605 .kr(1)
19606 .sr(4)
19607 .m(m)
19608 .n(n)
19609 .k(k)
19610 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019611 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019612 }
19613 }
19614 }
19615 }
19616
Marat Dukhande06f492020-04-09 00:19:31 -070019617 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019618 TEST_REQUIRES_X86_FMA3;
19619 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019620 for (uint32_t n = 1; n <= 16; n++) {
19621 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019622 GemmMicrokernelTester()
19623 .mr(3)
19624 .nr(16)
19625 .kr(1)
19626 .sr(4)
19627 .m(m)
19628 .n(n)
19629 .k(k)
19630 .cm_stride(19)
19631 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019632 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019633 }
19634 }
19635 }
19636 }
19637
Marat Dukhande06f492020-04-09 00:19:31 -070019638 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019639 TEST_REQUIRES_X86_FMA3;
19640 GemmMicrokernelTester()
19641 .mr(3)
19642 .nr(16)
19643 .kr(1)
19644 .sr(4)
19645 .m(3)
19646 .n(16)
19647 .k(4)
19648 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019649 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019650 }
19651
Marat Dukhande06f492020-04-09 00:19:31 -070019652 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019653 TEST_REQUIRES_X86_FMA3;
19654 GemmMicrokernelTester()
19655 .mr(3)
19656 .nr(16)
19657 .kr(1)
19658 .sr(4)
19659 .m(3)
19660 .n(16)
19661 .k(4)
19662 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019663 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019664 }
19665
Marat Dukhande06f492020-04-09 00:19:31 -070019666 TEST(F32_GEMMINC_MINMAX_3X16S4__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019667 TEST_REQUIRES_X86_FMA3;
19668 GemmMicrokernelTester()
19669 .mr(3)
19670 .nr(16)
19671 .kr(1)
19672 .sr(4)
19673 .m(3)
19674 .n(16)
19675 .k(4)
19676 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019677 .Test(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019678 }
19679#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19680
19681
19682#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070019683 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019684 TEST_REQUIRES_X86_FMA3;
19685 GemmMicrokernelTester()
19686 .mr(4)
19687 .nr(16)
19688 .kr(1)
19689 .sr(4)
19690 .m(4)
19691 .n(16)
19692 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019693 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019694 }
19695
Marat Dukhande06f492020-04-09 00:19:31 -070019696 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019697 TEST_REQUIRES_X86_FMA3;
19698 GemmMicrokernelTester()
19699 .mr(4)
19700 .nr(16)
19701 .kr(1)
19702 .sr(4)
19703 .m(4)
19704 .n(16)
19705 .k(4)
19706 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019707 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019708 }
19709
Marat Dukhande06f492020-04-09 00:19:31 -070019710 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019711 TEST_REQUIRES_X86_FMA3;
19712 GemmMicrokernelTester()
19713 .mr(4)
19714 .nr(16)
19715 .kr(1)
19716 .sr(4)
19717 .m(4)
19718 .n(16)
19719 .k(4)
19720 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019721 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019722 }
19723
Marat Dukhande06f492020-04-09 00:19:31 -070019724 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019725 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019726 for (uint32_t n = 1; n <= 16; n++) {
19727 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019728 GemmMicrokernelTester()
19729 .mr(4)
19730 .nr(16)
19731 .kr(1)
19732 .sr(4)
19733 .m(m)
19734 .n(n)
19735 .k(4)
19736 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019737 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019738 }
19739 }
19740 }
19741
Marat Dukhande06f492020-04-09 00:19:31 -070019742 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019743 TEST_REQUIRES_X86_FMA3;
19744 for (uint32_t m = 1; m <= 4; m++) {
19745 GemmMicrokernelTester()
19746 .mr(4)
19747 .nr(16)
19748 .kr(1)
19749 .sr(4)
19750 .m(m)
19751 .n(16)
19752 .k(4)
19753 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019754 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019755 }
19756 }
19757
Marat Dukhande06f492020-04-09 00:19:31 -070019758 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019759 TEST_REQUIRES_X86_FMA3;
19760 for (uint32_t n = 1; n <= 16; n++) {
19761 GemmMicrokernelTester()
19762 .mr(4)
19763 .nr(16)
19764 .kr(1)
19765 .sr(4)
19766 .m(4)
19767 .n(n)
19768 .k(4)
19769 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019770 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019771 }
19772 }
19773
Marat Dukhande06f492020-04-09 00:19:31 -070019774 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019775 TEST_REQUIRES_X86_FMA3;
19776 for (size_t k = 1; k < 4; k++) {
19777 GemmMicrokernelTester()
19778 .mr(4)
19779 .nr(16)
19780 .kr(1)
19781 .sr(4)
19782 .m(4)
19783 .n(16)
19784 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019785 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019786 }
19787 }
19788
Marat Dukhande06f492020-04-09 00:19:31 -070019789 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019790 TEST_REQUIRES_X86_FMA3;
19791 for (size_t k = 1; k < 4; k++) {
19792 GemmMicrokernelTester()
19793 .mr(4)
19794 .nr(16)
19795 .kr(1)
19796 .sr(4)
19797 .m(4)
19798 .n(16)
19799 .k(k)
19800 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019801 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019802 }
19803 }
19804
Marat Dukhande06f492020-04-09 00:19:31 -070019805 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019806 TEST_REQUIRES_X86_FMA3;
19807 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019808 for (uint32_t n = 1; n <= 16; n++) {
19809 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019810 GemmMicrokernelTester()
19811 .mr(4)
19812 .nr(16)
19813 .kr(1)
19814 .sr(4)
19815 .m(m)
19816 .n(n)
19817 .k(k)
19818 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019819 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019820 }
19821 }
19822 }
19823 }
19824
Marat Dukhande06f492020-04-09 00:19:31 -070019825 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019826 TEST_REQUIRES_X86_FMA3;
19827 for (size_t k = 5; k < 8; k++) {
19828 GemmMicrokernelTester()
19829 .mr(4)
19830 .nr(16)
19831 .kr(1)
19832 .sr(4)
19833 .m(4)
19834 .n(16)
19835 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019836 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019837 }
19838 }
19839
Marat Dukhande06f492020-04-09 00:19:31 -070019840 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019841 TEST_REQUIRES_X86_FMA3;
19842 for (size_t k = 5; k < 8; k++) {
19843 GemmMicrokernelTester()
19844 .mr(4)
19845 .nr(16)
19846 .kr(1)
19847 .sr(4)
19848 .m(4)
19849 .n(16)
19850 .k(k)
19851 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019852 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019853 }
19854 }
19855
Marat Dukhande06f492020-04-09 00:19:31 -070019856 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019857 TEST_REQUIRES_X86_FMA3;
19858 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019859 for (uint32_t n = 1; n <= 16; n++) {
19860 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019861 GemmMicrokernelTester()
19862 .mr(4)
19863 .nr(16)
19864 .kr(1)
19865 .sr(4)
19866 .m(m)
19867 .n(n)
19868 .k(k)
19869 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019870 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019871 }
19872 }
19873 }
19874 }
19875
Marat Dukhande06f492020-04-09 00:19:31 -070019876 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019877 TEST_REQUIRES_X86_FMA3;
19878 for (size_t k = 8; k <= 40; k += 4) {
19879 GemmMicrokernelTester()
19880 .mr(4)
19881 .nr(16)
19882 .kr(1)
19883 .sr(4)
19884 .m(4)
19885 .n(16)
19886 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019887 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019888 }
19889 }
19890
Marat Dukhande06f492020-04-09 00:19:31 -070019891 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019892 TEST_REQUIRES_X86_FMA3;
19893 for (size_t k = 8; k <= 40; k += 4) {
19894 GemmMicrokernelTester()
19895 .mr(4)
19896 .nr(16)
19897 .kr(1)
19898 .sr(4)
19899 .m(4)
19900 .n(16)
19901 .k(k)
19902 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019903 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019904 }
19905 }
19906
Marat Dukhande06f492020-04-09 00:19:31 -070019907 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019908 TEST_REQUIRES_X86_FMA3;
19909 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019910 for (uint32_t n = 1; n <= 16; n++) {
19911 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019912 GemmMicrokernelTester()
19913 .mr(4)
19914 .nr(16)
19915 .kr(1)
19916 .sr(4)
19917 .m(m)
19918 .n(n)
19919 .k(k)
19920 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019921 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019922 }
19923 }
19924 }
19925 }
19926
Marat Dukhande06f492020-04-09 00:19:31 -070019927 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019928 TEST_REQUIRES_X86_FMA3;
19929 for (uint32_t n = 17; n < 32; n++) {
19930 for (size_t k = 1; k <= 20; k += 5) {
19931 GemmMicrokernelTester()
19932 .mr(4)
19933 .nr(16)
19934 .kr(1)
19935 .sr(4)
19936 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019937 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019938 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019939 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019940 }
19941 }
19942 }
19943
Marat Dukhande06f492020-04-09 00:19:31 -070019944 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019945 TEST_REQUIRES_X86_FMA3;
19946 for (uint32_t n = 17; n < 32; n++) {
19947 for (size_t k = 1; k <= 20; k += 5) {
19948 GemmMicrokernelTester()
19949 .mr(4)
19950 .nr(16)
19951 .kr(1)
19952 .sr(4)
19953 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019954 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019955 .k(k)
19956 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019957 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019958 }
19959 }
19960 }
19961
Marat Dukhande06f492020-04-09 00:19:31 -070019962 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019963 TEST_REQUIRES_X86_FMA3;
19964 for (uint32_t n = 17; n < 32; n++) {
19965 for (size_t k = 1; k <= 20; k += 5) {
19966 GemmMicrokernelTester()
19967 .mr(4)
19968 .nr(16)
19969 .kr(1)
19970 .sr(4)
19971 .m(4)
19972 .n(n)
19973 .k(k)
19974 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019975 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019976 }
19977 }
19978 }
19979
Marat Dukhande06f492020-04-09 00:19:31 -070019980 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019981 TEST_REQUIRES_X86_FMA3;
19982 for (uint32_t n = 17; n < 32; n++) {
19983 for (size_t k = 1; k <= 20; k += 5) {
19984 for (uint32_t m = 1; m <= 4; m++) {
19985 GemmMicrokernelTester()
19986 .mr(4)
19987 .nr(16)
19988 .kr(1)
19989 .sr(4)
19990 .m(m)
19991 .n(n)
19992 .k(k)
19993 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019994 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019995 }
19996 }
19997 }
19998 }
19999
Marat Dukhande06f492020-04-09 00:19:31 -070020000 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020001 TEST_REQUIRES_X86_FMA3;
20002 for (uint32_t n = 32; n <= 48; n += 16) {
20003 for (size_t k = 1; k <= 20; k += 5) {
20004 GemmMicrokernelTester()
20005 .mr(4)
20006 .nr(16)
20007 .kr(1)
20008 .sr(4)
20009 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020010 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020011 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020012 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020013 }
20014 }
20015 }
20016
Marat Dukhande06f492020-04-09 00:19:31 -070020017 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020018 TEST_REQUIRES_X86_FMA3;
20019 for (uint32_t n = 32; n <= 48; n += 16) {
20020 for (size_t k = 1; k <= 20; k += 5) {
20021 GemmMicrokernelTester()
20022 .mr(4)
20023 .nr(16)
20024 .kr(1)
20025 .sr(4)
20026 .m(4)
20027 .n(n)
20028 .k(k)
20029 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020030 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020031 }
20032 }
20033 }
20034
Marat Dukhande06f492020-04-09 00:19:31 -070020035 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020036 TEST_REQUIRES_X86_FMA3;
20037 for (uint32_t n = 32; n <= 48; n += 16) {
20038 for (size_t k = 1; k <= 20; k += 5) {
20039 GemmMicrokernelTester()
20040 .mr(4)
20041 .nr(16)
20042 .kr(1)
20043 .sr(4)
20044 .m(4)
20045 .n(n)
20046 .k(k)
20047 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020048 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020049 }
20050 }
20051 }
20052
Marat Dukhande06f492020-04-09 00:19:31 -070020053 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020054 TEST_REQUIRES_X86_FMA3;
20055 for (uint32_t n = 32; n <= 48; n += 16) {
20056 for (size_t k = 1; k <= 20; k += 5) {
20057 for (uint32_t m = 1; m <= 4; m++) {
20058 GemmMicrokernelTester()
20059 .mr(4)
20060 .nr(16)
20061 .kr(1)
20062 .sr(4)
20063 .m(m)
20064 .n(n)
20065 .k(k)
20066 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020067 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020068 }
20069 }
20070 }
20071 }
20072
Marat Dukhande06f492020-04-09 00:19:31 -070020073 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020074 TEST_REQUIRES_X86_FMA3;
20075 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020076 for (uint32_t n = 1; n <= 16; n++) {
20077 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020078 GemmMicrokernelTester()
20079 .mr(4)
20080 .nr(16)
20081 .kr(1)
20082 .sr(4)
20083 .m(m)
20084 .n(n)
20085 .k(k)
20086 .cm_stride(19)
20087 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020088 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020089 }
20090 }
20091 }
20092 }
20093
Marat Dukhande06f492020-04-09 00:19:31 -070020094 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020095 TEST_REQUIRES_X86_FMA3;
20096 GemmMicrokernelTester()
20097 .mr(4)
20098 .nr(16)
20099 .kr(1)
20100 .sr(4)
20101 .m(4)
20102 .n(16)
20103 .k(4)
20104 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020105 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020106 }
20107
Marat Dukhande06f492020-04-09 00:19:31 -070020108 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020109 TEST_REQUIRES_X86_FMA3;
20110 GemmMicrokernelTester()
20111 .mr(4)
20112 .nr(16)
20113 .kr(1)
20114 .sr(4)
20115 .m(4)
20116 .n(16)
20117 .k(4)
20118 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020119 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020120 }
20121
Marat Dukhande06f492020-04-09 00:19:31 -070020122 TEST(F32_GEMMINC_MINMAX_4X16S4__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020123 TEST_REQUIRES_X86_FMA3;
20124 GemmMicrokernelTester()
20125 .mr(4)
20126 .nr(16)
20127 .kr(1)
20128 .sr(4)
20129 .m(4)
20130 .n(16)
20131 .k(4)
20132 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020133 .Test(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020134 }
20135#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20136
20137
20138#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070020139 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020140 TEST_REQUIRES_X86_FMA3;
20141 GemmMicrokernelTester()
20142 .mr(5)
20143 .nr(16)
20144 .kr(1)
20145 .sr(4)
20146 .m(5)
20147 .n(16)
20148 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020149 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020150 }
20151
Marat Dukhande06f492020-04-09 00:19:31 -070020152 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020153 TEST_REQUIRES_X86_FMA3;
20154 GemmMicrokernelTester()
20155 .mr(5)
20156 .nr(16)
20157 .kr(1)
20158 .sr(4)
20159 .m(5)
20160 .n(16)
20161 .k(4)
20162 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020163 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020164 }
20165
Marat Dukhande06f492020-04-09 00:19:31 -070020166 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020167 TEST_REQUIRES_X86_FMA3;
20168 GemmMicrokernelTester()
20169 .mr(5)
20170 .nr(16)
20171 .kr(1)
20172 .sr(4)
20173 .m(5)
20174 .n(16)
20175 .k(4)
20176 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020177 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020178 }
20179
Marat Dukhande06f492020-04-09 00:19:31 -070020180 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020181 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020182 for (uint32_t n = 1; n <= 16; n++) {
20183 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020184 GemmMicrokernelTester()
20185 .mr(5)
20186 .nr(16)
20187 .kr(1)
20188 .sr(4)
20189 .m(m)
20190 .n(n)
20191 .k(4)
20192 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020193 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020194 }
20195 }
20196 }
20197
Marat Dukhande06f492020-04-09 00:19:31 -070020198 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020199 TEST_REQUIRES_X86_FMA3;
20200 for (uint32_t m = 1; m <= 5; m++) {
20201 GemmMicrokernelTester()
20202 .mr(5)
20203 .nr(16)
20204 .kr(1)
20205 .sr(4)
20206 .m(m)
20207 .n(16)
20208 .k(4)
20209 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020210 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020211 }
20212 }
20213
Marat Dukhande06f492020-04-09 00:19:31 -070020214 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020215 TEST_REQUIRES_X86_FMA3;
20216 for (uint32_t n = 1; n <= 16; n++) {
20217 GemmMicrokernelTester()
20218 .mr(5)
20219 .nr(16)
20220 .kr(1)
20221 .sr(4)
20222 .m(5)
20223 .n(n)
20224 .k(4)
20225 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020226 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020227 }
20228 }
20229
Marat Dukhande06f492020-04-09 00:19:31 -070020230 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020231 TEST_REQUIRES_X86_FMA3;
20232 for (size_t k = 1; k < 4; k++) {
20233 GemmMicrokernelTester()
20234 .mr(5)
20235 .nr(16)
20236 .kr(1)
20237 .sr(4)
20238 .m(5)
20239 .n(16)
20240 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020241 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020242 }
20243 }
20244
Marat Dukhande06f492020-04-09 00:19:31 -070020245 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_lt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020246 TEST_REQUIRES_X86_FMA3;
20247 for (size_t k = 1; k < 4; k++) {
20248 GemmMicrokernelTester()
20249 .mr(5)
20250 .nr(16)
20251 .kr(1)
20252 .sr(4)
20253 .m(5)
20254 .n(16)
20255 .k(k)
20256 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020257 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020258 }
20259 }
20260
Marat Dukhande06f492020-04-09 00:19:31 -070020261 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020262 TEST_REQUIRES_X86_FMA3;
20263 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020264 for (uint32_t n = 1; n <= 16; n++) {
20265 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020266 GemmMicrokernelTester()
20267 .mr(5)
20268 .nr(16)
20269 .kr(1)
20270 .sr(4)
20271 .m(m)
20272 .n(n)
20273 .k(k)
20274 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020275 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020276 }
20277 }
20278 }
20279 }
20280
Marat Dukhande06f492020-04-09 00:19:31 -070020281 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020282 TEST_REQUIRES_X86_FMA3;
20283 for (size_t k = 5; k < 8; k++) {
20284 GemmMicrokernelTester()
20285 .mr(5)
20286 .nr(16)
20287 .kr(1)
20288 .sr(4)
20289 .m(5)
20290 .n(16)
20291 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020292 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020293 }
20294 }
20295
Marat Dukhande06f492020-04-09 00:19:31 -070020296 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020297 TEST_REQUIRES_X86_FMA3;
20298 for (size_t k = 5; k < 8; k++) {
20299 GemmMicrokernelTester()
20300 .mr(5)
20301 .nr(16)
20302 .kr(1)
20303 .sr(4)
20304 .m(5)
20305 .n(16)
20306 .k(k)
20307 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020308 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020309 }
20310 }
20311
Marat Dukhande06f492020-04-09 00:19:31 -070020312 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020313 TEST_REQUIRES_X86_FMA3;
20314 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020315 for (uint32_t n = 1; n <= 16; n++) {
20316 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020317 GemmMicrokernelTester()
20318 .mr(5)
20319 .nr(16)
20320 .kr(1)
20321 .sr(4)
20322 .m(m)
20323 .n(n)
20324 .k(k)
20325 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020326 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020327 }
20328 }
20329 }
20330 }
20331
Marat Dukhande06f492020-04-09 00:19:31 -070020332 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020333 TEST_REQUIRES_X86_FMA3;
20334 for (size_t k = 8; k <= 40; k += 4) {
20335 GemmMicrokernelTester()
20336 .mr(5)
20337 .nr(16)
20338 .kr(1)
20339 .sr(4)
20340 .m(5)
20341 .n(16)
20342 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020343 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020344 }
20345 }
20346
Marat Dukhande06f492020-04-09 00:19:31 -070020347 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020348 TEST_REQUIRES_X86_FMA3;
20349 for (size_t k = 8; k <= 40; k += 4) {
20350 GemmMicrokernelTester()
20351 .mr(5)
20352 .nr(16)
20353 .kr(1)
20354 .sr(4)
20355 .m(5)
20356 .n(16)
20357 .k(k)
20358 .a_stride(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020359 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020360 }
20361 }
20362
Marat Dukhande06f492020-04-09 00:19:31 -070020363 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020364 TEST_REQUIRES_X86_FMA3;
20365 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020366 for (uint32_t n = 1; n <= 16; n++) {
20367 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020368 GemmMicrokernelTester()
20369 .mr(5)
20370 .nr(16)
20371 .kr(1)
20372 .sr(4)
20373 .m(m)
20374 .n(n)
20375 .k(k)
20376 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020377 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020378 }
20379 }
20380 }
20381 }
20382
Marat Dukhande06f492020-04-09 00:19:31 -070020383 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020384 TEST_REQUIRES_X86_FMA3;
20385 for (uint32_t n = 17; n < 32; n++) {
20386 for (size_t k = 1; k <= 20; k += 5) {
20387 GemmMicrokernelTester()
20388 .mr(5)
20389 .nr(16)
20390 .kr(1)
20391 .sr(4)
20392 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020393 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020394 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020395 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020396 }
20397 }
20398 }
20399
Marat Dukhande06f492020-04-09 00:19:31 -070020400 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020401 TEST_REQUIRES_X86_FMA3;
20402 for (uint32_t n = 17; n < 32; n++) {
20403 for (size_t k = 1; k <= 20; k += 5) {
20404 GemmMicrokernelTester()
20405 .mr(5)
20406 .nr(16)
20407 .kr(1)
20408 .sr(4)
20409 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020410 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020411 .k(k)
20412 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020413 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020414 }
20415 }
20416 }
20417
Marat Dukhande06f492020-04-09 00:19:31 -070020418 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020419 TEST_REQUIRES_X86_FMA3;
20420 for (uint32_t n = 17; n < 32; n++) {
20421 for (size_t k = 1; k <= 20; k += 5) {
20422 GemmMicrokernelTester()
20423 .mr(5)
20424 .nr(16)
20425 .kr(1)
20426 .sr(4)
20427 .m(5)
20428 .n(n)
20429 .k(k)
20430 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020431 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020432 }
20433 }
20434 }
20435
Marat Dukhande06f492020-04-09 00:19:31 -070020436 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020437 TEST_REQUIRES_X86_FMA3;
20438 for (uint32_t n = 17; n < 32; n++) {
20439 for (size_t k = 1; k <= 20; k += 5) {
20440 for (uint32_t m = 1; m <= 5; m++) {
20441 GemmMicrokernelTester()
20442 .mr(5)
20443 .nr(16)
20444 .kr(1)
20445 .sr(4)
20446 .m(m)
20447 .n(n)
20448 .k(k)
20449 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020450 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020451 }
20452 }
20453 }
20454 }
20455
Marat Dukhande06f492020-04-09 00:19:31 -070020456 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020457 TEST_REQUIRES_X86_FMA3;
20458 for (uint32_t n = 32; n <= 48; n += 16) {
20459 for (size_t k = 1; k <= 20; k += 5) {
20460 GemmMicrokernelTester()
20461 .mr(5)
20462 .nr(16)
20463 .kr(1)
20464 .sr(4)
20465 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020466 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020467 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020468 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020469 }
20470 }
20471 }
20472
Marat Dukhande06f492020-04-09 00:19:31 -070020473 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020474 TEST_REQUIRES_X86_FMA3;
20475 for (uint32_t n = 32; n <= 48; n += 16) {
20476 for (size_t k = 1; k <= 20; k += 5) {
20477 GemmMicrokernelTester()
20478 .mr(5)
20479 .nr(16)
20480 .kr(1)
20481 .sr(4)
20482 .m(5)
20483 .n(n)
20484 .k(k)
20485 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020486 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020487 }
20488 }
20489 }
20490
Marat Dukhande06f492020-04-09 00:19:31 -070020491 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020492 TEST_REQUIRES_X86_FMA3;
20493 for (uint32_t n = 32; n <= 48; n += 16) {
20494 for (size_t k = 1; k <= 20; k += 5) {
20495 GemmMicrokernelTester()
20496 .mr(5)
20497 .nr(16)
20498 .kr(1)
20499 .sr(4)
20500 .m(5)
20501 .n(n)
20502 .k(k)
20503 .a_stride(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020504 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020505 }
20506 }
20507 }
20508
Marat Dukhande06f492020-04-09 00:19:31 -070020509 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020510 TEST_REQUIRES_X86_FMA3;
20511 for (uint32_t n = 32; n <= 48; n += 16) {
20512 for (size_t k = 1; k <= 20; k += 5) {
20513 for (uint32_t m = 1; m <= 5; m++) {
20514 GemmMicrokernelTester()
20515 .mr(5)
20516 .nr(16)
20517 .kr(1)
20518 .sr(4)
20519 .m(m)
20520 .n(n)
20521 .k(k)
20522 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020523 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020524 }
20525 }
20526 }
20527 }
20528
Marat Dukhande06f492020-04-09 00:19:31 -070020529 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020530 TEST_REQUIRES_X86_FMA3;
20531 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020532 for (uint32_t n = 1; n <= 16; n++) {
20533 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020534 GemmMicrokernelTester()
20535 .mr(5)
20536 .nr(16)
20537 .kr(1)
20538 .sr(4)
20539 .m(m)
20540 .n(n)
20541 .k(k)
20542 .cm_stride(19)
20543 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020544 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020545 }
20546 }
20547 }
20548 }
20549
Marat Dukhande06f492020-04-09 00:19:31 -070020550 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020551 TEST_REQUIRES_X86_FMA3;
20552 GemmMicrokernelTester()
20553 .mr(5)
20554 .nr(16)
20555 .kr(1)
20556 .sr(4)
20557 .m(5)
20558 .n(16)
20559 .k(4)
20560 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020561 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020562 }
20563
Marat Dukhande06f492020-04-09 00:19:31 -070020564 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020565 TEST_REQUIRES_X86_FMA3;
20566 GemmMicrokernelTester()
20567 .mr(5)
20568 .nr(16)
20569 .kr(1)
20570 .sr(4)
20571 .m(5)
20572 .n(16)
20573 .k(4)
20574 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020575 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020576 }
20577
Marat Dukhande06f492020-04-09 00:19:31 -070020578 TEST(F32_GEMMINC_MINMAX_5X16S4__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020579 TEST_REQUIRES_X86_FMA3;
20580 GemmMicrokernelTester()
20581 .mr(5)
20582 .nr(16)
20583 .kr(1)
20584 .sr(4)
20585 .m(5)
20586 .n(16)
20587 .k(4)
20588 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020589 .Test(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020590 }
20591#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20592
20593
20594#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070020595 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020596 TEST_REQUIRES_X86_AVX512F;
20597 GemmMicrokernelTester()
20598 .mr(4)
20599 .nr(16)
20600 .kr(1)
20601 .sr(1)
20602 .m(4)
20603 .n(16)
20604 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020605 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020606 }
20607
Marat Dukhande06f492020-04-09 00:19:31 -070020608 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020609 TEST_REQUIRES_X86_AVX512F;
20610 GemmMicrokernelTester()
20611 .mr(4)
20612 .nr(16)
20613 .kr(1)
20614 .sr(1)
20615 .m(4)
20616 .n(16)
20617 .k(1)
20618 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020619 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020620 }
20621
Marat Dukhande06f492020-04-09 00:19:31 -070020622 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020623 TEST_REQUIRES_X86_AVX512F;
20624 GemmMicrokernelTester()
20625 .mr(4)
20626 .nr(16)
20627 .kr(1)
20628 .sr(1)
20629 .m(4)
20630 .n(16)
20631 .k(1)
20632 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020633 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020634 }
20635
Marat Dukhande06f492020-04-09 00:19:31 -070020636 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020637 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020638 for (uint32_t n = 1; n <= 16; n++) {
20639 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020640 GemmMicrokernelTester()
20641 .mr(4)
20642 .nr(16)
20643 .kr(1)
20644 .sr(1)
20645 .m(m)
20646 .n(n)
20647 .k(1)
20648 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020649 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020650 }
20651 }
20652 }
20653
Marat Dukhande06f492020-04-09 00:19:31 -070020654 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020655 TEST_REQUIRES_X86_AVX512F;
20656 for (uint32_t m = 1; m <= 4; m++) {
20657 GemmMicrokernelTester()
20658 .mr(4)
20659 .nr(16)
20660 .kr(1)
20661 .sr(1)
20662 .m(m)
20663 .n(16)
20664 .k(1)
20665 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020666 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020667 }
20668 }
20669
Marat Dukhande06f492020-04-09 00:19:31 -070020670 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020671 TEST_REQUIRES_X86_AVX512F;
20672 for (uint32_t n = 1; n <= 16; n++) {
20673 GemmMicrokernelTester()
20674 .mr(4)
20675 .nr(16)
20676 .kr(1)
20677 .sr(1)
20678 .m(4)
20679 .n(n)
20680 .k(1)
20681 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020682 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020683 }
20684 }
20685
Marat Dukhande06f492020-04-09 00:19:31 -070020686 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020687 TEST_REQUIRES_X86_AVX512F;
20688 for (size_t k = 2; k < 10; k++) {
20689 GemmMicrokernelTester()
20690 .mr(4)
20691 .nr(16)
20692 .kr(1)
20693 .sr(1)
20694 .m(4)
20695 .n(16)
20696 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020697 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020698 }
20699 }
20700
Marat Dukhande06f492020-04-09 00:19:31 -070020701 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020702 TEST_REQUIRES_X86_AVX512F;
20703 for (size_t k = 2; k < 10; k++) {
20704 GemmMicrokernelTester()
20705 .mr(4)
20706 .nr(16)
20707 .kr(1)
20708 .sr(1)
20709 .m(4)
20710 .n(16)
20711 .k(k)
20712 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020713 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020714 }
20715 }
20716
Marat Dukhande06f492020-04-09 00:19:31 -070020717 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020718 TEST_REQUIRES_X86_AVX512F;
20719 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020720 for (uint32_t n = 1; n <= 16; n++) {
20721 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020722 GemmMicrokernelTester()
20723 .mr(4)
20724 .nr(16)
20725 .kr(1)
20726 .sr(1)
20727 .m(m)
20728 .n(n)
20729 .k(k)
20730 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020731 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020732 }
20733 }
20734 }
20735 }
20736
Marat Dukhande06f492020-04-09 00:19:31 -070020737 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020738 TEST_REQUIRES_X86_AVX512F;
20739 for (uint32_t n = 17; n < 32; n++) {
20740 for (size_t k = 1; k <= 5; k += 2) {
20741 GemmMicrokernelTester()
20742 .mr(4)
20743 .nr(16)
20744 .kr(1)
20745 .sr(1)
20746 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020747 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020748 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020749 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020750 }
20751 }
20752 }
20753
Marat Dukhande06f492020-04-09 00:19:31 -070020754 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020755 TEST_REQUIRES_X86_AVX512F;
20756 for (uint32_t n = 17; n < 32; n++) {
20757 for (size_t k = 1; k <= 5; k += 2) {
20758 GemmMicrokernelTester()
20759 .mr(4)
20760 .nr(16)
20761 .kr(1)
20762 .sr(1)
20763 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020764 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020765 .k(k)
20766 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020767 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020768 }
20769 }
20770 }
20771
Marat Dukhande06f492020-04-09 00:19:31 -070020772 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020773 TEST_REQUIRES_X86_AVX512F;
20774 for (uint32_t n = 17; n < 32; n++) {
20775 for (size_t k = 1; k <= 5; k += 2) {
20776 GemmMicrokernelTester()
20777 .mr(4)
20778 .nr(16)
20779 .kr(1)
20780 .sr(1)
20781 .m(4)
20782 .n(n)
20783 .k(k)
20784 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020785 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020786 }
20787 }
20788 }
20789
Marat Dukhande06f492020-04-09 00:19:31 -070020790 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020791 TEST_REQUIRES_X86_AVX512F;
20792 for (uint32_t n = 17; n < 32; n++) {
20793 for (size_t k = 1; k <= 5; k += 2) {
20794 for (uint32_t m = 1; m <= 4; m++) {
20795 GemmMicrokernelTester()
20796 .mr(4)
20797 .nr(16)
20798 .kr(1)
20799 .sr(1)
20800 .m(m)
20801 .n(n)
20802 .k(k)
20803 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020804 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020805 }
20806 }
20807 }
20808 }
20809
Marat Dukhande06f492020-04-09 00:19:31 -070020810 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020811 TEST_REQUIRES_X86_AVX512F;
20812 for (uint32_t n = 32; n <= 48; n += 16) {
20813 for (size_t k = 1; k <= 5; k += 2) {
20814 GemmMicrokernelTester()
20815 .mr(4)
20816 .nr(16)
20817 .kr(1)
20818 .sr(1)
20819 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020820 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020821 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020822 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020823 }
20824 }
20825 }
20826
Marat Dukhande06f492020-04-09 00:19:31 -070020827 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020828 TEST_REQUIRES_X86_AVX512F;
20829 for (uint32_t n = 32; n <= 48; n += 16) {
20830 for (size_t k = 1; k <= 5; k += 2) {
20831 GemmMicrokernelTester()
20832 .mr(4)
20833 .nr(16)
20834 .kr(1)
20835 .sr(1)
20836 .m(4)
20837 .n(n)
20838 .k(k)
20839 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020840 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020841 }
20842 }
20843 }
20844
Marat Dukhande06f492020-04-09 00:19:31 -070020845 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020846 TEST_REQUIRES_X86_AVX512F;
20847 for (uint32_t n = 32; n <= 48; n += 16) {
20848 for (size_t k = 1; k <= 5; k += 2) {
20849 GemmMicrokernelTester()
20850 .mr(4)
20851 .nr(16)
20852 .kr(1)
20853 .sr(1)
20854 .m(4)
20855 .n(n)
20856 .k(k)
20857 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020858 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020859 }
20860 }
20861 }
20862
Marat Dukhande06f492020-04-09 00:19:31 -070020863 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020864 TEST_REQUIRES_X86_AVX512F;
20865 for (uint32_t n = 32; n <= 48; n += 16) {
20866 for (size_t k = 1; k <= 5; k += 2) {
20867 for (uint32_t m = 1; m <= 4; m++) {
20868 GemmMicrokernelTester()
20869 .mr(4)
20870 .nr(16)
20871 .kr(1)
20872 .sr(1)
20873 .m(m)
20874 .n(n)
20875 .k(k)
20876 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020877 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020878 }
20879 }
20880 }
20881 }
20882
Marat Dukhande06f492020-04-09 00:19:31 -070020883 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020884 TEST_REQUIRES_X86_AVX512F;
20885 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020886 for (uint32_t n = 1; n <= 16; n++) {
20887 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020888 GemmMicrokernelTester()
20889 .mr(4)
20890 .nr(16)
20891 .kr(1)
20892 .sr(1)
20893 .m(m)
20894 .n(n)
20895 .k(k)
20896 .cm_stride(19)
20897 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020898 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020899 }
20900 }
20901 }
20902 }
20903
Marat Dukhande06f492020-04-09 00:19:31 -070020904 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020905 TEST_REQUIRES_X86_AVX512F;
20906 GemmMicrokernelTester()
20907 .mr(4)
20908 .nr(16)
20909 .kr(1)
20910 .sr(1)
20911 .m(4)
20912 .n(16)
20913 .k(1)
20914 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020915 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020916 }
20917
Marat Dukhande06f492020-04-09 00:19:31 -070020918 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020919 TEST_REQUIRES_X86_AVX512F;
20920 GemmMicrokernelTester()
20921 .mr(4)
20922 .nr(16)
20923 .kr(1)
20924 .sr(1)
20925 .m(4)
20926 .n(16)
20927 .k(1)
20928 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020929 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020930 }
20931
Marat Dukhande06f492020-04-09 00:19:31 -070020932 TEST(F32_GEMMINC_MINMAX_4X16__AVX512F_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020933 TEST_REQUIRES_X86_AVX512F;
20934 GemmMicrokernelTester()
20935 .mr(4)
20936 .nr(16)
20937 .kr(1)
20938 .sr(1)
20939 .m(4)
20940 .n(16)
20941 .k(1)
20942 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020943 .Test(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020944 }
20945#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20946
20947
20948#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070020949 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020950 TEST_REQUIRES_X86_AVX512F;
20951 GemmMicrokernelTester()
20952 .mr(5)
20953 .nr(16)
20954 .kr(1)
20955 .sr(1)
20956 .m(5)
20957 .n(16)
20958 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020959 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020960 }
20961
Marat Dukhande06f492020-04-09 00:19:31 -070020962 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020963 TEST_REQUIRES_X86_AVX512F;
20964 GemmMicrokernelTester()
20965 .mr(5)
20966 .nr(16)
20967 .kr(1)
20968 .sr(1)
20969 .m(5)
20970 .n(16)
20971 .k(1)
20972 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020973 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020974 }
20975
Marat Dukhande06f492020-04-09 00:19:31 -070020976 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020977 TEST_REQUIRES_X86_AVX512F;
20978 GemmMicrokernelTester()
20979 .mr(5)
20980 .nr(16)
20981 .kr(1)
20982 .sr(1)
20983 .m(5)
20984 .n(16)
20985 .k(1)
20986 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020987 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020988 }
20989
Marat Dukhande06f492020-04-09 00:19:31 -070020990 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020991 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020992 for (uint32_t n = 1; n <= 16; n++) {
20993 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020994 GemmMicrokernelTester()
20995 .mr(5)
20996 .nr(16)
20997 .kr(1)
20998 .sr(1)
20999 .m(m)
21000 .n(n)
21001 .k(1)
21002 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021003 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021004 }
21005 }
21006 }
21007
Marat Dukhande06f492020-04-09 00:19:31 -070021008 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021009 TEST_REQUIRES_X86_AVX512F;
21010 for (uint32_t m = 1; m <= 5; m++) {
21011 GemmMicrokernelTester()
21012 .mr(5)
21013 .nr(16)
21014 .kr(1)
21015 .sr(1)
21016 .m(m)
21017 .n(16)
21018 .k(1)
21019 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021020 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021021 }
21022 }
21023
Marat Dukhande06f492020-04-09 00:19:31 -070021024 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021025 TEST_REQUIRES_X86_AVX512F;
21026 for (uint32_t n = 1; n <= 16; n++) {
21027 GemmMicrokernelTester()
21028 .mr(5)
21029 .nr(16)
21030 .kr(1)
21031 .sr(1)
21032 .m(5)
21033 .n(n)
21034 .k(1)
21035 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021036 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021037 }
21038 }
21039
Marat Dukhande06f492020-04-09 00:19:31 -070021040 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021041 TEST_REQUIRES_X86_AVX512F;
21042 for (size_t k = 2; k < 10; k++) {
21043 GemmMicrokernelTester()
21044 .mr(5)
21045 .nr(16)
21046 .kr(1)
21047 .sr(1)
21048 .m(5)
21049 .n(16)
21050 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021051 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021052 }
21053 }
21054
Marat Dukhande06f492020-04-09 00:19:31 -070021055 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021056 TEST_REQUIRES_X86_AVX512F;
21057 for (size_t k = 2; k < 10; k++) {
21058 GemmMicrokernelTester()
21059 .mr(5)
21060 .nr(16)
21061 .kr(1)
21062 .sr(1)
21063 .m(5)
21064 .n(16)
21065 .k(k)
21066 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021067 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021068 }
21069 }
21070
Marat Dukhande06f492020-04-09 00:19:31 -070021071 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021072 TEST_REQUIRES_X86_AVX512F;
21073 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021074 for (uint32_t n = 1; n <= 16; n++) {
21075 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021076 GemmMicrokernelTester()
21077 .mr(5)
21078 .nr(16)
21079 .kr(1)
21080 .sr(1)
21081 .m(m)
21082 .n(n)
21083 .k(k)
21084 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021085 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021086 }
21087 }
21088 }
21089 }
21090
Marat Dukhande06f492020-04-09 00:19:31 -070021091 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021092 TEST_REQUIRES_X86_AVX512F;
21093 for (uint32_t n = 17; n < 32; n++) {
21094 for (size_t k = 1; k <= 5; k += 2) {
21095 GemmMicrokernelTester()
21096 .mr(5)
21097 .nr(16)
21098 .kr(1)
21099 .sr(1)
21100 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021101 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021102 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021103 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021104 }
21105 }
21106 }
21107
Marat Dukhande06f492020-04-09 00:19:31 -070021108 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021109 TEST_REQUIRES_X86_AVX512F;
21110 for (uint32_t n = 17; n < 32; n++) {
21111 for (size_t k = 1; k <= 5; k += 2) {
21112 GemmMicrokernelTester()
21113 .mr(5)
21114 .nr(16)
21115 .kr(1)
21116 .sr(1)
21117 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021118 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021119 .k(k)
21120 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021121 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021122 }
21123 }
21124 }
21125
Marat Dukhande06f492020-04-09 00:19:31 -070021126 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021127 TEST_REQUIRES_X86_AVX512F;
21128 for (uint32_t n = 17; n < 32; n++) {
21129 for (size_t k = 1; k <= 5; k += 2) {
21130 GemmMicrokernelTester()
21131 .mr(5)
21132 .nr(16)
21133 .kr(1)
21134 .sr(1)
21135 .m(5)
21136 .n(n)
21137 .k(k)
21138 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021139 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021140 }
21141 }
21142 }
21143
Marat Dukhande06f492020-04-09 00:19:31 -070021144 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021145 TEST_REQUIRES_X86_AVX512F;
21146 for (uint32_t n = 17; n < 32; n++) {
21147 for (size_t k = 1; k <= 5; k += 2) {
21148 for (uint32_t m = 1; m <= 5; m++) {
21149 GemmMicrokernelTester()
21150 .mr(5)
21151 .nr(16)
21152 .kr(1)
21153 .sr(1)
21154 .m(m)
21155 .n(n)
21156 .k(k)
21157 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021158 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021159 }
21160 }
21161 }
21162 }
21163
Marat Dukhande06f492020-04-09 00:19:31 -070021164 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021165 TEST_REQUIRES_X86_AVX512F;
21166 for (uint32_t n = 32; n <= 48; n += 16) {
21167 for (size_t k = 1; k <= 5; k += 2) {
21168 GemmMicrokernelTester()
21169 .mr(5)
21170 .nr(16)
21171 .kr(1)
21172 .sr(1)
21173 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021174 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021175 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021176 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021177 }
21178 }
21179 }
21180
Marat Dukhande06f492020-04-09 00:19:31 -070021181 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021182 TEST_REQUIRES_X86_AVX512F;
21183 for (uint32_t n = 32; n <= 48; n += 16) {
21184 for (size_t k = 1; k <= 5; k += 2) {
21185 GemmMicrokernelTester()
21186 .mr(5)
21187 .nr(16)
21188 .kr(1)
21189 .sr(1)
21190 .m(5)
21191 .n(n)
21192 .k(k)
21193 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021194 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021195 }
21196 }
21197 }
21198
Marat Dukhande06f492020-04-09 00:19:31 -070021199 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_div_16_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021200 TEST_REQUIRES_X86_AVX512F;
21201 for (uint32_t n = 32; n <= 48; n += 16) {
21202 for (size_t k = 1; k <= 5; k += 2) {
21203 GemmMicrokernelTester()
21204 .mr(5)
21205 .nr(16)
21206 .kr(1)
21207 .sr(1)
21208 .m(5)
21209 .n(n)
21210 .k(k)
21211 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021212 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021213 }
21214 }
21215 }
21216
Marat Dukhande06f492020-04-09 00:19:31 -070021217 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021218 TEST_REQUIRES_X86_AVX512F;
21219 for (uint32_t n = 32; n <= 48; n += 16) {
21220 for (size_t k = 1; k <= 5; k += 2) {
21221 for (uint32_t m = 1; m <= 5; m++) {
21222 GemmMicrokernelTester()
21223 .mr(5)
21224 .nr(16)
21225 .kr(1)
21226 .sr(1)
21227 .m(m)
21228 .n(n)
21229 .k(k)
21230 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021231 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021232 }
21233 }
21234 }
21235 }
21236
Marat Dukhande06f492020-04-09 00:19:31 -070021237 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021238 TEST_REQUIRES_X86_AVX512F;
21239 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021240 for (uint32_t n = 1; n <= 16; n++) {
21241 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021242 GemmMicrokernelTester()
21243 .mr(5)
21244 .nr(16)
21245 .kr(1)
21246 .sr(1)
21247 .m(m)
21248 .n(n)
21249 .k(k)
21250 .cm_stride(19)
21251 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021252 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021253 }
21254 }
21255 }
21256 }
21257
Marat Dukhande06f492020-04-09 00:19:31 -070021258 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021259 TEST_REQUIRES_X86_AVX512F;
21260 GemmMicrokernelTester()
21261 .mr(5)
21262 .nr(16)
21263 .kr(1)
21264 .sr(1)
21265 .m(5)
21266 .n(16)
21267 .k(1)
21268 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021269 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021270 }
21271
Marat Dukhande06f492020-04-09 00:19:31 -070021272 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021273 TEST_REQUIRES_X86_AVX512F;
21274 GemmMicrokernelTester()
21275 .mr(5)
21276 .nr(16)
21277 .kr(1)
21278 .sr(1)
21279 .m(5)
21280 .n(16)
21281 .k(1)
21282 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021283 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021284 }
21285
Marat Dukhande06f492020-04-09 00:19:31 -070021286 TEST(F32_GEMMINC_MINMAX_5X16__AVX512F_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021287 TEST_REQUIRES_X86_AVX512F;
21288 GemmMicrokernelTester()
21289 .mr(5)
21290 .nr(16)
21291 .kr(1)
21292 .sr(1)
21293 .m(5)
21294 .n(16)
21295 .k(1)
21296 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021297 .Test(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021298 }
21299#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21300
21301
Marat Dukhan4c617792021-12-21 15:47:58 -080021302#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080021303 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021304 GemmMicrokernelTester()
21305 .mr(5)
21306 .nr(8)
21307 .kr(1)
21308 .sr(1)
21309 .m(5)
21310 .n(8)
21311 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021312 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021313 }
21314
Frank Barchard0725b8d2020-12-07 11:07:35 -080021315 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021316 GemmMicrokernelTester()
21317 .mr(5)
21318 .nr(8)
21319 .kr(1)
21320 .sr(1)
21321 .m(5)
21322 .n(8)
21323 .k(1)
21324 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021325 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021326 }
21327
Frank Barchard0725b8d2020-12-07 11:07:35 -080021328 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021329 GemmMicrokernelTester()
21330 .mr(5)
21331 .nr(8)
21332 .kr(1)
21333 .sr(1)
21334 .m(5)
21335 .n(8)
21336 .k(1)
21337 .a_stride(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021338 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021339 }
21340
Frank Barchard0725b8d2020-12-07 11:07:35 -080021341 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021342 for (uint32_t n = 1; n <= 8; n++) {
21343 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021344 GemmMicrokernelTester()
21345 .mr(5)
21346 .nr(8)
21347 .kr(1)
21348 .sr(1)
21349 .m(m)
21350 .n(n)
21351 .k(1)
21352 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021353 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021354 }
21355 }
21356 }
21357
Frank Barchard0725b8d2020-12-07 11:07:35 -080021358 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021359 for (uint32_t m = 1; m <= 5; m++) {
21360 GemmMicrokernelTester()
21361 .mr(5)
21362 .nr(8)
21363 .kr(1)
21364 .sr(1)
21365 .m(m)
21366 .n(8)
21367 .k(1)
21368 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021369 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021370 }
21371 }
21372
Frank Barchard0725b8d2020-12-07 11:07:35 -080021373 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021374 for (uint32_t n = 1; n <= 8; n++) {
21375 GemmMicrokernelTester()
21376 .mr(5)
21377 .nr(8)
21378 .kr(1)
21379 .sr(1)
21380 .m(5)
21381 .n(n)
21382 .k(1)
21383 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021384 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021385 }
21386 }
21387
Frank Barchard0725b8d2020-12-07 11:07:35 -080021388 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021389 for (size_t k = 2; k < 10; k++) {
21390 GemmMicrokernelTester()
21391 .mr(5)
21392 .nr(8)
21393 .kr(1)
21394 .sr(1)
21395 .m(5)
21396 .n(8)
21397 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021398 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021399 }
21400 }
21401
Frank Barchard0725b8d2020-12-07 11:07:35 -080021402 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021403 for (size_t k = 2; k < 10; k++) {
21404 GemmMicrokernelTester()
21405 .mr(5)
21406 .nr(8)
21407 .kr(1)
21408 .sr(1)
21409 .m(5)
21410 .n(8)
21411 .k(k)
21412 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021413 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021414 }
21415 }
21416
Frank Barchard0725b8d2020-12-07 11:07:35 -080021417 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021418 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021419 for (uint32_t n = 1; n <= 8; n++) {
21420 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021421 GemmMicrokernelTester()
21422 .mr(5)
21423 .nr(8)
21424 .kr(1)
21425 .sr(1)
21426 .m(m)
21427 .n(n)
21428 .k(k)
21429 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021430 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021431 }
21432 }
21433 }
21434 }
21435
Frank Barchard0725b8d2020-12-07 11:07:35 -080021436 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021437 for (uint32_t n = 9; n < 16; n++) {
21438 for (size_t k = 1; k <= 5; k += 2) {
21439 GemmMicrokernelTester()
21440 .mr(5)
21441 .nr(8)
21442 .kr(1)
21443 .sr(1)
21444 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021445 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021446 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021447 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021448 }
21449 }
21450 }
21451
Frank Barchard0725b8d2020-12-07 11:07:35 -080021452 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021453 for (uint32_t n = 9; n < 16; n++) {
21454 for (size_t k = 1; k <= 5; k += 2) {
21455 GemmMicrokernelTester()
21456 .mr(5)
21457 .nr(8)
21458 .kr(1)
21459 .sr(1)
21460 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021461 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021462 .k(k)
21463 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021464 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021465 }
21466 }
21467 }
21468
Frank Barchard0725b8d2020-12-07 11:07:35 -080021469 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021470 for (uint32_t n = 9; n < 16; n++) {
21471 for (size_t k = 1; k <= 5; k += 2) {
21472 GemmMicrokernelTester()
21473 .mr(5)
21474 .nr(8)
21475 .kr(1)
21476 .sr(1)
21477 .m(5)
21478 .n(n)
21479 .k(k)
21480 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021481 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021482 }
21483 }
21484 }
21485
Frank Barchard0725b8d2020-12-07 11:07:35 -080021486 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021487 for (uint32_t n = 9; n < 16; n++) {
21488 for (size_t k = 1; k <= 5; k += 2) {
21489 for (uint32_t m = 1; m <= 5; m++) {
21490 GemmMicrokernelTester()
21491 .mr(5)
21492 .nr(8)
21493 .kr(1)
21494 .sr(1)
21495 .m(m)
21496 .n(n)
21497 .k(k)
21498 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021499 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021500 }
21501 }
21502 }
21503 }
21504
Frank Barchard0725b8d2020-12-07 11:07:35 -080021505 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021506 for (uint32_t n = 16; n <= 24; n += 8) {
21507 for (size_t k = 1; k <= 5; k += 2) {
21508 GemmMicrokernelTester()
21509 .mr(5)
21510 .nr(8)
21511 .kr(1)
21512 .sr(1)
21513 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021514 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021515 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021516 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021517 }
21518 }
21519 }
21520
Frank Barchard0725b8d2020-12-07 11:07:35 -080021521 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021522 for (uint32_t n = 16; n <= 24; n += 8) {
21523 for (size_t k = 1; k <= 5; k += 2) {
21524 GemmMicrokernelTester()
21525 .mr(5)
21526 .nr(8)
21527 .kr(1)
21528 .sr(1)
21529 .m(5)
21530 .n(n)
21531 .k(k)
21532 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021533 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021534 }
21535 }
21536 }
21537
Frank Barchard0725b8d2020-12-07 11:07:35 -080021538 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021539 for (uint32_t n = 16; n <= 24; n += 8) {
21540 for (size_t k = 1; k <= 5; k += 2) {
21541 GemmMicrokernelTester()
21542 .mr(5)
21543 .nr(8)
21544 .kr(1)
21545 .sr(1)
21546 .m(5)
21547 .n(n)
21548 .k(k)
21549 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021550 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021551 }
21552 }
21553 }
21554
Frank Barchard0725b8d2020-12-07 11:07:35 -080021555 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021556 for (uint32_t n = 16; n <= 24; n += 8) {
21557 for (size_t k = 1; k <= 5; k += 2) {
21558 for (uint32_t m = 1; m <= 5; m++) {
21559 GemmMicrokernelTester()
21560 .mr(5)
21561 .nr(8)
21562 .kr(1)
21563 .sr(1)
21564 .m(m)
21565 .n(n)
21566 .k(k)
21567 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021568 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021569 }
21570 }
21571 }
21572 }
21573
Frank Barchard0725b8d2020-12-07 11:07:35 -080021574 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021575 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021576 for (uint32_t n = 1; n <= 8; n++) {
21577 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021578 GemmMicrokernelTester()
21579 .mr(5)
21580 .nr(8)
21581 .kr(1)
21582 .sr(1)
21583 .m(m)
21584 .n(n)
21585 .k(k)
21586 .cm_stride(11)
21587 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021588 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021589 }
21590 }
21591 }
21592 }
21593
Frank Barchard0725b8d2020-12-07 11:07:35 -080021594 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021595 GemmMicrokernelTester()
21596 .mr(5)
21597 .nr(8)
21598 .kr(1)
21599 .sr(1)
21600 .m(5)
21601 .n(8)
21602 .k(1)
21603 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021604 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021605 }
21606
Frank Barchard0725b8d2020-12-07 11:07:35 -080021607 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021608 GemmMicrokernelTester()
21609 .mr(5)
21610 .nr(8)
21611 .kr(1)
21612 .sr(1)
21613 .m(5)
21614 .n(8)
21615 .k(1)
21616 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021617 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021618 }
21619
Frank Barchard0725b8d2020-12-07 11:07:35 -080021620 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021621 GemmMicrokernelTester()
21622 .mr(5)
21623 .nr(8)
21624 .kr(1)
21625 .sr(1)
21626 .m(5)
21627 .n(8)
21628 .k(1)
21629 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021630 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021631 }
Marat Dukhan4c617792021-12-21 15:47:58 -080021632#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021633
21634
Marat Dukhan4c617792021-12-21 15:47:58 -080021635#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080021636 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021637 GemmMicrokernelTester()
21638 .mr(1)
21639 .nr(8)
21640 .kr(1)
21641 .sr(1)
21642 .m(1)
21643 .n(8)
21644 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021645 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021646 }
21647
Frank Barchard0725b8d2020-12-07 11:07:35 -080021648 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021649 GemmMicrokernelTester()
21650 .mr(1)
21651 .nr(8)
21652 .kr(1)
21653 .sr(1)
21654 .m(1)
21655 .n(8)
21656 .k(1)
21657 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021658 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021659 }
21660
Frank Barchard0725b8d2020-12-07 11:07:35 -080021661 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021662 GemmMicrokernelTester()
21663 .mr(1)
21664 .nr(8)
21665 .kr(1)
21666 .sr(1)
21667 .m(1)
21668 .n(8)
21669 .k(1)
21670 .a_stride(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021671 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021672 }
21673
Frank Barchard0725b8d2020-12-07 11:07:35 -080021674 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021675 for (uint32_t n = 1; n <= 8; n++) {
21676 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021677 GemmMicrokernelTester()
21678 .mr(1)
21679 .nr(8)
21680 .kr(1)
21681 .sr(1)
21682 .m(m)
21683 .n(n)
21684 .k(1)
21685 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021686 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021687 }
21688 }
21689 }
21690
Frank Barchard0725b8d2020-12-07 11:07:35 -080021691 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021692 for (uint32_t m = 1; m <= 1; m++) {
21693 GemmMicrokernelTester()
21694 .mr(1)
21695 .nr(8)
21696 .kr(1)
21697 .sr(1)
21698 .m(m)
21699 .n(8)
21700 .k(1)
21701 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021702 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021703 }
21704 }
21705
Frank Barchard0725b8d2020-12-07 11:07:35 -080021706 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021707 for (uint32_t n = 1; n <= 8; n++) {
21708 GemmMicrokernelTester()
21709 .mr(1)
21710 .nr(8)
21711 .kr(1)
21712 .sr(1)
21713 .m(1)
21714 .n(n)
21715 .k(1)
21716 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021717 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021718 }
21719 }
21720
Frank Barchard0725b8d2020-12-07 11:07:35 -080021721 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021722 for (size_t k = 2; k < 10; k++) {
21723 GemmMicrokernelTester()
21724 .mr(1)
21725 .nr(8)
21726 .kr(1)
21727 .sr(1)
21728 .m(1)
21729 .n(8)
21730 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021731 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021732 }
21733 }
21734
Frank Barchard0725b8d2020-12-07 11:07:35 -080021735 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021736 for (size_t k = 2; k < 10; k++) {
21737 GemmMicrokernelTester()
21738 .mr(1)
21739 .nr(8)
21740 .kr(1)
21741 .sr(1)
21742 .m(1)
21743 .n(8)
21744 .k(k)
21745 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021746 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021747 }
21748 }
21749
Frank Barchard0725b8d2020-12-07 11:07:35 -080021750 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021751 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021752 for (uint32_t n = 1; n <= 8; n++) {
21753 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021754 GemmMicrokernelTester()
21755 .mr(1)
21756 .nr(8)
21757 .kr(1)
21758 .sr(1)
21759 .m(m)
21760 .n(n)
21761 .k(k)
21762 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021763 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021764 }
21765 }
21766 }
21767 }
21768
Frank Barchard0725b8d2020-12-07 11:07:35 -080021769 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021770 for (uint32_t n = 9; n < 16; n++) {
21771 for (size_t k = 1; k <= 5; k += 2) {
21772 GemmMicrokernelTester()
21773 .mr(1)
21774 .nr(8)
21775 .kr(1)
21776 .sr(1)
21777 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021778 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021779 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021780 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021781 }
21782 }
21783 }
21784
Frank Barchard0725b8d2020-12-07 11:07:35 -080021785 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021786 for (uint32_t n = 9; n < 16; n++) {
21787 for (size_t k = 1; k <= 5; k += 2) {
21788 GemmMicrokernelTester()
21789 .mr(1)
21790 .nr(8)
21791 .kr(1)
21792 .sr(1)
21793 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021794 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021795 .k(k)
21796 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021797 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021798 }
21799 }
21800 }
21801
Frank Barchard0725b8d2020-12-07 11:07:35 -080021802 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021803 for (uint32_t n = 9; n < 16; n++) {
21804 for (size_t k = 1; k <= 5; k += 2) {
21805 GemmMicrokernelTester()
21806 .mr(1)
21807 .nr(8)
21808 .kr(1)
21809 .sr(1)
21810 .m(1)
21811 .n(n)
21812 .k(k)
21813 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021814 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021815 }
21816 }
21817 }
21818
Frank Barchard0725b8d2020-12-07 11:07:35 -080021819 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021820 for (uint32_t n = 9; n < 16; n++) {
21821 for (size_t k = 1; k <= 5; k += 2) {
21822 for (uint32_t m = 1; m <= 1; m++) {
21823 GemmMicrokernelTester()
21824 .mr(1)
21825 .nr(8)
21826 .kr(1)
21827 .sr(1)
21828 .m(m)
21829 .n(n)
21830 .k(k)
21831 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021832 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021833 }
21834 }
21835 }
21836 }
21837
Frank Barchard0725b8d2020-12-07 11:07:35 -080021838 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021839 for (uint32_t n = 16; n <= 24; n += 8) {
21840 for (size_t k = 1; k <= 5; k += 2) {
21841 GemmMicrokernelTester()
21842 .mr(1)
21843 .nr(8)
21844 .kr(1)
21845 .sr(1)
21846 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021847 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021848 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021849 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021850 }
21851 }
21852 }
21853
Frank Barchard0725b8d2020-12-07 11:07:35 -080021854 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021855 for (uint32_t n = 16; n <= 24; n += 8) {
21856 for (size_t k = 1; k <= 5; k += 2) {
21857 GemmMicrokernelTester()
21858 .mr(1)
21859 .nr(8)
21860 .kr(1)
21861 .sr(1)
21862 .m(1)
21863 .n(n)
21864 .k(k)
21865 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021866 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021867 }
21868 }
21869 }
21870
Frank Barchard0725b8d2020-12-07 11:07:35 -080021871 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021872 for (uint32_t n = 16; n <= 24; n += 8) {
21873 for (size_t k = 1; k <= 5; k += 2) {
21874 GemmMicrokernelTester()
21875 .mr(1)
21876 .nr(8)
21877 .kr(1)
21878 .sr(1)
21879 .m(1)
21880 .n(n)
21881 .k(k)
21882 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021883 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021884 }
21885 }
21886 }
21887
Frank Barchard0725b8d2020-12-07 11:07:35 -080021888 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021889 for (uint32_t n = 16; n <= 24; n += 8) {
21890 for (size_t k = 1; k <= 5; k += 2) {
21891 for (uint32_t m = 1; m <= 1; m++) {
21892 GemmMicrokernelTester()
21893 .mr(1)
21894 .nr(8)
21895 .kr(1)
21896 .sr(1)
21897 .m(m)
21898 .n(n)
21899 .k(k)
21900 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021901 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021902 }
21903 }
21904 }
21905 }
21906
Frank Barchard0725b8d2020-12-07 11:07:35 -080021907 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021908 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021909 for (uint32_t n = 1; n <= 8; n++) {
21910 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021911 GemmMicrokernelTester()
21912 .mr(1)
21913 .nr(8)
21914 .kr(1)
21915 .sr(1)
21916 .m(m)
21917 .n(n)
21918 .k(k)
21919 .cm_stride(11)
21920 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021921 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021922 }
21923 }
21924 }
21925 }
21926
Frank Barchard0725b8d2020-12-07 11:07:35 -080021927 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021928 GemmMicrokernelTester()
21929 .mr(1)
21930 .nr(8)
21931 .kr(1)
21932 .sr(1)
21933 .m(1)
21934 .n(8)
21935 .k(1)
21936 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021937 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021938 }
21939
Frank Barchard0725b8d2020-12-07 11:07:35 -080021940 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021941 GemmMicrokernelTester()
21942 .mr(1)
21943 .nr(8)
21944 .kr(1)
21945 .sr(1)
21946 .m(1)
21947 .n(8)
21948 .k(1)
21949 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021950 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021951 }
21952
Frank Barchard0725b8d2020-12-07 11:07:35 -080021953 TEST(F32_GEMMINC_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021954 GemmMicrokernelTester()
21955 .mr(1)
21956 .nr(8)
21957 .kr(1)
21958 .sr(1)
21959 .m(1)
21960 .n(8)
21961 .k(1)
21962 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021963 .Test(xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021964 }
Marat Dukhan4c617792021-12-21 15:47:58 -080021965#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021966
21967
Marat Dukhan4c617792021-12-21 15:47:58 -080021968#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080021969 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021970 GemmMicrokernelTester()
21971 .mr(3)
21972 .nr(8)
21973 .kr(1)
21974 .sr(1)
21975 .m(3)
21976 .n(8)
21977 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021978 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021979 }
21980
Frank Barchard0725b8d2020-12-07 11:07:35 -080021981 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021982 GemmMicrokernelTester()
21983 .mr(3)
21984 .nr(8)
21985 .kr(1)
21986 .sr(1)
21987 .m(3)
21988 .n(8)
21989 .k(1)
21990 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080021991 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021992 }
21993
Frank Barchard0725b8d2020-12-07 11:07:35 -080021994 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070021995 GemmMicrokernelTester()
21996 .mr(3)
21997 .nr(8)
21998 .kr(1)
21999 .sr(1)
22000 .m(3)
22001 .n(8)
22002 .k(1)
22003 .a_stride(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022004 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022005 }
22006
Frank Barchard0725b8d2020-12-07 11:07:35 -080022007 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022008 for (uint32_t n = 1; n <= 8; n++) {
22009 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022010 GemmMicrokernelTester()
22011 .mr(3)
22012 .nr(8)
22013 .kr(1)
22014 .sr(1)
22015 .m(m)
22016 .n(n)
22017 .k(1)
22018 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022019 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022020 }
22021 }
22022 }
22023
Frank Barchard0725b8d2020-12-07 11:07:35 -080022024 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022025 for (uint32_t m = 1; m <= 3; m++) {
22026 GemmMicrokernelTester()
22027 .mr(3)
22028 .nr(8)
22029 .kr(1)
22030 .sr(1)
22031 .m(m)
22032 .n(8)
22033 .k(1)
22034 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022035 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022036 }
22037 }
22038
Frank Barchard0725b8d2020-12-07 11:07:35 -080022039 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022040 for (uint32_t n = 1; n <= 8; n++) {
22041 GemmMicrokernelTester()
22042 .mr(3)
22043 .nr(8)
22044 .kr(1)
22045 .sr(1)
22046 .m(3)
22047 .n(n)
22048 .k(1)
22049 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022050 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022051 }
22052 }
22053
Frank Barchard0725b8d2020-12-07 11:07:35 -080022054 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022055 for (size_t k = 2; k < 10; k++) {
22056 GemmMicrokernelTester()
22057 .mr(3)
22058 .nr(8)
22059 .kr(1)
22060 .sr(1)
22061 .m(3)
22062 .n(8)
22063 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022064 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022065 }
22066 }
22067
Frank Barchard0725b8d2020-12-07 11:07:35 -080022068 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022069 for (size_t k = 2; k < 10; k++) {
22070 GemmMicrokernelTester()
22071 .mr(3)
22072 .nr(8)
22073 .kr(1)
22074 .sr(1)
22075 .m(3)
22076 .n(8)
22077 .k(k)
22078 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022079 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022080 }
22081 }
22082
Frank Barchard0725b8d2020-12-07 11:07:35 -080022083 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022084 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022085 for (uint32_t n = 1; n <= 8; n++) {
22086 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022087 GemmMicrokernelTester()
22088 .mr(3)
22089 .nr(8)
22090 .kr(1)
22091 .sr(1)
22092 .m(m)
22093 .n(n)
22094 .k(k)
22095 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022096 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022097 }
22098 }
22099 }
22100 }
22101
Frank Barchard0725b8d2020-12-07 11:07:35 -080022102 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022103 for (uint32_t n = 9; n < 16; n++) {
22104 for (size_t k = 1; k <= 5; k += 2) {
22105 GemmMicrokernelTester()
22106 .mr(3)
22107 .nr(8)
22108 .kr(1)
22109 .sr(1)
22110 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022111 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022112 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022113 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022114 }
22115 }
22116 }
22117
Frank Barchard0725b8d2020-12-07 11:07:35 -080022118 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022119 for (uint32_t n = 9; n < 16; n++) {
22120 for (size_t k = 1; k <= 5; k += 2) {
22121 GemmMicrokernelTester()
22122 .mr(3)
22123 .nr(8)
22124 .kr(1)
22125 .sr(1)
22126 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022127 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022128 .k(k)
22129 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022130 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022131 }
22132 }
22133 }
22134
Frank Barchard0725b8d2020-12-07 11:07:35 -080022135 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022136 for (uint32_t n = 9; n < 16; n++) {
22137 for (size_t k = 1; k <= 5; k += 2) {
22138 GemmMicrokernelTester()
22139 .mr(3)
22140 .nr(8)
22141 .kr(1)
22142 .sr(1)
22143 .m(3)
22144 .n(n)
22145 .k(k)
22146 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022147 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022148 }
22149 }
22150 }
22151
Frank Barchard0725b8d2020-12-07 11:07:35 -080022152 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022153 for (uint32_t n = 9; n < 16; n++) {
22154 for (size_t k = 1; k <= 5; k += 2) {
22155 for (uint32_t m = 1; m <= 3; m++) {
22156 GemmMicrokernelTester()
22157 .mr(3)
22158 .nr(8)
22159 .kr(1)
22160 .sr(1)
22161 .m(m)
22162 .n(n)
22163 .k(k)
22164 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022165 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022166 }
22167 }
22168 }
22169 }
22170
Frank Barchard0725b8d2020-12-07 11:07:35 -080022171 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022172 for (uint32_t n = 16; n <= 24; n += 8) {
22173 for (size_t k = 1; k <= 5; k += 2) {
22174 GemmMicrokernelTester()
22175 .mr(3)
22176 .nr(8)
22177 .kr(1)
22178 .sr(1)
22179 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022180 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022181 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022182 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022183 }
22184 }
22185 }
22186
Frank Barchard0725b8d2020-12-07 11:07:35 -080022187 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022188 for (uint32_t n = 16; n <= 24; n += 8) {
22189 for (size_t k = 1; k <= 5; k += 2) {
22190 GemmMicrokernelTester()
22191 .mr(3)
22192 .nr(8)
22193 .kr(1)
22194 .sr(1)
22195 .m(3)
22196 .n(n)
22197 .k(k)
22198 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022199 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022200 }
22201 }
22202 }
22203
Frank Barchard0725b8d2020-12-07 11:07:35 -080022204 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022205 for (uint32_t n = 16; n <= 24; n += 8) {
22206 for (size_t k = 1; k <= 5; k += 2) {
22207 GemmMicrokernelTester()
22208 .mr(3)
22209 .nr(8)
22210 .kr(1)
22211 .sr(1)
22212 .m(3)
22213 .n(n)
22214 .k(k)
22215 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022216 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022217 }
22218 }
22219 }
22220
Frank Barchard0725b8d2020-12-07 11:07:35 -080022221 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022222 for (uint32_t n = 16; n <= 24; n += 8) {
22223 for (size_t k = 1; k <= 5; k += 2) {
22224 for (uint32_t m = 1; m <= 3; m++) {
22225 GemmMicrokernelTester()
22226 .mr(3)
22227 .nr(8)
22228 .kr(1)
22229 .sr(1)
22230 .m(m)
22231 .n(n)
22232 .k(k)
22233 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022234 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022235 }
22236 }
22237 }
22238 }
22239
Frank Barchard0725b8d2020-12-07 11:07:35 -080022240 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022241 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022242 for (uint32_t n = 1; n <= 8; n++) {
22243 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022244 GemmMicrokernelTester()
22245 .mr(3)
22246 .nr(8)
22247 .kr(1)
22248 .sr(1)
22249 .m(m)
22250 .n(n)
22251 .k(k)
22252 .cm_stride(11)
22253 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022254 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022255 }
22256 }
22257 }
22258 }
22259
Frank Barchard0725b8d2020-12-07 11:07:35 -080022260 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022261 GemmMicrokernelTester()
22262 .mr(3)
22263 .nr(8)
22264 .kr(1)
22265 .sr(1)
22266 .m(3)
22267 .n(8)
22268 .k(1)
22269 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022270 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022271 }
22272
Frank Barchard0725b8d2020-12-07 11:07:35 -080022273 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022274 GemmMicrokernelTester()
22275 .mr(3)
22276 .nr(8)
22277 .kr(1)
22278 .sr(1)
22279 .m(3)
22280 .n(8)
22281 .k(1)
22282 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022283 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022284 }
22285
Frank Barchard0725b8d2020-12-07 11:07:35 -080022286 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022287 GemmMicrokernelTester()
22288 .mr(3)
22289 .nr(8)
22290 .kr(1)
22291 .sr(1)
22292 .m(3)
22293 .n(8)
22294 .k(1)
22295 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022296 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022297 }
Marat Dukhan4c617792021-12-21 15:47:58 -080022298#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022299
22300
Marat Dukhan4c617792021-12-21 15:47:58 -080022301#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080022302 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022303 GemmMicrokernelTester()
22304 .mr(4)
22305 .nr(8)
22306 .kr(1)
22307 .sr(1)
22308 .m(4)
22309 .n(8)
22310 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022311 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022312 }
22313
Frank Barchard0725b8d2020-12-07 11:07:35 -080022314 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022315 GemmMicrokernelTester()
22316 .mr(4)
22317 .nr(8)
22318 .kr(1)
22319 .sr(1)
22320 .m(4)
22321 .n(8)
22322 .k(1)
22323 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022324 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022325 }
22326
Frank Barchard0725b8d2020-12-07 11:07:35 -080022327 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022328 GemmMicrokernelTester()
22329 .mr(4)
22330 .nr(8)
22331 .kr(1)
22332 .sr(1)
22333 .m(4)
22334 .n(8)
22335 .k(1)
22336 .a_stride(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022337 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022338 }
22339
Frank Barchard0725b8d2020-12-07 11:07:35 -080022340 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022341 for (uint32_t n = 1; n <= 8; n++) {
22342 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022343 GemmMicrokernelTester()
22344 .mr(4)
22345 .nr(8)
22346 .kr(1)
22347 .sr(1)
22348 .m(m)
22349 .n(n)
22350 .k(1)
22351 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022352 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022353 }
22354 }
22355 }
22356
Frank Barchard0725b8d2020-12-07 11:07:35 -080022357 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022358 for (uint32_t m = 1; m <= 4; m++) {
22359 GemmMicrokernelTester()
22360 .mr(4)
22361 .nr(8)
22362 .kr(1)
22363 .sr(1)
22364 .m(m)
22365 .n(8)
22366 .k(1)
22367 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022368 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022369 }
22370 }
22371
Frank Barchard0725b8d2020-12-07 11:07:35 -080022372 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022373 for (uint32_t n = 1; n <= 8; n++) {
22374 GemmMicrokernelTester()
22375 .mr(4)
22376 .nr(8)
22377 .kr(1)
22378 .sr(1)
22379 .m(4)
22380 .n(n)
22381 .k(1)
22382 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022383 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022384 }
22385 }
22386
Frank Barchard0725b8d2020-12-07 11:07:35 -080022387 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022388 for (size_t k = 2; k < 10; k++) {
22389 GemmMicrokernelTester()
22390 .mr(4)
22391 .nr(8)
22392 .kr(1)
22393 .sr(1)
22394 .m(4)
22395 .n(8)
22396 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022397 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022398 }
22399 }
22400
Frank Barchard0725b8d2020-12-07 11:07:35 -080022401 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022402 for (size_t k = 2; k < 10; k++) {
22403 GemmMicrokernelTester()
22404 .mr(4)
22405 .nr(8)
22406 .kr(1)
22407 .sr(1)
22408 .m(4)
22409 .n(8)
22410 .k(k)
22411 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022412 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022413 }
22414 }
22415
Frank Barchard0725b8d2020-12-07 11:07:35 -080022416 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022417 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022418 for (uint32_t n = 1; n <= 8; n++) {
22419 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022420 GemmMicrokernelTester()
22421 .mr(4)
22422 .nr(8)
22423 .kr(1)
22424 .sr(1)
22425 .m(m)
22426 .n(n)
22427 .k(k)
22428 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022429 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022430 }
22431 }
22432 }
22433 }
22434
Frank Barchard0725b8d2020-12-07 11:07:35 -080022435 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022436 for (uint32_t n = 9; n < 16; n++) {
22437 for (size_t k = 1; k <= 5; k += 2) {
22438 GemmMicrokernelTester()
22439 .mr(4)
22440 .nr(8)
22441 .kr(1)
22442 .sr(1)
22443 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022444 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022445 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022446 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022447 }
22448 }
22449 }
22450
Frank Barchard0725b8d2020-12-07 11:07:35 -080022451 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022452 for (uint32_t n = 9; n < 16; n++) {
22453 for (size_t k = 1; k <= 5; k += 2) {
22454 GemmMicrokernelTester()
22455 .mr(4)
22456 .nr(8)
22457 .kr(1)
22458 .sr(1)
22459 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022460 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022461 .k(k)
22462 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022463 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022464 }
22465 }
22466 }
22467
Frank Barchard0725b8d2020-12-07 11:07:35 -080022468 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022469 for (uint32_t n = 9; n < 16; n++) {
22470 for (size_t k = 1; k <= 5; k += 2) {
22471 GemmMicrokernelTester()
22472 .mr(4)
22473 .nr(8)
22474 .kr(1)
22475 .sr(1)
22476 .m(4)
22477 .n(n)
22478 .k(k)
22479 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022480 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022481 }
22482 }
22483 }
22484
Frank Barchard0725b8d2020-12-07 11:07:35 -080022485 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022486 for (uint32_t n = 9; n < 16; n++) {
22487 for (size_t k = 1; k <= 5; k += 2) {
22488 for (uint32_t m = 1; m <= 4; m++) {
22489 GemmMicrokernelTester()
22490 .mr(4)
22491 .nr(8)
22492 .kr(1)
22493 .sr(1)
22494 .m(m)
22495 .n(n)
22496 .k(k)
22497 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022498 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022499 }
22500 }
22501 }
22502 }
22503
Frank Barchard0725b8d2020-12-07 11:07:35 -080022504 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022505 for (uint32_t n = 16; n <= 24; n += 8) {
22506 for (size_t k = 1; k <= 5; k += 2) {
22507 GemmMicrokernelTester()
22508 .mr(4)
22509 .nr(8)
22510 .kr(1)
22511 .sr(1)
22512 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022513 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022514 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022515 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022516 }
22517 }
22518 }
22519
Frank Barchard0725b8d2020-12-07 11:07:35 -080022520 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022521 for (uint32_t n = 16; n <= 24; n += 8) {
22522 for (size_t k = 1; k <= 5; k += 2) {
22523 GemmMicrokernelTester()
22524 .mr(4)
22525 .nr(8)
22526 .kr(1)
22527 .sr(1)
22528 .m(4)
22529 .n(n)
22530 .k(k)
22531 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022532 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022533 }
22534 }
22535 }
22536
Frank Barchard0725b8d2020-12-07 11:07:35 -080022537 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022538 for (uint32_t n = 16; n <= 24; n += 8) {
22539 for (size_t k = 1; k <= 5; k += 2) {
22540 GemmMicrokernelTester()
22541 .mr(4)
22542 .nr(8)
22543 .kr(1)
22544 .sr(1)
22545 .m(4)
22546 .n(n)
22547 .k(k)
22548 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022549 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022550 }
22551 }
22552 }
22553
Frank Barchard0725b8d2020-12-07 11:07:35 -080022554 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022555 for (uint32_t n = 16; n <= 24; n += 8) {
22556 for (size_t k = 1; k <= 5; k += 2) {
22557 for (uint32_t m = 1; m <= 4; m++) {
22558 GemmMicrokernelTester()
22559 .mr(4)
22560 .nr(8)
22561 .kr(1)
22562 .sr(1)
22563 .m(m)
22564 .n(n)
22565 .k(k)
22566 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022567 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022568 }
22569 }
22570 }
22571 }
22572
Frank Barchard0725b8d2020-12-07 11:07:35 -080022573 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022574 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022575 for (uint32_t n = 1; n <= 8; n++) {
22576 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022577 GemmMicrokernelTester()
22578 .mr(4)
22579 .nr(8)
22580 .kr(1)
22581 .sr(1)
22582 .m(m)
22583 .n(n)
22584 .k(k)
22585 .cm_stride(11)
22586 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022587 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022588 }
22589 }
22590 }
22591 }
22592
Frank Barchard0725b8d2020-12-07 11:07:35 -080022593 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022594 GemmMicrokernelTester()
22595 .mr(4)
22596 .nr(8)
22597 .kr(1)
22598 .sr(1)
22599 .m(4)
22600 .n(8)
22601 .k(1)
22602 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022603 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022604 }
22605
Frank Barchard0725b8d2020-12-07 11:07:35 -080022606 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022607 GemmMicrokernelTester()
22608 .mr(4)
22609 .nr(8)
22610 .kr(1)
22611 .sr(1)
22612 .m(4)
22613 .n(8)
22614 .k(1)
22615 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022616 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022617 }
22618
Frank Barchard0725b8d2020-12-07 11:07:35 -080022619 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022620 GemmMicrokernelTester()
22621 .mr(4)
22622 .nr(8)
22623 .kr(1)
22624 .sr(1)
22625 .m(4)
22626 .n(8)
22627 .k(1)
22628 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022629 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022630 }
Marat Dukhan4c617792021-12-21 15:47:58 -080022631#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022632
22633
Marat Dukhan4c617792021-12-21 15:47:58 -080022634#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080022635 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022636 GemmMicrokernelTester()
22637 .mr(6)
22638 .nr(8)
22639 .kr(1)
22640 .sr(1)
22641 .m(6)
22642 .n(8)
22643 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022644 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022645 }
22646
Frank Barchard0725b8d2020-12-07 11:07:35 -080022647 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022648 GemmMicrokernelTester()
22649 .mr(6)
22650 .nr(8)
22651 .kr(1)
22652 .sr(1)
22653 .m(6)
22654 .n(8)
22655 .k(1)
22656 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022657 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022658 }
22659
Frank Barchard0725b8d2020-12-07 11:07:35 -080022660 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022661 GemmMicrokernelTester()
22662 .mr(6)
22663 .nr(8)
22664 .kr(1)
22665 .sr(1)
22666 .m(6)
22667 .n(8)
22668 .k(1)
22669 .a_stride(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022670 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022671 }
22672
Frank Barchard0725b8d2020-12-07 11:07:35 -080022673 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022674 for (uint32_t n = 1; n <= 8; n++) {
22675 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022676 GemmMicrokernelTester()
22677 .mr(6)
22678 .nr(8)
22679 .kr(1)
22680 .sr(1)
22681 .m(m)
22682 .n(n)
22683 .k(1)
22684 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022685 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022686 }
22687 }
22688 }
22689
Frank Barchard0725b8d2020-12-07 11:07:35 -080022690 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022691 for (uint32_t m = 1; m <= 6; m++) {
22692 GemmMicrokernelTester()
22693 .mr(6)
22694 .nr(8)
22695 .kr(1)
22696 .sr(1)
22697 .m(m)
22698 .n(8)
22699 .k(1)
22700 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022701 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022702 }
22703 }
22704
Frank Barchard0725b8d2020-12-07 11:07:35 -080022705 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022706 for (uint32_t n = 1; n <= 8; n++) {
22707 GemmMicrokernelTester()
22708 .mr(6)
22709 .nr(8)
22710 .kr(1)
22711 .sr(1)
22712 .m(6)
22713 .n(n)
22714 .k(1)
22715 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022716 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022717 }
22718 }
22719
Frank Barchard0725b8d2020-12-07 11:07:35 -080022720 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022721 for (size_t k = 2; k < 10; k++) {
22722 GemmMicrokernelTester()
22723 .mr(6)
22724 .nr(8)
22725 .kr(1)
22726 .sr(1)
22727 .m(6)
22728 .n(8)
22729 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022730 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022731 }
22732 }
22733
Frank Barchard0725b8d2020-12-07 11:07:35 -080022734 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022735 for (size_t k = 2; k < 10; k++) {
22736 GemmMicrokernelTester()
22737 .mr(6)
22738 .nr(8)
22739 .kr(1)
22740 .sr(1)
22741 .m(6)
22742 .n(8)
22743 .k(k)
22744 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022745 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022746 }
22747 }
22748
Frank Barchard0725b8d2020-12-07 11:07:35 -080022749 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022750 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022751 for (uint32_t n = 1; n <= 8; n++) {
22752 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022753 GemmMicrokernelTester()
22754 .mr(6)
22755 .nr(8)
22756 .kr(1)
22757 .sr(1)
22758 .m(m)
22759 .n(n)
22760 .k(k)
22761 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022762 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022763 }
22764 }
22765 }
22766 }
22767
Frank Barchard0725b8d2020-12-07 11:07:35 -080022768 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022769 for (uint32_t n = 9; n < 16; n++) {
22770 for (size_t k = 1; k <= 5; k += 2) {
22771 GemmMicrokernelTester()
22772 .mr(6)
22773 .nr(8)
22774 .kr(1)
22775 .sr(1)
22776 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022777 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022778 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022779 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022780 }
22781 }
22782 }
22783
Frank Barchard0725b8d2020-12-07 11:07:35 -080022784 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022785 for (uint32_t n = 9; n < 16; n++) {
22786 for (size_t k = 1; k <= 5; k += 2) {
22787 GemmMicrokernelTester()
22788 .mr(6)
22789 .nr(8)
22790 .kr(1)
22791 .sr(1)
22792 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022793 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022794 .k(k)
22795 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022796 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022797 }
22798 }
22799 }
22800
Frank Barchard0725b8d2020-12-07 11:07:35 -080022801 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022802 for (uint32_t n = 9; n < 16; n++) {
22803 for (size_t k = 1; k <= 5; k += 2) {
22804 GemmMicrokernelTester()
22805 .mr(6)
22806 .nr(8)
22807 .kr(1)
22808 .sr(1)
22809 .m(6)
22810 .n(n)
22811 .k(k)
22812 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022813 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022814 }
22815 }
22816 }
22817
Frank Barchard0725b8d2020-12-07 11:07:35 -080022818 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022819 for (uint32_t n = 9; n < 16; n++) {
22820 for (size_t k = 1; k <= 5; k += 2) {
22821 for (uint32_t m = 1; m <= 6; m++) {
22822 GemmMicrokernelTester()
22823 .mr(6)
22824 .nr(8)
22825 .kr(1)
22826 .sr(1)
22827 .m(m)
22828 .n(n)
22829 .k(k)
22830 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022831 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022832 }
22833 }
22834 }
22835 }
22836
Frank Barchard0725b8d2020-12-07 11:07:35 -080022837 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022838 for (uint32_t n = 16; n <= 24; n += 8) {
22839 for (size_t k = 1; k <= 5; k += 2) {
22840 GemmMicrokernelTester()
22841 .mr(6)
22842 .nr(8)
22843 .kr(1)
22844 .sr(1)
22845 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022846 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022847 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022848 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022849 }
22850 }
22851 }
22852
Frank Barchard0725b8d2020-12-07 11:07:35 -080022853 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022854 for (uint32_t n = 16; n <= 24; n += 8) {
22855 for (size_t k = 1; k <= 5; k += 2) {
22856 GemmMicrokernelTester()
22857 .mr(6)
22858 .nr(8)
22859 .kr(1)
22860 .sr(1)
22861 .m(6)
22862 .n(n)
22863 .k(k)
22864 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022865 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022866 }
22867 }
22868 }
22869
Frank Barchard0725b8d2020-12-07 11:07:35 -080022870 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022871 for (uint32_t n = 16; n <= 24; n += 8) {
22872 for (size_t k = 1; k <= 5; k += 2) {
22873 GemmMicrokernelTester()
22874 .mr(6)
22875 .nr(8)
22876 .kr(1)
22877 .sr(1)
22878 .m(6)
22879 .n(n)
22880 .k(k)
22881 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022882 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022883 }
22884 }
22885 }
22886
Frank Barchard0725b8d2020-12-07 11:07:35 -080022887 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022888 for (uint32_t n = 16; n <= 24; n += 8) {
22889 for (size_t k = 1; k <= 5; k += 2) {
22890 for (uint32_t m = 1; m <= 6; m++) {
22891 GemmMicrokernelTester()
22892 .mr(6)
22893 .nr(8)
22894 .kr(1)
22895 .sr(1)
22896 .m(m)
22897 .n(n)
22898 .k(k)
22899 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022900 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022901 }
22902 }
22903 }
22904 }
22905
Frank Barchard0725b8d2020-12-07 11:07:35 -080022906 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022907 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022908 for (uint32_t n = 1; n <= 8; n++) {
22909 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022910 GemmMicrokernelTester()
22911 .mr(6)
22912 .nr(8)
22913 .kr(1)
22914 .sr(1)
22915 .m(m)
22916 .n(n)
22917 .k(k)
22918 .cm_stride(11)
22919 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022920 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022921 }
22922 }
22923 }
22924 }
22925
Frank Barchard0725b8d2020-12-07 11:07:35 -080022926 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022927 GemmMicrokernelTester()
22928 .mr(6)
22929 .nr(8)
22930 .kr(1)
22931 .sr(1)
22932 .m(6)
22933 .n(8)
22934 .k(1)
22935 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022936 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022937 }
22938
Frank Barchard0725b8d2020-12-07 11:07:35 -080022939 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022940 GemmMicrokernelTester()
22941 .mr(6)
22942 .nr(8)
22943 .kr(1)
22944 .sr(1)
22945 .m(6)
22946 .n(8)
22947 .k(1)
22948 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022949 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022950 }
22951
Frank Barchard0725b8d2020-12-07 11:07:35 -080022952 TEST(F32_GEMMINC_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022953 GemmMicrokernelTester()
22954 .mr(6)
22955 .nr(8)
22956 .kr(1)
22957 .sr(1)
22958 .m(6)
22959 .n(8)
22960 .k(1)
22961 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022962 .Test(xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022963 }
Marat Dukhan4c617792021-12-21 15:47:58 -080022964#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022965
22966
Marat Dukhan4c617792021-12-21 15:47:58 -080022967#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080022968 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022969 GemmMicrokernelTester()
22970 .mr(3)
22971 .nr(8)
22972 .kr(1)
22973 .sr(1)
22974 .m(3)
22975 .n(8)
22976 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022977 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022978 }
22979
Frank Barchard0725b8d2020-12-07 11:07:35 -080022980 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022981 GemmMicrokernelTester()
22982 .mr(3)
22983 .nr(8)
22984 .kr(1)
22985 .sr(1)
22986 .m(3)
22987 .n(8)
22988 .k(4)
22989 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022990 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022991 }
22992
Frank Barchard0725b8d2020-12-07 11:07:35 -080022993 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022994 GemmMicrokernelTester()
22995 .mr(3)
22996 .nr(8)
22997 .kr(1)
22998 .sr(1)
22999 .m(3)
23000 .n(8)
23001 .k(4)
23002 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023003 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023004 }
23005
Frank Barchard0725b8d2020-12-07 11:07:35 -080023006 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023007 for (uint32_t n = 1; n <= 8; n++) {
23008 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023009 GemmMicrokernelTester()
23010 .mr(3)
23011 .nr(8)
23012 .kr(1)
23013 .sr(1)
23014 .m(m)
23015 .n(n)
23016 .k(4)
23017 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023018 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023019 }
23020 }
23021 }
23022
Frank Barchard0725b8d2020-12-07 11:07:35 -080023023 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023024 for (uint32_t m = 1; m <= 3; m++) {
23025 GemmMicrokernelTester()
23026 .mr(3)
23027 .nr(8)
23028 .kr(1)
23029 .sr(1)
23030 .m(m)
23031 .n(8)
23032 .k(4)
23033 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023034 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023035 }
23036 }
23037
Frank Barchard0725b8d2020-12-07 11:07:35 -080023038 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023039 for (uint32_t n = 1; n <= 8; n++) {
23040 GemmMicrokernelTester()
23041 .mr(3)
23042 .nr(8)
23043 .kr(1)
23044 .sr(1)
23045 .m(3)
23046 .n(n)
23047 .k(4)
23048 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023049 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023050 }
23051 }
23052
Frank Barchard0725b8d2020-12-07 11:07:35 -080023053 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023054 for (size_t k = 1; k < 4; k++) {
23055 GemmMicrokernelTester()
23056 .mr(3)
23057 .nr(8)
23058 .kr(1)
23059 .sr(1)
23060 .m(3)
23061 .n(8)
23062 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023063 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023064 }
23065 }
23066
Frank Barchard0725b8d2020-12-07 11:07:35 -080023067 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_lt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023068 for (size_t k = 1; k < 4; k++) {
23069 GemmMicrokernelTester()
23070 .mr(3)
23071 .nr(8)
23072 .kr(1)
23073 .sr(1)
23074 .m(3)
23075 .n(8)
23076 .k(k)
23077 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023078 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023079 }
23080 }
23081
Frank Barchard0725b8d2020-12-07 11:07:35 -080023082 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023083 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023084 for (uint32_t n = 1; n <= 8; n++) {
23085 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023086 GemmMicrokernelTester()
23087 .mr(3)
23088 .nr(8)
23089 .kr(1)
23090 .sr(1)
23091 .m(m)
23092 .n(n)
23093 .k(k)
23094 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023095 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023096 }
23097 }
23098 }
23099 }
23100
Frank Barchard0725b8d2020-12-07 11:07:35 -080023101 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023102 for (size_t k = 5; k < 8; k++) {
23103 GemmMicrokernelTester()
23104 .mr(3)
23105 .nr(8)
23106 .kr(1)
23107 .sr(1)
23108 .m(3)
23109 .n(8)
23110 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023111 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023112 }
23113 }
23114
Frank Barchard0725b8d2020-12-07 11:07:35 -080023115 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_gt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023116 for (size_t k = 5; k < 8; k++) {
23117 GemmMicrokernelTester()
23118 .mr(3)
23119 .nr(8)
23120 .kr(1)
23121 .sr(1)
23122 .m(3)
23123 .n(8)
23124 .k(k)
23125 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023126 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023127 }
23128 }
23129
Frank Barchard0725b8d2020-12-07 11:07:35 -080023130 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023131 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023132 for (uint32_t n = 1; n <= 8; n++) {
23133 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023134 GemmMicrokernelTester()
23135 .mr(3)
23136 .nr(8)
23137 .kr(1)
23138 .sr(1)
23139 .m(m)
23140 .n(n)
23141 .k(k)
23142 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023143 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023144 }
23145 }
23146 }
23147 }
23148
Frank Barchard0725b8d2020-12-07 11:07:35 -080023149 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023150 for (size_t k = 8; k <= 40; k += 4) {
23151 GemmMicrokernelTester()
23152 .mr(3)
23153 .nr(8)
23154 .kr(1)
23155 .sr(1)
23156 .m(3)
23157 .n(8)
23158 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023159 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023160 }
23161 }
23162
Frank Barchard0725b8d2020-12-07 11:07:35 -080023163 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_div_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023164 for (size_t k = 8; k <= 40; k += 4) {
23165 GemmMicrokernelTester()
23166 .mr(3)
23167 .nr(8)
23168 .kr(1)
23169 .sr(1)
23170 .m(3)
23171 .n(8)
23172 .k(k)
23173 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023174 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023175 }
23176 }
23177
Frank Barchard0725b8d2020-12-07 11:07:35 -080023178 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023179 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023180 for (uint32_t n = 1; n <= 8; n++) {
23181 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023182 GemmMicrokernelTester()
23183 .mr(3)
23184 .nr(8)
23185 .kr(1)
23186 .sr(1)
23187 .m(m)
23188 .n(n)
23189 .k(k)
23190 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023191 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023192 }
23193 }
23194 }
23195 }
23196
Frank Barchard0725b8d2020-12-07 11:07:35 -080023197 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023198 for (uint32_t n = 9; n < 16; n++) {
23199 for (size_t k = 1; k <= 20; k += 5) {
23200 GemmMicrokernelTester()
23201 .mr(3)
23202 .nr(8)
23203 .kr(1)
23204 .sr(1)
23205 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023206 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023207 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023208 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023209 }
23210 }
23211 }
23212
Frank Barchard0725b8d2020-12-07 11:07:35 -080023213 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023214 for (uint32_t n = 9; n < 16; n++) {
23215 for (size_t k = 1; k <= 20; k += 5) {
23216 GemmMicrokernelTester()
23217 .mr(3)
23218 .nr(8)
23219 .kr(1)
23220 .sr(1)
23221 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023222 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023223 .k(k)
23224 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023225 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023226 }
23227 }
23228 }
23229
Frank Barchard0725b8d2020-12-07 11:07:35 -080023230 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023231 for (uint32_t n = 9; n < 16; n++) {
23232 for (size_t k = 1; k <= 20; k += 5) {
23233 GemmMicrokernelTester()
23234 .mr(3)
23235 .nr(8)
23236 .kr(1)
23237 .sr(1)
23238 .m(3)
23239 .n(n)
23240 .k(k)
23241 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023242 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023243 }
23244 }
23245 }
23246
Frank Barchard0725b8d2020-12-07 11:07:35 -080023247 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023248 for (uint32_t n = 9; n < 16; n++) {
23249 for (size_t k = 1; k <= 20; k += 5) {
23250 for (uint32_t m = 1; m <= 3; m++) {
23251 GemmMicrokernelTester()
23252 .mr(3)
23253 .nr(8)
23254 .kr(1)
23255 .sr(1)
23256 .m(m)
23257 .n(n)
23258 .k(k)
23259 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023260 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023261 }
23262 }
23263 }
23264 }
23265
Frank Barchard0725b8d2020-12-07 11:07:35 -080023266 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023267 for (uint32_t n = 16; n <= 24; n += 8) {
23268 for (size_t k = 1; k <= 20; k += 5) {
23269 GemmMicrokernelTester()
23270 .mr(3)
23271 .nr(8)
23272 .kr(1)
23273 .sr(1)
23274 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023275 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023276 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023277 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023278 }
23279 }
23280 }
23281
Frank Barchard0725b8d2020-12-07 11:07:35 -080023282 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023283 for (uint32_t n = 16; n <= 24; n += 8) {
23284 for (size_t k = 1; k <= 20; k += 5) {
23285 GemmMicrokernelTester()
23286 .mr(3)
23287 .nr(8)
23288 .kr(1)
23289 .sr(1)
23290 .m(3)
23291 .n(n)
23292 .k(k)
23293 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023294 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023295 }
23296 }
23297 }
23298
Frank Barchard0725b8d2020-12-07 11:07:35 -080023299 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023300 for (uint32_t n = 16; n <= 24; n += 8) {
23301 for (size_t k = 1; k <= 20; k += 5) {
23302 GemmMicrokernelTester()
23303 .mr(3)
23304 .nr(8)
23305 .kr(1)
23306 .sr(1)
23307 .m(3)
23308 .n(n)
23309 .k(k)
23310 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023311 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023312 }
23313 }
23314 }
23315
Frank Barchard0725b8d2020-12-07 11:07:35 -080023316 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023317 for (uint32_t n = 16; n <= 24; n += 8) {
23318 for (size_t k = 1; k <= 20; k += 5) {
23319 for (uint32_t m = 1; m <= 3; m++) {
23320 GemmMicrokernelTester()
23321 .mr(3)
23322 .nr(8)
23323 .kr(1)
23324 .sr(1)
23325 .m(m)
23326 .n(n)
23327 .k(k)
23328 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023329 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023330 }
23331 }
23332 }
23333 }
23334
Frank Barchard0725b8d2020-12-07 11:07:35 -080023335 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023336 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023337 for (uint32_t n = 1; n <= 8; n++) {
23338 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023339 GemmMicrokernelTester()
23340 .mr(3)
23341 .nr(8)
23342 .kr(1)
23343 .sr(1)
23344 .m(m)
23345 .n(n)
23346 .k(k)
23347 .cm_stride(11)
23348 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023349 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023350 }
23351 }
23352 }
23353 }
23354
Frank Barchard0725b8d2020-12-07 11:07:35 -080023355 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023356 GemmMicrokernelTester()
23357 .mr(3)
23358 .nr(8)
23359 .kr(1)
23360 .sr(1)
23361 .m(3)
23362 .n(8)
23363 .k(4)
23364 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023365 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023366 }
23367
Frank Barchard0725b8d2020-12-07 11:07:35 -080023368 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023369 GemmMicrokernelTester()
23370 .mr(3)
23371 .nr(8)
23372 .kr(1)
23373 .sr(1)
23374 .m(3)
23375 .n(8)
23376 .k(4)
23377 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023378 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023379 }
23380
Frank Barchard0725b8d2020-12-07 11:07:35 -080023381 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_ARM_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023382 GemmMicrokernelTester()
23383 .mr(3)
23384 .nr(8)
23385 .kr(1)
23386 .sr(1)
23387 .m(3)
23388 .n(8)
23389 .k(4)
23390 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023391 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023392 }
Marat Dukhan4c617792021-12-21 15:47:58 -080023393#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023394
23395
Marat Dukhan4c617792021-12-21 15:47:58 -080023396#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080023397 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023398 GemmMicrokernelTester()
23399 .mr(4)
23400 .nr(8)
23401 .kr(1)
23402 .sr(1)
23403 .m(4)
23404 .n(8)
23405 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023406 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023407 }
23408
Frank Barchard0725b8d2020-12-07 11:07:35 -080023409 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023410 GemmMicrokernelTester()
23411 .mr(4)
23412 .nr(8)
23413 .kr(1)
23414 .sr(1)
23415 .m(4)
23416 .n(8)
23417 .k(4)
23418 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023419 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023420 }
23421
Frank Barchard0725b8d2020-12-07 11:07:35 -080023422 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023423 GemmMicrokernelTester()
23424 .mr(4)
23425 .nr(8)
23426 .kr(1)
23427 .sr(1)
23428 .m(4)
23429 .n(8)
23430 .k(4)
23431 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023432 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023433 }
23434
Frank Barchard0725b8d2020-12-07 11:07:35 -080023435 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023436 for (uint32_t n = 1; n <= 8; n++) {
23437 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023438 GemmMicrokernelTester()
23439 .mr(4)
23440 .nr(8)
23441 .kr(1)
23442 .sr(1)
23443 .m(m)
23444 .n(n)
23445 .k(4)
23446 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023447 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023448 }
23449 }
23450 }
23451
Frank Barchard0725b8d2020-12-07 11:07:35 -080023452 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023453 for (uint32_t m = 1; m <= 4; m++) {
23454 GemmMicrokernelTester()
23455 .mr(4)
23456 .nr(8)
23457 .kr(1)
23458 .sr(1)
23459 .m(m)
23460 .n(8)
23461 .k(4)
23462 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023463 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023464 }
23465 }
23466
Frank Barchard0725b8d2020-12-07 11:07:35 -080023467 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023468 for (uint32_t n = 1; n <= 8; n++) {
23469 GemmMicrokernelTester()
23470 .mr(4)
23471 .nr(8)
23472 .kr(1)
23473 .sr(1)
23474 .m(4)
23475 .n(n)
23476 .k(4)
23477 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023478 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023479 }
23480 }
23481
Frank Barchard0725b8d2020-12-07 11:07:35 -080023482 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023483 for (size_t k = 1; k < 4; k++) {
23484 GemmMicrokernelTester()
23485 .mr(4)
23486 .nr(8)
23487 .kr(1)
23488 .sr(1)
23489 .m(4)
23490 .n(8)
23491 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023492 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023493 }
23494 }
23495
Frank Barchard0725b8d2020-12-07 11:07:35 -080023496 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_lt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023497 for (size_t k = 1; k < 4; k++) {
23498 GemmMicrokernelTester()
23499 .mr(4)
23500 .nr(8)
23501 .kr(1)
23502 .sr(1)
23503 .m(4)
23504 .n(8)
23505 .k(k)
23506 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023507 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023508 }
23509 }
23510
Frank Barchard0725b8d2020-12-07 11:07:35 -080023511 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023512 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023513 for (uint32_t n = 1; n <= 8; n++) {
23514 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023515 GemmMicrokernelTester()
23516 .mr(4)
23517 .nr(8)
23518 .kr(1)
23519 .sr(1)
23520 .m(m)
23521 .n(n)
23522 .k(k)
23523 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023524 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023525 }
23526 }
23527 }
23528 }
23529
Frank Barchard0725b8d2020-12-07 11:07:35 -080023530 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023531 for (size_t k = 5; k < 8; k++) {
23532 GemmMicrokernelTester()
23533 .mr(4)
23534 .nr(8)
23535 .kr(1)
23536 .sr(1)
23537 .m(4)
23538 .n(8)
23539 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023540 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023541 }
23542 }
23543
Frank Barchard0725b8d2020-12-07 11:07:35 -080023544 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023545 for (size_t k = 5; k < 8; k++) {
23546 GemmMicrokernelTester()
23547 .mr(4)
23548 .nr(8)
23549 .kr(1)
23550 .sr(1)
23551 .m(4)
23552 .n(8)
23553 .k(k)
23554 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023555 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023556 }
23557 }
23558
Frank Barchard0725b8d2020-12-07 11:07:35 -080023559 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023560 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023561 for (uint32_t n = 1; n <= 8; n++) {
23562 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023563 GemmMicrokernelTester()
23564 .mr(4)
23565 .nr(8)
23566 .kr(1)
23567 .sr(1)
23568 .m(m)
23569 .n(n)
23570 .k(k)
23571 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023572 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023573 }
23574 }
23575 }
23576 }
23577
Frank Barchard0725b8d2020-12-07 11:07:35 -080023578 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023579 for (size_t k = 8; k <= 40; k += 4) {
23580 GemmMicrokernelTester()
23581 .mr(4)
23582 .nr(8)
23583 .kr(1)
23584 .sr(1)
23585 .m(4)
23586 .n(8)
23587 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023588 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023589 }
23590 }
23591
Frank Barchard0725b8d2020-12-07 11:07:35 -080023592 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_div_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023593 for (size_t k = 8; k <= 40; k += 4) {
23594 GemmMicrokernelTester()
23595 .mr(4)
23596 .nr(8)
23597 .kr(1)
23598 .sr(1)
23599 .m(4)
23600 .n(8)
23601 .k(k)
23602 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023603 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023604 }
23605 }
23606
Frank Barchard0725b8d2020-12-07 11:07:35 -080023607 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023608 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023609 for (uint32_t n = 1; n <= 8; n++) {
23610 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023611 GemmMicrokernelTester()
23612 .mr(4)
23613 .nr(8)
23614 .kr(1)
23615 .sr(1)
23616 .m(m)
23617 .n(n)
23618 .k(k)
23619 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023620 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023621 }
23622 }
23623 }
23624 }
23625
Frank Barchard0725b8d2020-12-07 11:07:35 -080023626 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023627 for (uint32_t n = 9; n < 16; n++) {
23628 for (size_t k = 1; k <= 20; k += 5) {
23629 GemmMicrokernelTester()
23630 .mr(4)
23631 .nr(8)
23632 .kr(1)
23633 .sr(1)
23634 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023635 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023636 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023637 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023638 }
23639 }
23640 }
23641
Frank Barchard0725b8d2020-12-07 11:07:35 -080023642 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023643 for (uint32_t n = 9; n < 16; n++) {
23644 for (size_t k = 1; k <= 20; k += 5) {
23645 GemmMicrokernelTester()
23646 .mr(4)
23647 .nr(8)
23648 .kr(1)
23649 .sr(1)
23650 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023651 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023652 .k(k)
23653 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023654 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023655 }
23656 }
23657 }
23658
Frank Barchard0725b8d2020-12-07 11:07:35 -080023659 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023660 for (uint32_t n = 9; n < 16; n++) {
23661 for (size_t k = 1; k <= 20; k += 5) {
23662 GemmMicrokernelTester()
23663 .mr(4)
23664 .nr(8)
23665 .kr(1)
23666 .sr(1)
23667 .m(4)
23668 .n(n)
23669 .k(k)
23670 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023671 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023672 }
23673 }
23674 }
23675
Frank Barchard0725b8d2020-12-07 11:07:35 -080023676 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023677 for (uint32_t n = 9; n < 16; n++) {
23678 for (size_t k = 1; k <= 20; k += 5) {
23679 for (uint32_t m = 1; m <= 4; m++) {
23680 GemmMicrokernelTester()
23681 .mr(4)
23682 .nr(8)
23683 .kr(1)
23684 .sr(1)
23685 .m(m)
23686 .n(n)
23687 .k(k)
23688 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023689 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023690 }
23691 }
23692 }
23693 }
23694
Frank Barchard0725b8d2020-12-07 11:07:35 -080023695 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023696 for (uint32_t n = 16; n <= 24; n += 8) {
23697 for (size_t k = 1; k <= 20; k += 5) {
23698 GemmMicrokernelTester()
23699 .mr(4)
23700 .nr(8)
23701 .kr(1)
23702 .sr(1)
23703 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023704 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023705 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023706 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023707 }
23708 }
23709 }
23710
Frank Barchard0725b8d2020-12-07 11:07:35 -080023711 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023712 for (uint32_t n = 16; n <= 24; n += 8) {
23713 for (size_t k = 1; k <= 20; k += 5) {
23714 GemmMicrokernelTester()
23715 .mr(4)
23716 .nr(8)
23717 .kr(1)
23718 .sr(1)
23719 .m(4)
23720 .n(n)
23721 .k(k)
23722 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023723 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023724 }
23725 }
23726 }
23727
Frank Barchard0725b8d2020-12-07 11:07:35 -080023728 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023729 for (uint32_t n = 16; n <= 24; n += 8) {
23730 for (size_t k = 1; k <= 20; k += 5) {
23731 GemmMicrokernelTester()
23732 .mr(4)
23733 .nr(8)
23734 .kr(1)
23735 .sr(1)
23736 .m(4)
23737 .n(n)
23738 .k(k)
23739 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023740 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023741 }
23742 }
23743 }
23744
Frank Barchard0725b8d2020-12-07 11:07:35 -080023745 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023746 for (uint32_t n = 16; n <= 24; n += 8) {
23747 for (size_t k = 1; k <= 20; k += 5) {
23748 for (uint32_t m = 1; m <= 4; m++) {
23749 GemmMicrokernelTester()
23750 .mr(4)
23751 .nr(8)
23752 .kr(1)
23753 .sr(1)
23754 .m(m)
23755 .n(n)
23756 .k(k)
23757 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023758 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023759 }
23760 }
23761 }
23762 }
23763
Frank Barchard0725b8d2020-12-07 11:07:35 -080023764 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023765 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023766 for (uint32_t n = 1; n <= 8; n++) {
23767 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023768 GemmMicrokernelTester()
23769 .mr(4)
23770 .nr(8)
23771 .kr(1)
23772 .sr(1)
23773 .m(m)
23774 .n(n)
23775 .k(k)
23776 .cm_stride(11)
23777 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023778 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023779 }
23780 }
23781 }
23782 }
23783
Frank Barchard0725b8d2020-12-07 11:07:35 -080023784 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023785 GemmMicrokernelTester()
23786 .mr(4)
23787 .nr(8)
23788 .kr(1)
23789 .sr(1)
23790 .m(4)
23791 .n(8)
23792 .k(4)
23793 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023794 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023795 }
23796
Frank Barchard0725b8d2020-12-07 11:07:35 -080023797 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023798 GemmMicrokernelTester()
23799 .mr(4)
23800 .nr(8)
23801 .kr(1)
23802 .sr(1)
23803 .m(4)
23804 .n(8)
23805 .k(4)
23806 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023807 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023808 }
23809
Frank Barchard0725b8d2020-12-07 11:07:35 -080023810 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023811 GemmMicrokernelTester()
23812 .mr(4)
23813 .nr(8)
23814 .kr(1)
23815 .sr(1)
23816 .m(4)
23817 .n(8)
23818 .k(4)
23819 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023820 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023821 }
Marat Dukhan4c617792021-12-21 15:47:58 -080023822#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023823
23824
Marat Dukhan4c617792021-12-21 15:47:58 -080023825#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080023826 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023827 GemmMicrokernelTester()
23828 .mr(5)
23829 .nr(8)
23830 .kr(1)
23831 .sr(1)
23832 .m(5)
23833 .n(8)
23834 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023835 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023836 }
23837
Frank Barchard0725b8d2020-12-07 11:07:35 -080023838 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023839 GemmMicrokernelTester()
23840 .mr(5)
23841 .nr(8)
23842 .kr(1)
23843 .sr(1)
23844 .m(5)
23845 .n(8)
23846 .k(4)
23847 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023848 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023849 }
23850
Frank Barchard0725b8d2020-12-07 11:07:35 -080023851 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023852 GemmMicrokernelTester()
23853 .mr(5)
23854 .nr(8)
23855 .kr(1)
23856 .sr(1)
23857 .m(5)
23858 .n(8)
23859 .k(4)
23860 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023861 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023862 }
23863
Frank Barchard0725b8d2020-12-07 11:07:35 -080023864 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023865 for (uint32_t n = 1; n <= 8; n++) {
23866 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023867 GemmMicrokernelTester()
23868 .mr(5)
23869 .nr(8)
23870 .kr(1)
23871 .sr(1)
23872 .m(m)
23873 .n(n)
23874 .k(4)
23875 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023876 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023877 }
23878 }
23879 }
23880
Frank Barchard0725b8d2020-12-07 11:07:35 -080023881 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023882 for (uint32_t m = 1; m <= 5; m++) {
23883 GemmMicrokernelTester()
23884 .mr(5)
23885 .nr(8)
23886 .kr(1)
23887 .sr(1)
23888 .m(m)
23889 .n(8)
23890 .k(4)
23891 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023892 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023893 }
23894 }
23895
Frank Barchard0725b8d2020-12-07 11:07:35 -080023896 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023897 for (uint32_t n = 1; n <= 8; n++) {
23898 GemmMicrokernelTester()
23899 .mr(5)
23900 .nr(8)
23901 .kr(1)
23902 .sr(1)
23903 .m(5)
23904 .n(n)
23905 .k(4)
23906 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023907 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023908 }
23909 }
23910
Frank Barchard0725b8d2020-12-07 11:07:35 -080023911 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023912 for (size_t k = 1; k < 4; k++) {
23913 GemmMicrokernelTester()
23914 .mr(5)
23915 .nr(8)
23916 .kr(1)
23917 .sr(1)
23918 .m(5)
23919 .n(8)
23920 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023921 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023922 }
23923 }
23924
Frank Barchard0725b8d2020-12-07 11:07:35 -080023925 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_lt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023926 for (size_t k = 1; k < 4; k++) {
23927 GemmMicrokernelTester()
23928 .mr(5)
23929 .nr(8)
23930 .kr(1)
23931 .sr(1)
23932 .m(5)
23933 .n(8)
23934 .k(k)
23935 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023936 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023937 }
23938 }
23939
Frank Barchard0725b8d2020-12-07 11:07:35 -080023940 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023941 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023942 for (uint32_t n = 1; n <= 8; n++) {
23943 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023944 GemmMicrokernelTester()
23945 .mr(5)
23946 .nr(8)
23947 .kr(1)
23948 .sr(1)
23949 .m(m)
23950 .n(n)
23951 .k(k)
23952 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023953 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023954 }
23955 }
23956 }
23957 }
23958
Frank Barchard0725b8d2020-12-07 11:07:35 -080023959 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023960 for (size_t k = 5; k < 8; k++) {
23961 GemmMicrokernelTester()
23962 .mr(5)
23963 .nr(8)
23964 .kr(1)
23965 .sr(1)
23966 .m(5)
23967 .n(8)
23968 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023969 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023970 }
23971 }
23972
Frank Barchard0725b8d2020-12-07 11:07:35 -080023973 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_gt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023974 for (size_t k = 5; k < 8; k++) {
23975 GemmMicrokernelTester()
23976 .mr(5)
23977 .nr(8)
23978 .kr(1)
23979 .sr(1)
23980 .m(5)
23981 .n(8)
23982 .k(k)
23983 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023984 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023985 }
23986 }
23987
Frank Barchard0725b8d2020-12-07 11:07:35 -080023988 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023989 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023990 for (uint32_t n = 1; n <= 8; n++) {
23991 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023992 GemmMicrokernelTester()
23993 .mr(5)
23994 .nr(8)
23995 .kr(1)
23996 .sr(1)
23997 .m(m)
23998 .n(n)
23999 .k(k)
24000 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024001 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024002 }
24003 }
24004 }
24005 }
24006
Frank Barchard0725b8d2020-12-07 11:07:35 -080024007 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024008 for (size_t k = 8; k <= 40; k += 4) {
24009 GemmMicrokernelTester()
24010 .mr(5)
24011 .nr(8)
24012 .kr(1)
24013 .sr(1)
24014 .m(5)
24015 .n(8)
24016 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024017 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024018 }
24019 }
24020
Frank Barchard0725b8d2020-12-07 11:07:35 -080024021 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_div_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024022 for (size_t k = 8; k <= 40; k += 4) {
24023 GemmMicrokernelTester()
24024 .mr(5)
24025 .nr(8)
24026 .kr(1)
24027 .sr(1)
24028 .m(5)
24029 .n(8)
24030 .k(k)
24031 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024032 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024033 }
24034 }
24035
Frank Barchard0725b8d2020-12-07 11:07:35 -080024036 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024037 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024038 for (uint32_t n = 1; n <= 8; n++) {
24039 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024040 GemmMicrokernelTester()
24041 .mr(5)
24042 .nr(8)
24043 .kr(1)
24044 .sr(1)
24045 .m(m)
24046 .n(n)
24047 .k(k)
24048 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024049 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024050 }
24051 }
24052 }
24053 }
24054
Frank Barchard0725b8d2020-12-07 11:07:35 -080024055 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024056 for (uint32_t n = 9; n < 16; n++) {
24057 for (size_t k = 1; k <= 20; k += 5) {
24058 GemmMicrokernelTester()
24059 .mr(5)
24060 .nr(8)
24061 .kr(1)
24062 .sr(1)
24063 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024064 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024065 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024066 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024067 }
24068 }
24069 }
24070
Frank Barchard0725b8d2020-12-07 11:07:35 -080024071 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024072 for (uint32_t n = 9; n < 16; n++) {
24073 for (size_t k = 1; k <= 20; k += 5) {
24074 GemmMicrokernelTester()
24075 .mr(5)
24076 .nr(8)
24077 .kr(1)
24078 .sr(1)
24079 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024080 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024081 .k(k)
24082 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024083 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024084 }
24085 }
24086 }
24087
Frank Barchard0725b8d2020-12-07 11:07:35 -080024088 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024089 for (uint32_t n = 9; n < 16; n++) {
24090 for (size_t k = 1; k <= 20; k += 5) {
24091 GemmMicrokernelTester()
24092 .mr(5)
24093 .nr(8)
24094 .kr(1)
24095 .sr(1)
24096 .m(5)
24097 .n(n)
24098 .k(k)
24099 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024100 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024101 }
24102 }
24103 }
24104
Frank Barchard0725b8d2020-12-07 11:07:35 -080024105 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024106 for (uint32_t n = 9; n < 16; n++) {
24107 for (size_t k = 1; k <= 20; k += 5) {
24108 for (uint32_t m = 1; m <= 5; m++) {
24109 GemmMicrokernelTester()
24110 .mr(5)
24111 .nr(8)
24112 .kr(1)
24113 .sr(1)
24114 .m(m)
24115 .n(n)
24116 .k(k)
24117 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024118 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024119 }
24120 }
24121 }
24122 }
24123
Frank Barchard0725b8d2020-12-07 11:07:35 -080024124 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024125 for (uint32_t n = 16; n <= 24; n += 8) {
24126 for (size_t k = 1; k <= 20; k += 5) {
24127 GemmMicrokernelTester()
24128 .mr(5)
24129 .nr(8)
24130 .kr(1)
24131 .sr(1)
24132 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024133 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024134 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024135 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024136 }
24137 }
24138 }
24139
Frank Barchard0725b8d2020-12-07 11:07:35 -080024140 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024141 for (uint32_t n = 16; n <= 24; n += 8) {
24142 for (size_t k = 1; k <= 20; k += 5) {
24143 GemmMicrokernelTester()
24144 .mr(5)
24145 .nr(8)
24146 .kr(1)
24147 .sr(1)
24148 .m(5)
24149 .n(n)
24150 .k(k)
24151 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024152 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024153 }
24154 }
24155 }
24156
Frank Barchard0725b8d2020-12-07 11:07:35 -080024157 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024158 for (uint32_t n = 16; n <= 24; n += 8) {
24159 for (size_t k = 1; k <= 20; k += 5) {
24160 GemmMicrokernelTester()
24161 .mr(5)
24162 .nr(8)
24163 .kr(1)
24164 .sr(1)
24165 .m(5)
24166 .n(n)
24167 .k(k)
24168 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024169 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024170 }
24171 }
24172 }
24173
Frank Barchard0725b8d2020-12-07 11:07:35 -080024174 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024175 for (uint32_t n = 16; n <= 24; n += 8) {
24176 for (size_t k = 1; k <= 20; k += 5) {
24177 for (uint32_t m = 1; m <= 5; m++) {
24178 GemmMicrokernelTester()
24179 .mr(5)
24180 .nr(8)
24181 .kr(1)
24182 .sr(1)
24183 .m(m)
24184 .n(n)
24185 .k(k)
24186 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024187 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024188 }
24189 }
24190 }
24191 }
24192
Frank Barchard0725b8d2020-12-07 11:07:35 -080024193 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024194 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024195 for (uint32_t n = 1; n <= 8; n++) {
24196 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024197 GemmMicrokernelTester()
24198 .mr(5)
24199 .nr(8)
24200 .kr(1)
24201 .sr(1)
24202 .m(m)
24203 .n(n)
24204 .k(k)
24205 .cm_stride(11)
24206 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024207 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024208 }
24209 }
24210 }
24211 }
24212
Frank Barchard0725b8d2020-12-07 11:07:35 -080024213 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024214 GemmMicrokernelTester()
24215 .mr(5)
24216 .nr(8)
24217 .kr(1)
24218 .sr(1)
24219 .m(5)
24220 .n(8)
24221 .k(4)
24222 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024223 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024224 }
24225
Frank Barchard0725b8d2020-12-07 11:07:35 -080024226 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024227 GemmMicrokernelTester()
24228 .mr(5)
24229 .nr(8)
24230 .kr(1)
24231 .sr(1)
24232 .m(5)
24233 .n(8)
24234 .k(4)
24235 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024236 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024237 }
24238
Frank Barchard0725b8d2020-12-07 11:07:35 -080024239 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_ARM_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024240 GemmMicrokernelTester()
24241 .mr(5)
24242 .nr(8)
24243 .kr(1)
24244 .sr(1)
24245 .m(5)
24246 .n(8)
24247 .k(4)
24248 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024249 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024250 }
Marat Dukhan4c617792021-12-21 15:47:58 -080024251#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024252
24253
Marat Dukhan4c617792021-12-21 15:47:58 -080024254#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080024255 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024256 GemmMicrokernelTester()
24257 .mr(3)
24258 .nr(8)
24259 .kr(1)
24260 .sr(1)
24261 .m(3)
24262 .n(8)
24263 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024264 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024265 }
24266
Frank Barchard0725b8d2020-12-07 11:07:35 -080024267 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024268 GemmMicrokernelTester()
24269 .mr(3)
24270 .nr(8)
24271 .kr(1)
24272 .sr(1)
24273 .m(3)
24274 .n(8)
24275 .k(4)
24276 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024277 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024278 }
24279
Frank Barchard0725b8d2020-12-07 11:07:35 -080024280 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024281 GemmMicrokernelTester()
24282 .mr(3)
24283 .nr(8)
24284 .kr(1)
24285 .sr(1)
24286 .m(3)
24287 .n(8)
24288 .k(4)
24289 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024290 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024291 }
24292
Frank Barchard0725b8d2020-12-07 11:07:35 -080024293 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024294 for (uint32_t n = 1; n <= 8; n++) {
24295 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024296 GemmMicrokernelTester()
24297 .mr(3)
24298 .nr(8)
24299 .kr(1)
24300 .sr(1)
24301 .m(m)
24302 .n(n)
24303 .k(4)
24304 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024305 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024306 }
24307 }
24308 }
24309
Frank Barchard0725b8d2020-12-07 11:07:35 -080024310 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024311 for (uint32_t m = 1; m <= 3; m++) {
24312 GemmMicrokernelTester()
24313 .mr(3)
24314 .nr(8)
24315 .kr(1)
24316 .sr(1)
24317 .m(m)
24318 .n(8)
24319 .k(4)
24320 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024321 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024322 }
24323 }
24324
Frank Barchard0725b8d2020-12-07 11:07:35 -080024325 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024326 for (uint32_t n = 1; n <= 8; n++) {
24327 GemmMicrokernelTester()
24328 .mr(3)
24329 .nr(8)
24330 .kr(1)
24331 .sr(1)
24332 .m(3)
24333 .n(n)
24334 .k(4)
24335 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024336 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024337 }
24338 }
24339
Frank Barchard0725b8d2020-12-07 11:07:35 -080024340 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024341 for (size_t k = 1; k < 4; k++) {
24342 GemmMicrokernelTester()
24343 .mr(3)
24344 .nr(8)
24345 .kr(1)
24346 .sr(1)
24347 .m(3)
24348 .n(8)
24349 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024350 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024351 }
24352 }
24353
Frank Barchard0725b8d2020-12-07 11:07:35 -080024354 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_lt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024355 for (size_t k = 1; k < 4; k++) {
24356 GemmMicrokernelTester()
24357 .mr(3)
24358 .nr(8)
24359 .kr(1)
24360 .sr(1)
24361 .m(3)
24362 .n(8)
24363 .k(k)
24364 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024365 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024366 }
24367 }
24368
Frank Barchard0725b8d2020-12-07 11:07:35 -080024369 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024370 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024371 for (uint32_t n = 1; n <= 8; n++) {
24372 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024373 GemmMicrokernelTester()
24374 .mr(3)
24375 .nr(8)
24376 .kr(1)
24377 .sr(1)
24378 .m(m)
24379 .n(n)
24380 .k(k)
24381 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024382 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024383 }
24384 }
24385 }
24386 }
24387
Frank Barchard0725b8d2020-12-07 11:07:35 -080024388 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024389 for (size_t k = 5; k < 8; k++) {
24390 GemmMicrokernelTester()
24391 .mr(3)
24392 .nr(8)
24393 .kr(1)
24394 .sr(1)
24395 .m(3)
24396 .n(8)
24397 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024398 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024399 }
24400 }
24401
Frank Barchard0725b8d2020-12-07 11:07:35 -080024402 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_gt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024403 for (size_t k = 5; k < 8; k++) {
24404 GemmMicrokernelTester()
24405 .mr(3)
24406 .nr(8)
24407 .kr(1)
24408 .sr(1)
24409 .m(3)
24410 .n(8)
24411 .k(k)
24412 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024413 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024414 }
24415 }
24416
Frank Barchard0725b8d2020-12-07 11:07:35 -080024417 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024418 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024419 for (uint32_t n = 1; n <= 8; n++) {
24420 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024421 GemmMicrokernelTester()
24422 .mr(3)
24423 .nr(8)
24424 .kr(1)
24425 .sr(1)
24426 .m(m)
24427 .n(n)
24428 .k(k)
24429 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024430 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024431 }
24432 }
24433 }
24434 }
24435
Frank Barchard0725b8d2020-12-07 11:07:35 -080024436 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024437 for (size_t k = 8; k <= 40; k += 4) {
24438 GemmMicrokernelTester()
24439 .mr(3)
24440 .nr(8)
24441 .kr(1)
24442 .sr(1)
24443 .m(3)
24444 .n(8)
24445 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024446 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024447 }
24448 }
24449
Frank Barchard0725b8d2020-12-07 11:07:35 -080024450 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_div_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024451 for (size_t k = 8; k <= 40; k += 4) {
24452 GemmMicrokernelTester()
24453 .mr(3)
24454 .nr(8)
24455 .kr(1)
24456 .sr(1)
24457 .m(3)
24458 .n(8)
24459 .k(k)
24460 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024461 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024462 }
24463 }
24464
Frank Barchard0725b8d2020-12-07 11:07:35 -080024465 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024466 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024467 for (uint32_t n = 1; n <= 8; n++) {
24468 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024469 GemmMicrokernelTester()
24470 .mr(3)
24471 .nr(8)
24472 .kr(1)
24473 .sr(1)
24474 .m(m)
24475 .n(n)
24476 .k(k)
24477 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024478 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024479 }
24480 }
24481 }
24482 }
24483
Frank Barchard0725b8d2020-12-07 11:07:35 -080024484 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024485 for (uint32_t n = 9; n < 16; n++) {
24486 for (size_t k = 1; k <= 20; k += 5) {
24487 GemmMicrokernelTester()
24488 .mr(3)
24489 .nr(8)
24490 .kr(1)
24491 .sr(1)
24492 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024493 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024494 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024495 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024496 }
24497 }
24498 }
24499
Frank Barchard0725b8d2020-12-07 11:07:35 -080024500 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024501 for (uint32_t n = 9; n < 16; n++) {
24502 for (size_t k = 1; k <= 20; k += 5) {
24503 GemmMicrokernelTester()
24504 .mr(3)
24505 .nr(8)
24506 .kr(1)
24507 .sr(1)
24508 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024509 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024510 .k(k)
24511 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024512 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024513 }
24514 }
24515 }
24516
Frank Barchard0725b8d2020-12-07 11:07:35 -080024517 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024518 for (uint32_t n = 9; n < 16; n++) {
24519 for (size_t k = 1; k <= 20; k += 5) {
24520 GemmMicrokernelTester()
24521 .mr(3)
24522 .nr(8)
24523 .kr(1)
24524 .sr(1)
24525 .m(3)
24526 .n(n)
24527 .k(k)
24528 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024529 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024530 }
24531 }
24532 }
24533
Frank Barchard0725b8d2020-12-07 11:07:35 -080024534 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024535 for (uint32_t n = 9; n < 16; n++) {
24536 for (size_t k = 1; k <= 20; k += 5) {
24537 for (uint32_t m = 1; m <= 3; m++) {
24538 GemmMicrokernelTester()
24539 .mr(3)
24540 .nr(8)
24541 .kr(1)
24542 .sr(1)
24543 .m(m)
24544 .n(n)
24545 .k(k)
24546 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024547 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024548 }
24549 }
24550 }
24551 }
24552
Frank Barchard0725b8d2020-12-07 11:07:35 -080024553 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024554 for (uint32_t n = 16; n <= 24; n += 8) {
24555 for (size_t k = 1; k <= 20; k += 5) {
24556 GemmMicrokernelTester()
24557 .mr(3)
24558 .nr(8)
24559 .kr(1)
24560 .sr(1)
24561 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024562 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024563 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024564 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024565 }
24566 }
24567 }
24568
Frank Barchard0725b8d2020-12-07 11:07:35 -080024569 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024570 for (uint32_t n = 16; n <= 24; n += 8) {
24571 for (size_t k = 1; k <= 20; k += 5) {
24572 GemmMicrokernelTester()
24573 .mr(3)
24574 .nr(8)
24575 .kr(1)
24576 .sr(1)
24577 .m(3)
24578 .n(n)
24579 .k(k)
24580 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024581 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024582 }
24583 }
24584 }
24585
Frank Barchard0725b8d2020-12-07 11:07:35 -080024586 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024587 for (uint32_t n = 16; n <= 24; n += 8) {
24588 for (size_t k = 1; k <= 20; k += 5) {
24589 GemmMicrokernelTester()
24590 .mr(3)
24591 .nr(8)
24592 .kr(1)
24593 .sr(1)
24594 .m(3)
24595 .n(n)
24596 .k(k)
24597 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024598 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024599 }
24600 }
24601 }
24602
Frank Barchard0725b8d2020-12-07 11:07:35 -080024603 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024604 for (uint32_t n = 16; n <= 24; n += 8) {
24605 for (size_t k = 1; k <= 20; k += 5) {
24606 for (uint32_t m = 1; m <= 3; m++) {
24607 GemmMicrokernelTester()
24608 .mr(3)
24609 .nr(8)
24610 .kr(1)
24611 .sr(1)
24612 .m(m)
24613 .n(n)
24614 .k(k)
24615 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024616 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024617 }
24618 }
24619 }
24620 }
24621
Frank Barchard0725b8d2020-12-07 11:07:35 -080024622 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024623 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024624 for (uint32_t n = 1; n <= 8; n++) {
24625 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024626 GemmMicrokernelTester()
24627 .mr(3)
24628 .nr(8)
24629 .kr(1)
24630 .sr(1)
24631 .m(m)
24632 .n(n)
24633 .k(k)
24634 .cm_stride(11)
24635 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024636 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024637 }
24638 }
24639 }
24640 }
24641
Frank Barchard0725b8d2020-12-07 11:07:35 -080024642 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024643 GemmMicrokernelTester()
24644 .mr(3)
24645 .nr(8)
24646 .kr(1)
24647 .sr(1)
24648 .m(3)
24649 .n(8)
24650 .k(4)
24651 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024652 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024653 }
24654
Frank Barchard0725b8d2020-12-07 11:07:35 -080024655 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024656 GemmMicrokernelTester()
24657 .mr(3)
24658 .nr(8)
24659 .kr(1)
24660 .sr(1)
24661 .m(3)
24662 .n(8)
24663 .k(4)
24664 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024665 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024666 }
24667
Frank Barchard0725b8d2020-12-07 11:07:35 -080024668 TEST(F32_GEMMINC_MINMAX_3X8__WASMSIMD_X86_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024669 GemmMicrokernelTester()
24670 .mr(3)
24671 .nr(8)
24672 .kr(1)
24673 .sr(1)
24674 .m(3)
24675 .n(8)
24676 .k(4)
24677 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024678 .Test(xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024679 }
Marat Dukhan4c617792021-12-21 15:47:58 -080024680#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024681
24682
Marat Dukhan4c617792021-12-21 15:47:58 -080024683#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080024684 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024685 GemmMicrokernelTester()
24686 .mr(4)
24687 .nr(8)
24688 .kr(1)
24689 .sr(1)
24690 .m(4)
24691 .n(8)
24692 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024693 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024694 }
24695
Frank Barchard0725b8d2020-12-07 11:07:35 -080024696 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024697 GemmMicrokernelTester()
24698 .mr(4)
24699 .nr(8)
24700 .kr(1)
24701 .sr(1)
24702 .m(4)
24703 .n(8)
24704 .k(4)
24705 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024706 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024707 }
24708
Frank Barchard0725b8d2020-12-07 11:07:35 -080024709 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024710 GemmMicrokernelTester()
24711 .mr(4)
24712 .nr(8)
24713 .kr(1)
24714 .sr(1)
24715 .m(4)
24716 .n(8)
24717 .k(4)
24718 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024719 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024720 }
24721
Frank Barchard0725b8d2020-12-07 11:07:35 -080024722 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024723 for (uint32_t n = 1; n <= 8; n++) {
24724 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024725 GemmMicrokernelTester()
24726 .mr(4)
24727 .nr(8)
24728 .kr(1)
24729 .sr(1)
24730 .m(m)
24731 .n(n)
24732 .k(4)
24733 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024734 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024735 }
24736 }
24737 }
24738
Frank Barchard0725b8d2020-12-07 11:07:35 -080024739 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024740 for (uint32_t m = 1; m <= 4; m++) {
24741 GemmMicrokernelTester()
24742 .mr(4)
24743 .nr(8)
24744 .kr(1)
24745 .sr(1)
24746 .m(m)
24747 .n(8)
24748 .k(4)
24749 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024750 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024751 }
24752 }
24753
Frank Barchard0725b8d2020-12-07 11:07:35 -080024754 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024755 for (uint32_t n = 1; n <= 8; n++) {
24756 GemmMicrokernelTester()
24757 .mr(4)
24758 .nr(8)
24759 .kr(1)
24760 .sr(1)
24761 .m(4)
24762 .n(n)
24763 .k(4)
24764 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024765 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024766 }
24767 }
24768
Frank Barchard0725b8d2020-12-07 11:07:35 -080024769 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024770 for (size_t k = 1; k < 4; k++) {
24771 GemmMicrokernelTester()
24772 .mr(4)
24773 .nr(8)
24774 .kr(1)
24775 .sr(1)
24776 .m(4)
24777 .n(8)
24778 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024779 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024780 }
24781 }
24782
Frank Barchard0725b8d2020-12-07 11:07:35 -080024783 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_lt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024784 for (size_t k = 1; k < 4; k++) {
24785 GemmMicrokernelTester()
24786 .mr(4)
24787 .nr(8)
24788 .kr(1)
24789 .sr(1)
24790 .m(4)
24791 .n(8)
24792 .k(k)
24793 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024794 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024795 }
24796 }
24797
Frank Barchard0725b8d2020-12-07 11:07:35 -080024798 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024799 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024800 for (uint32_t n = 1; n <= 8; n++) {
24801 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024802 GemmMicrokernelTester()
24803 .mr(4)
24804 .nr(8)
24805 .kr(1)
24806 .sr(1)
24807 .m(m)
24808 .n(n)
24809 .k(k)
24810 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024811 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024812 }
24813 }
24814 }
24815 }
24816
Frank Barchard0725b8d2020-12-07 11:07:35 -080024817 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024818 for (size_t k = 5; k < 8; k++) {
24819 GemmMicrokernelTester()
24820 .mr(4)
24821 .nr(8)
24822 .kr(1)
24823 .sr(1)
24824 .m(4)
24825 .n(8)
24826 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024827 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024828 }
24829 }
24830
Frank Barchard0725b8d2020-12-07 11:07:35 -080024831 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024832 for (size_t k = 5; k < 8; k++) {
24833 GemmMicrokernelTester()
24834 .mr(4)
24835 .nr(8)
24836 .kr(1)
24837 .sr(1)
24838 .m(4)
24839 .n(8)
24840 .k(k)
24841 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024842 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024843 }
24844 }
24845
Frank Barchard0725b8d2020-12-07 11:07:35 -080024846 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024847 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024848 for (uint32_t n = 1; n <= 8; n++) {
24849 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024850 GemmMicrokernelTester()
24851 .mr(4)
24852 .nr(8)
24853 .kr(1)
24854 .sr(1)
24855 .m(m)
24856 .n(n)
24857 .k(k)
24858 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024859 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024860 }
24861 }
24862 }
24863 }
24864
Frank Barchard0725b8d2020-12-07 11:07:35 -080024865 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024866 for (size_t k = 8; k <= 40; k += 4) {
24867 GemmMicrokernelTester()
24868 .mr(4)
24869 .nr(8)
24870 .kr(1)
24871 .sr(1)
24872 .m(4)
24873 .n(8)
24874 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024875 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024876 }
24877 }
24878
Frank Barchard0725b8d2020-12-07 11:07:35 -080024879 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_div_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024880 for (size_t k = 8; k <= 40; k += 4) {
24881 GemmMicrokernelTester()
24882 .mr(4)
24883 .nr(8)
24884 .kr(1)
24885 .sr(1)
24886 .m(4)
24887 .n(8)
24888 .k(k)
24889 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024890 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024891 }
24892 }
24893
Frank Barchard0725b8d2020-12-07 11:07:35 -080024894 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024895 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024896 for (uint32_t n = 1; n <= 8; n++) {
24897 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024898 GemmMicrokernelTester()
24899 .mr(4)
24900 .nr(8)
24901 .kr(1)
24902 .sr(1)
24903 .m(m)
24904 .n(n)
24905 .k(k)
24906 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024907 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024908 }
24909 }
24910 }
24911 }
24912
Frank Barchard0725b8d2020-12-07 11:07:35 -080024913 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024914 for (uint32_t n = 9; n < 16; n++) {
24915 for (size_t k = 1; k <= 20; k += 5) {
24916 GemmMicrokernelTester()
24917 .mr(4)
24918 .nr(8)
24919 .kr(1)
24920 .sr(1)
24921 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024922 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024923 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024924 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024925 }
24926 }
24927 }
24928
Frank Barchard0725b8d2020-12-07 11:07:35 -080024929 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024930 for (uint32_t n = 9; n < 16; n++) {
24931 for (size_t k = 1; k <= 20; k += 5) {
24932 GemmMicrokernelTester()
24933 .mr(4)
24934 .nr(8)
24935 .kr(1)
24936 .sr(1)
24937 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024938 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024939 .k(k)
24940 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024941 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024942 }
24943 }
24944 }
24945
Frank Barchard0725b8d2020-12-07 11:07:35 -080024946 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024947 for (uint32_t n = 9; n < 16; n++) {
24948 for (size_t k = 1; k <= 20; k += 5) {
24949 GemmMicrokernelTester()
24950 .mr(4)
24951 .nr(8)
24952 .kr(1)
24953 .sr(1)
24954 .m(4)
24955 .n(n)
24956 .k(k)
24957 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024958 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024959 }
24960 }
24961 }
24962
Frank Barchard0725b8d2020-12-07 11:07:35 -080024963 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024964 for (uint32_t n = 9; n < 16; n++) {
24965 for (size_t k = 1; k <= 20; k += 5) {
24966 for (uint32_t m = 1; m <= 4; m++) {
24967 GemmMicrokernelTester()
24968 .mr(4)
24969 .nr(8)
24970 .kr(1)
24971 .sr(1)
24972 .m(m)
24973 .n(n)
24974 .k(k)
24975 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024976 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024977 }
24978 }
24979 }
24980 }
24981
Frank Barchard0725b8d2020-12-07 11:07:35 -080024982 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024983 for (uint32_t n = 16; n <= 24; n += 8) {
24984 for (size_t k = 1; k <= 20; k += 5) {
24985 GemmMicrokernelTester()
24986 .mr(4)
24987 .nr(8)
24988 .kr(1)
24989 .sr(1)
24990 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024991 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024992 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024993 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024994 }
24995 }
24996 }
24997
Frank Barchard0725b8d2020-12-07 11:07:35 -080024998 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024999 for (uint32_t n = 16; n <= 24; n += 8) {
25000 for (size_t k = 1; k <= 20; k += 5) {
25001 GemmMicrokernelTester()
25002 .mr(4)
25003 .nr(8)
25004 .kr(1)
25005 .sr(1)
25006 .m(4)
25007 .n(n)
25008 .k(k)
25009 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025010 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025011 }
25012 }
25013 }
25014
Frank Barchard0725b8d2020-12-07 11:07:35 -080025015 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025016 for (uint32_t n = 16; n <= 24; n += 8) {
25017 for (size_t k = 1; k <= 20; k += 5) {
25018 GemmMicrokernelTester()
25019 .mr(4)
25020 .nr(8)
25021 .kr(1)
25022 .sr(1)
25023 .m(4)
25024 .n(n)
25025 .k(k)
25026 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025027 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025028 }
25029 }
25030 }
25031
Frank Barchard0725b8d2020-12-07 11:07:35 -080025032 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025033 for (uint32_t n = 16; n <= 24; n += 8) {
25034 for (size_t k = 1; k <= 20; k += 5) {
25035 for (uint32_t m = 1; m <= 4; m++) {
25036 GemmMicrokernelTester()
25037 .mr(4)
25038 .nr(8)
25039 .kr(1)
25040 .sr(1)
25041 .m(m)
25042 .n(n)
25043 .k(k)
25044 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025045 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025046 }
25047 }
25048 }
25049 }
25050
Frank Barchard0725b8d2020-12-07 11:07:35 -080025051 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025052 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025053 for (uint32_t n = 1; n <= 8; n++) {
25054 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025055 GemmMicrokernelTester()
25056 .mr(4)
25057 .nr(8)
25058 .kr(1)
25059 .sr(1)
25060 .m(m)
25061 .n(n)
25062 .k(k)
25063 .cm_stride(11)
25064 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025065 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025066 }
25067 }
25068 }
25069 }
25070
Frank Barchard0725b8d2020-12-07 11:07:35 -080025071 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025072 GemmMicrokernelTester()
25073 .mr(4)
25074 .nr(8)
25075 .kr(1)
25076 .sr(1)
25077 .m(4)
25078 .n(8)
25079 .k(4)
25080 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025081 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025082 }
25083
Frank Barchard0725b8d2020-12-07 11:07:35 -080025084 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025085 GemmMicrokernelTester()
25086 .mr(4)
25087 .nr(8)
25088 .kr(1)
25089 .sr(1)
25090 .m(4)
25091 .n(8)
25092 .k(4)
25093 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025094 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025095 }
25096
Frank Barchard0725b8d2020-12-07 11:07:35 -080025097 TEST(F32_GEMMINC_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025098 GemmMicrokernelTester()
25099 .mr(4)
25100 .nr(8)
25101 .kr(1)
25102 .sr(1)
25103 .m(4)
25104 .n(8)
25105 .k(4)
25106 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025107 .Test(xnn_f32_gemminc_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025108 }
Marat Dukhan4c617792021-12-21 15:47:58 -080025109#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025110
25111
Marat Dukhan4c617792021-12-21 15:47:58 -080025112#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080025113 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025114 GemmMicrokernelTester()
25115 .mr(5)
25116 .nr(8)
25117 .kr(1)
25118 .sr(1)
25119 .m(5)
25120 .n(8)
25121 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025122 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025123 }
25124
Frank Barchard0725b8d2020-12-07 11:07:35 -080025125 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025126 GemmMicrokernelTester()
25127 .mr(5)
25128 .nr(8)
25129 .kr(1)
25130 .sr(1)
25131 .m(5)
25132 .n(8)
25133 .k(4)
25134 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025135 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025136 }
25137
Frank Barchard0725b8d2020-12-07 11:07:35 -080025138 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025139 GemmMicrokernelTester()
25140 .mr(5)
25141 .nr(8)
25142 .kr(1)
25143 .sr(1)
25144 .m(5)
25145 .n(8)
25146 .k(4)
25147 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025148 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025149 }
25150
Frank Barchard0725b8d2020-12-07 11:07:35 -080025151 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025152 for (uint32_t n = 1; n <= 8; n++) {
25153 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025154 GemmMicrokernelTester()
25155 .mr(5)
25156 .nr(8)
25157 .kr(1)
25158 .sr(1)
25159 .m(m)
25160 .n(n)
25161 .k(4)
25162 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025163 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025164 }
25165 }
25166 }
25167
Frank Barchard0725b8d2020-12-07 11:07:35 -080025168 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025169 for (uint32_t m = 1; m <= 5; m++) {
25170 GemmMicrokernelTester()
25171 .mr(5)
25172 .nr(8)
25173 .kr(1)
25174 .sr(1)
25175 .m(m)
25176 .n(8)
25177 .k(4)
25178 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025179 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025180 }
25181 }
25182
Frank Barchard0725b8d2020-12-07 11:07:35 -080025183 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025184 for (uint32_t n = 1; n <= 8; n++) {
25185 GemmMicrokernelTester()
25186 .mr(5)
25187 .nr(8)
25188 .kr(1)
25189 .sr(1)
25190 .m(5)
25191 .n(n)
25192 .k(4)
25193 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025194 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025195 }
25196 }
25197
Frank Barchard0725b8d2020-12-07 11:07:35 -080025198 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025199 for (size_t k = 1; k < 4; k++) {
25200 GemmMicrokernelTester()
25201 .mr(5)
25202 .nr(8)
25203 .kr(1)
25204 .sr(1)
25205 .m(5)
25206 .n(8)
25207 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025208 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025209 }
25210 }
25211
Frank Barchard0725b8d2020-12-07 11:07:35 -080025212 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_lt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025213 for (size_t k = 1; k < 4; k++) {
25214 GemmMicrokernelTester()
25215 .mr(5)
25216 .nr(8)
25217 .kr(1)
25218 .sr(1)
25219 .m(5)
25220 .n(8)
25221 .k(k)
25222 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025223 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025224 }
25225 }
25226
Frank Barchard0725b8d2020-12-07 11:07:35 -080025227 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025228 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025229 for (uint32_t n = 1; n <= 8; n++) {
25230 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025231 GemmMicrokernelTester()
25232 .mr(5)
25233 .nr(8)
25234 .kr(1)
25235 .sr(1)
25236 .m(m)
25237 .n(n)
25238 .k(k)
25239 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025240 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025241 }
25242 }
25243 }
25244 }
25245
Frank Barchard0725b8d2020-12-07 11:07:35 -080025246 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025247 for (size_t k = 5; k < 8; k++) {
25248 GemmMicrokernelTester()
25249 .mr(5)
25250 .nr(8)
25251 .kr(1)
25252 .sr(1)
25253 .m(5)
25254 .n(8)
25255 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025256 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025257 }
25258 }
25259
Frank Barchard0725b8d2020-12-07 11:07:35 -080025260 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_gt_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025261 for (size_t k = 5; k < 8; k++) {
25262 GemmMicrokernelTester()
25263 .mr(5)
25264 .nr(8)
25265 .kr(1)
25266 .sr(1)
25267 .m(5)
25268 .n(8)
25269 .k(k)
25270 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025271 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025272 }
25273 }
25274
Frank Barchard0725b8d2020-12-07 11:07:35 -080025275 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025276 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025277 for (uint32_t n = 1; n <= 8; n++) {
25278 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025279 GemmMicrokernelTester()
25280 .mr(5)
25281 .nr(8)
25282 .kr(1)
25283 .sr(1)
25284 .m(m)
25285 .n(n)
25286 .k(k)
25287 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025288 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025289 }
25290 }
25291 }
25292 }
25293
Frank Barchard0725b8d2020-12-07 11:07:35 -080025294 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025295 for (size_t k = 8; k <= 40; k += 4) {
25296 GemmMicrokernelTester()
25297 .mr(5)
25298 .nr(8)
25299 .kr(1)
25300 .sr(1)
25301 .m(5)
25302 .n(8)
25303 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025304 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025305 }
25306 }
25307
Frank Barchard0725b8d2020-12-07 11:07:35 -080025308 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_div_4_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025309 for (size_t k = 8; k <= 40; k += 4) {
25310 GemmMicrokernelTester()
25311 .mr(5)
25312 .nr(8)
25313 .kr(1)
25314 .sr(1)
25315 .m(5)
25316 .n(8)
25317 .k(k)
25318 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025319 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025320 }
25321 }
25322
Frank Barchard0725b8d2020-12-07 11:07:35 -080025323 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025324 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025325 for (uint32_t n = 1; n <= 8; n++) {
25326 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025327 GemmMicrokernelTester()
25328 .mr(5)
25329 .nr(8)
25330 .kr(1)
25331 .sr(1)
25332 .m(m)
25333 .n(n)
25334 .k(k)
25335 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025336 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025337 }
25338 }
25339 }
25340 }
25341
Frank Barchard0725b8d2020-12-07 11:07:35 -080025342 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025343 for (uint32_t n = 9; n < 16; n++) {
25344 for (size_t k = 1; k <= 20; k += 5) {
25345 GemmMicrokernelTester()
25346 .mr(5)
25347 .nr(8)
25348 .kr(1)
25349 .sr(1)
25350 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025351 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025352 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025353 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025354 }
25355 }
25356 }
25357
Frank Barchard0725b8d2020-12-07 11:07:35 -080025358 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025359 for (uint32_t n = 9; n < 16; n++) {
25360 for (size_t k = 1; k <= 20; k += 5) {
25361 GemmMicrokernelTester()
25362 .mr(5)
25363 .nr(8)
25364 .kr(1)
25365 .sr(1)
25366 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025367 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025368 .k(k)
25369 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025370 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025371 }
25372 }
25373 }
25374
Frank Barchard0725b8d2020-12-07 11:07:35 -080025375 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025376 for (uint32_t n = 9; n < 16; n++) {
25377 for (size_t k = 1; k <= 20; k += 5) {
25378 GemmMicrokernelTester()
25379 .mr(5)
25380 .nr(8)
25381 .kr(1)
25382 .sr(1)
25383 .m(5)
25384 .n(n)
25385 .k(k)
25386 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025387 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025388 }
25389 }
25390 }
25391
Frank Barchard0725b8d2020-12-07 11:07:35 -080025392 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025393 for (uint32_t n = 9; n < 16; n++) {
25394 for (size_t k = 1; k <= 20; k += 5) {
25395 for (uint32_t m = 1; m <= 5; m++) {
25396 GemmMicrokernelTester()
25397 .mr(5)
25398 .nr(8)
25399 .kr(1)
25400 .sr(1)
25401 .m(m)
25402 .n(n)
25403 .k(k)
25404 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025405 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025406 }
25407 }
25408 }
25409 }
25410
Frank Barchard0725b8d2020-12-07 11:07:35 -080025411 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025412 for (uint32_t n = 16; n <= 24; n += 8) {
25413 for (size_t k = 1; k <= 20; k += 5) {
25414 GemmMicrokernelTester()
25415 .mr(5)
25416 .nr(8)
25417 .kr(1)
25418 .sr(1)
25419 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025420 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025421 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025422 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025423 }
25424 }
25425 }
25426
Frank Barchard0725b8d2020-12-07 11:07:35 -080025427 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025428 for (uint32_t n = 16; n <= 24; n += 8) {
25429 for (size_t k = 1; k <= 20; k += 5) {
25430 GemmMicrokernelTester()
25431 .mr(5)
25432 .nr(8)
25433 .kr(1)
25434 .sr(1)
25435 .m(5)
25436 .n(n)
25437 .k(k)
25438 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025439 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025440 }
25441 }
25442 }
25443
Frank Barchard0725b8d2020-12-07 11:07:35 -080025444 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025445 for (uint32_t n = 16; n <= 24; n += 8) {
25446 for (size_t k = 1; k <= 20; k += 5) {
25447 GemmMicrokernelTester()
25448 .mr(5)
25449 .nr(8)
25450 .kr(1)
25451 .sr(1)
25452 .m(5)
25453 .n(n)
25454 .k(k)
25455 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025456 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025457 }
25458 }
25459 }
25460
Frank Barchard0725b8d2020-12-07 11:07:35 -080025461 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025462 for (uint32_t n = 16; n <= 24; n += 8) {
25463 for (size_t k = 1; k <= 20; k += 5) {
25464 for (uint32_t m = 1; m <= 5; m++) {
25465 GemmMicrokernelTester()
25466 .mr(5)
25467 .nr(8)
25468 .kr(1)
25469 .sr(1)
25470 .m(m)
25471 .n(n)
25472 .k(k)
25473 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025474 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025475 }
25476 }
25477 }
25478 }
25479
Frank Barchard0725b8d2020-12-07 11:07:35 -080025480 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025481 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025482 for (uint32_t n = 1; n <= 8; n++) {
25483 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025484 GemmMicrokernelTester()
25485 .mr(5)
25486 .nr(8)
25487 .kr(1)
25488 .sr(1)
25489 .m(m)
25490 .n(n)
25491 .k(k)
25492 .cm_stride(11)
25493 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025494 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025495 }
25496 }
25497 }
25498 }
25499
Frank Barchard0725b8d2020-12-07 11:07:35 -080025500 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025501 GemmMicrokernelTester()
25502 .mr(5)
25503 .nr(8)
25504 .kr(1)
25505 .sr(1)
25506 .m(5)
25507 .n(8)
25508 .k(4)
25509 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025510 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025511 }
25512
Frank Barchard0725b8d2020-12-07 11:07:35 -080025513 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025514 GemmMicrokernelTester()
25515 .mr(5)
25516 .nr(8)
25517 .kr(1)
25518 .sr(1)
25519 .m(5)
25520 .n(8)
25521 .k(4)
25522 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025523 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025524 }
25525
Frank Barchard0725b8d2020-12-07 11:07:35 -080025526 TEST(F32_GEMMINC_MINMAX_5X8__WASMSIMD_X86_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025527 GemmMicrokernelTester()
25528 .mr(5)
25529 .nr(8)
25530 .kr(1)
25531 .sr(1)
25532 .m(5)
25533 .n(8)
25534 .k(4)
25535 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025536 .Test(xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025537 }
Marat Dukhan4c617792021-12-21 15:47:58 -080025538#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025539
25540
Marat Dukhan4c617792021-12-21 15:47:58 -080025541#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025542 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4) {
25543 GemmMicrokernelTester()
25544 .mr(1)
25545 .nr(8)
25546 .kr(1)
25547 .sr(4)
25548 .m(1)
25549 .n(8)
25550 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025551 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025552 }
25553
25554 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, strided_cn) {
25555 GemmMicrokernelTester()
25556 .mr(1)
25557 .nr(8)
25558 .kr(1)
25559 .sr(4)
25560 .m(1)
25561 .n(8)
25562 .k(4)
25563 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025564 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025565 }
25566
25567 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_strided_a) {
25568 GemmMicrokernelTester()
25569 .mr(1)
25570 .nr(8)
25571 .kr(1)
25572 .sr(4)
25573 .m(1)
25574 .n(8)
25575 .k(4)
25576 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025577 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025578 }
25579
25580 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025581 for (uint32_t n = 1; n <= 8; n++) {
25582 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025583 GemmMicrokernelTester()
25584 .mr(1)
25585 .nr(8)
25586 .kr(1)
25587 .sr(4)
25588 .m(m)
25589 .n(n)
25590 .k(4)
25591 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025592 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025593 }
25594 }
25595 }
25596
25597 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
25598 for (uint32_t m = 1; m <= 1; m++) {
25599 GemmMicrokernelTester()
25600 .mr(1)
25601 .nr(8)
25602 .kr(1)
25603 .sr(4)
25604 .m(m)
25605 .n(8)
25606 .k(4)
25607 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025608 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025609 }
25610 }
25611
25612 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
25613 for (uint32_t n = 1; n <= 8; n++) {
25614 GemmMicrokernelTester()
25615 .mr(1)
25616 .nr(8)
25617 .kr(1)
25618 .sr(4)
25619 .m(1)
25620 .n(n)
25621 .k(4)
25622 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025623 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025624 }
25625 }
25626
25627 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_lt_4) {
25628 for (size_t k = 1; k < 4; k++) {
25629 GemmMicrokernelTester()
25630 .mr(1)
25631 .nr(8)
25632 .kr(1)
25633 .sr(4)
25634 .m(1)
25635 .n(8)
25636 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025637 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025638 }
25639 }
25640
25641 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_lt_4_strided_a) {
25642 for (size_t k = 1; k < 4; k++) {
25643 GemmMicrokernelTester()
25644 .mr(1)
25645 .nr(8)
25646 .kr(1)
25647 .sr(4)
25648 .m(1)
25649 .n(8)
25650 .k(k)
25651 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025652 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025653 }
25654 }
25655
25656 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
25657 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025658 for (uint32_t n = 1; n <= 8; n++) {
25659 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025660 GemmMicrokernelTester()
25661 .mr(1)
25662 .nr(8)
25663 .kr(1)
25664 .sr(4)
25665 .m(m)
25666 .n(n)
25667 .k(k)
25668 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025669 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025670 }
25671 }
25672 }
25673 }
25674
25675 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_gt_4) {
25676 for (size_t k = 5; k < 8; k++) {
25677 GemmMicrokernelTester()
25678 .mr(1)
25679 .nr(8)
25680 .kr(1)
25681 .sr(4)
25682 .m(1)
25683 .n(8)
25684 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025685 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025686 }
25687 }
25688
25689 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_gt_4_strided_a) {
25690 for (size_t k = 5; k < 8; k++) {
25691 GemmMicrokernelTester()
25692 .mr(1)
25693 .nr(8)
25694 .kr(1)
25695 .sr(4)
25696 .m(1)
25697 .n(8)
25698 .k(k)
25699 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025700 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025701 }
25702 }
25703
25704 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
25705 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025706 for (uint32_t n = 1; n <= 8; n++) {
25707 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025708 GemmMicrokernelTester()
25709 .mr(1)
25710 .nr(8)
25711 .kr(1)
25712 .sr(4)
25713 .m(m)
25714 .n(n)
25715 .k(k)
25716 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025717 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025718 }
25719 }
25720 }
25721 }
25722
25723 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_div_4) {
25724 for (size_t k = 8; k <= 40; k += 4) {
25725 GemmMicrokernelTester()
25726 .mr(1)
25727 .nr(8)
25728 .kr(1)
25729 .sr(4)
25730 .m(1)
25731 .n(8)
25732 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025733 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025734 }
25735 }
25736
25737 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_div_4_strided_a) {
25738 for (size_t k = 8; k <= 40; k += 4) {
25739 GemmMicrokernelTester()
25740 .mr(1)
25741 .nr(8)
25742 .kr(1)
25743 .sr(4)
25744 .m(1)
25745 .n(8)
25746 .k(k)
25747 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025748 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025749 }
25750 }
25751
25752 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, k_div_4_subtile) {
25753 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025754 for (uint32_t n = 1; n <= 8; n++) {
25755 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025756 GemmMicrokernelTester()
25757 .mr(1)
25758 .nr(8)
25759 .kr(1)
25760 .sr(4)
25761 .m(m)
25762 .n(n)
25763 .k(k)
25764 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025765 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025766 }
25767 }
25768 }
25769 }
25770
25771 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8) {
25772 for (uint32_t n = 9; n < 16; n++) {
25773 for (size_t k = 1; k <= 20; k += 5) {
25774 GemmMicrokernelTester()
25775 .mr(1)
25776 .nr(8)
25777 .kr(1)
25778 .sr(4)
25779 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025780 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025781 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025782 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025783 }
25784 }
25785 }
25786
25787 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
25788 for (uint32_t n = 9; n < 16; n++) {
25789 for (size_t k = 1; k <= 20; k += 5) {
25790 GemmMicrokernelTester()
25791 .mr(1)
25792 .nr(8)
25793 .kr(1)
25794 .sr(4)
25795 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025796 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025797 .k(k)
25798 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025799 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025800 }
25801 }
25802 }
25803
25804 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8_strided_a) {
25805 for (uint32_t n = 9; n < 16; n++) {
25806 for (size_t k = 1; k <= 20; k += 5) {
25807 GemmMicrokernelTester()
25808 .mr(1)
25809 .nr(8)
25810 .kr(1)
25811 .sr(4)
25812 .m(1)
25813 .n(n)
25814 .k(k)
25815 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025816 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025817 }
25818 }
25819 }
25820
25821 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
25822 for (uint32_t n = 9; n < 16; n++) {
25823 for (size_t k = 1; k <= 20; k += 5) {
25824 for (uint32_t m = 1; m <= 1; m++) {
25825 GemmMicrokernelTester()
25826 .mr(1)
25827 .nr(8)
25828 .kr(1)
25829 .sr(4)
25830 .m(m)
25831 .n(n)
25832 .k(k)
25833 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025834 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025835 }
25836 }
25837 }
25838 }
25839
25840 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8) {
25841 for (uint32_t n = 16; n <= 24; n += 8) {
25842 for (size_t k = 1; k <= 20; k += 5) {
25843 GemmMicrokernelTester()
25844 .mr(1)
25845 .nr(8)
25846 .kr(1)
25847 .sr(4)
25848 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025849 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025850 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025851 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025852 }
25853 }
25854 }
25855
25856 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
25857 for (uint32_t n = 16; n <= 24; n += 8) {
25858 for (size_t k = 1; k <= 20; k += 5) {
25859 GemmMicrokernelTester()
25860 .mr(1)
25861 .nr(8)
25862 .kr(1)
25863 .sr(4)
25864 .m(1)
25865 .n(n)
25866 .k(k)
25867 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025868 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025869 }
25870 }
25871 }
25872
25873 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8_strided_a) {
25874 for (uint32_t n = 16; n <= 24; n += 8) {
25875 for (size_t k = 1; k <= 20; k += 5) {
25876 GemmMicrokernelTester()
25877 .mr(1)
25878 .nr(8)
25879 .kr(1)
25880 .sr(4)
25881 .m(1)
25882 .n(n)
25883 .k(k)
25884 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025885 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025886 }
25887 }
25888 }
25889
25890 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8_subtile) {
25891 for (uint32_t n = 16; n <= 24; n += 8) {
25892 for (size_t k = 1; k <= 20; k += 5) {
25893 for (uint32_t m = 1; m <= 1; m++) {
25894 GemmMicrokernelTester()
25895 .mr(1)
25896 .nr(8)
25897 .kr(1)
25898 .sr(4)
25899 .m(m)
25900 .n(n)
25901 .k(k)
25902 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025903 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025904 }
25905 }
25906 }
25907 }
25908
25909 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, strided_cm_subtile) {
25910 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025911 for (uint32_t n = 1; n <= 8; n++) {
25912 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025913 GemmMicrokernelTester()
25914 .mr(1)
25915 .nr(8)
25916 .kr(1)
25917 .sr(4)
25918 .m(m)
25919 .n(n)
25920 .k(k)
25921 .cm_stride(11)
25922 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025923 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025924 }
25925 }
25926 }
25927 }
25928
25929 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, qmin) {
25930 GemmMicrokernelTester()
25931 .mr(1)
25932 .nr(8)
25933 .kr(1)
25934 .sr(4)
25935 .m(1)
25936 .n(8)
25937 .k(4)
25938 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025939 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025940 }
25941
25942 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, qmax) {
25943 GemmMicrokernelTester()
25944 .mr(1)
25945 .nr(8)
25946 .kr(1)
25947 .sr(4)
25948 .m(1)
25949 .n(8)
25950 .k(4)
25951 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025952 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025953 }
25954
25955 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_ARM, strided_cm) {
25956 GemmMicrokernelTester()
25957 .mr(1)
25958 .nr(8)
25959 .kr(1)
25960 .sr(4)
25961 .m(1)
25962 .n(8)
25963 .k(4)
25964 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025965 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025966 }
Marat Dukhan4c617792021-12-21 15:47:58 -080025967#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025968
25969
Marat Dukhan4c617792021-12-21 15:47:58 -080025970#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025971 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4) {
25972 GemmMicrokernelTester()
25973 .mr(4)
25974 .nr(8)
25975 .kr(1)
25976 .sr(4)
25977 .m(4)
25978 .n(8)
25979 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025980 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025981 }
25982
25983 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, strided_cn) {
25984 GemmMicrokernelTester()
25985 .mr(4)
25986 .nr(8)
25987 .kr(1)
25988 .sr(4)
25989 .m(4)
25990 .n(8)
25991 .k(4)
25992 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025993 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025994 }
25995
25996 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_strided_a) {
25997 GemmMicrokernelTester()
25998 .mr(4)
25999 .nr(8)
26000 .kr(1)
26001 .sr(4)
26002 .m(4)
26003 .n(8)
26004 .k(4)
26005 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026006 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026007 }
26008
26009 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026010 for (uint32_t n = 1; n <= 8; n++) {
26011 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026012 GemmMicrokernelTester()
26013 .mr(4)
26014 .nr(8)
26015 .kr(1)
26016 .sr(4)
26017 .m(m)
26018 .n(n)
26019 .k(4)
26020 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026021 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026022 }
26023 }
26024 }
26025
26026 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
26027 for (uint32_t m = 1; m <= 4; m++) {
26028 GemmMicrokernelTester()
26029 .mr(4)
26030 .nr(8)
26031 .kr(1)
26032 .sr(4)
26033 .m(m)
26034 .n(8)
26035 .k(4)
26036 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026037 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026038 }
26039 }
26040
26041 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
26042 for (uint32_t n = 1; n <= 8; n++) {
26043 GemmMicrokernelTester()
26044 .mr(4)
26045 .nr(8)
26046 .kr(1)
26047 .sr(4)
26048 .m(4)
26049 .n(n)
26050 .k(4)
26051 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026052 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026053 }
26054 }
26055
26056 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_lt_4) {
26057 for (size_t k = 1; k < 4; k++) {
26058 GemmMicrokernelTester()
26059 .mr(4)
26060 .nr(8)
26061 .kr(1)
26062 .sr(4)
26063 .m(4)
26064 .n(8)
26065 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026066 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026067 }
26068 }
26069
26070 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_lt_4_strided_a) {
26071 for (size_t k = 1; k < 4; k++) {
26072 GemmMicrokernelTester()
26073 .mr(4)
26074 .nr(8)
26075 .kr(1)
26076 .sr(4)
26077 .m(4)
26078 .n(8)
26079 .k(k)
26080 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026081 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026082 }
26083 }
26084
26085 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
26086 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026087 for (uint32_t n = 1; n <= 8; n++) {
26088 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026089 GemmMicrokernelTester()
26090 .mr(4)
26091 .nr(8)
26092 .kr(1)
26093 .sr(4)
26094 .m(m)
26095 .n(n)
26096 .k(k)
26097 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026098 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026099 }
26100 }
26101 }
26102 }
26103
26104 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_gt_4) {
26105 for (size_t k = 5; k < 8; k++) {
26106 GemmMicrokernelTester()
26107 .mr(4)
26108 .nr(8)
26109 .kr(1)
26110 .sr(4)
26111 .m(4)
26112 .n(8)
26113 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026114 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026115 }
26116 }
26117
26118 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_gt_4_strided_a) {
26119 for (size_t k = 5; k < 8; k++) {
26120 GemmMicrokernelTester()
26121 .mr(4)
26122 .nr(8)
26123 .kr(1)
26124 .sr(4)
26125 .m(4)
26126 .n(8)
26127 .k(k)
26128 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026129 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026130 }
26131 }
26132
26133 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
26134 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026135 for (uint32_t n = 1; n <= 8; n++) {
26136 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026137 GemmMicrokernelTester()
26138 .mr(4)
26139 .nr(8)
26140 .kr(1)
26141 .sr(4)
26142 .m(m)
26143 .n(n)
26144 .k(k)
26145 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026146 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026147 }
26148 }
26149 }
26150 }
26151
26152 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_div_4) {
26153 for (size_t k = 8; k <= 40; k += 4) {
26154 GemmMicrokernelTester()
26155 .mr(4)
26156 .nr(8)
26157 .kr(1)
26158 .sr(4)
26159 .m(4)
26160 .n(8)
26161 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026162 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026163 }
26164 }
26165
26166 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_div_4_strided_a) {
26167 for (size_t k = 8; k <= 40; k += 4) {
26168 GemmMicrokernelTester()
26169 .mr(4)
26170 .nr(8)
26171 .kr(1)
26172 .sr(4)
26173 .m(4)
26174 .n(8)
26175 .k(k)
26176 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026177 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026178 }
26179 }
26180
26181 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, k_div_4_subtile) {
26182 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026183 for (uint32_t n = 1; n <= 8; n++) {
26184 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026185 GemmMicrokernelTester()
26186 .mr(4)
26187 .nr(8)
26188 .kr(1)
26189 .sr(4)
26190 .m(m)
26191 .n(n)
26192 .k(k)
26193 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026194 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026195 }
26196 }
26197 }
26198 }
26199
26200 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8) {
26201 for (uint32_t n = 9; n < 16; n++) {
26202 for (size_t k = 1; k <= 20; k += 5) {
26203 GemmMicrokernelTester()
26204 .mr(4)
26205 .nr(8)
26206 .kr(1)
26207 .sr(4)
26208 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026209 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026210 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026211 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026212 }
26213 }
26214 }
26215
26216 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
26217 for (uint32_t n = 9; n < 16; n++) {
26218 for (size_t k = 1; k <= 20; k += 5) {
26219 GemmMicrokernelTester()
26220 .mr(4)
26221 .nr(8)
26222 .kr(1)
26223 .sr(4)
26224 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026225 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026226 .k(k)
26227 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026228 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026229 }
26230 }
26231 }
26232
26233 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8_strided_a) {
26234 for (uint32_t n = 9; n < 16; n++) {
26235 for (size_t k = 1; k <= 20; k += 5) {
26236 GemmMicrokernelTester()
26237 .mr(4)
26238 .nr(8)
26239 .kr(1)
26240 .sr(4)
26241 .m(4)
26242 .n(n)
26243 .k(k)
26244 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026245 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026246 }
26247 }
26248 }
26249
26250 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
26251 for (uint32_t n = 9; n < 16; n++) {
26252 for (size_t k = 1; k <= 20; k += 5) {
26253 for (uint32_t m = 1; m <= 4; m++) {
26254 GemmMicrokernelTester()
26255 .mr(4)
26256 .nr(8)
26257 .kr(1)
26258 .sr(4)
26259 .m(m)
26260 .n(n)
26261 .k(k)
26262 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026263 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026264 }
26265 }
26266 }
26267 }
26268
26269 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8) {
26270 for (uint32_t n = 16; n <= 24; n += 8) {
26271 for (size_t k = 1; k <= 20; k += 5) {
26272 GemmMicrokernelTester()
26273 .mr(4)
26274 .nr(8)
26275 .kr(1)
26276 .sr(4)
26277 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026278 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026279 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026280 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026281 }
26282 }
26283 }
26284
26285 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
26286 for (uint32_t n = 16; n <= 24; n += 8) {
26287 for (size_t k = 1; k <= 20; k += 5) {
26288 GemmMicrokernelTester()
26289 .mr(4)
26290 .nr(8)
26291 .kr(1)
26292 .sr(4)
26293 .m(4)
26294 .n(n)
26295 .k(k)
26296 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026297 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026298 }
26299 }
26300 }
26301
26302 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8_strided_a) {
26303 for (uint32_t n = 16; n <= 24; n += 8) {
26304 for (size_t k = 1; k <= 20; k += 5) {
26305 GemmMicrokernelTester()
26306 .mr(4)
26307 .nr(8)
26308 .kr(1)
26309 .sr(4)
26310 .m(4)
26311 .n(n)
26312 .k(k)
26313 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026314 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026315 }
26316 }
26317 }
26318
26319 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8_subtile) {
26320 for (uint32_t n = 16; n <= 24; n += 8) {
26321 for (size_t k = 1; k <= 20; k += 5) {
26322 for (uint32_t m = 1; m <= 4; m++) {
26323 GemmMicrokernelTester()
26324 .mr(4)
26325 .nr(8)
26326 .kr(1)
26327 .sr(4)
26328 .m(m)
26329 .n(n)
26330 .k(k)
26331 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026332 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026333 }
26334 }
26335 }
26336 }
26337
26338 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, strided_cm_subtile) {
26339 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026340 for (uint32_t n = 1; n <= 8; n++) {
26341 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026342 GemmMicrokernelTester()
26343 .mr(4)
26344 .nr(8)
26345 .kr(1)
26346 .sr(4)
26347 .m(m)
26348 .n(n)
26349 .k(k)
26350 .cm_stride(11)
26351 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026352 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026353 }
26354 }
26355 }
26356 }
26357
26358 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, qmin) {
26359 GemmMicrokernelTester()
26360 .mr(4)
26361 .nr(8)
26362 .kr(1)
26363 .sr(4)
26364 .m(4)
26365 .n(8)
26366 .k(4)
26367 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026368 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026369 }
26370
26371 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, qmax) {
26372 GemmMicrokernelTester()
26373 .mr(4)
26374 .nr(8)
26375 .kr(1)
26376 .sr(4)
26377 .m(4)
26378 .n(8)
26379 .k(4)
26380 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026381 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026382 }
26383
26384 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_ARM, strided_cm) {
26385 GemmMicrokernelTester()
26386 .mr(4)
26387 .nr(8)
26388 .kr(1)
26389 .sr(4)
26390 .m(4)
26391 .n(8)
26392 .k(4)
26393 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026394 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026395 }
Marat Dukhan4c617792021-12-21 15:47:58 -080026396#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026397
26398
Marat Dukhan4c617792021-12-21 15:47:58 -080026399#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026400 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4) {
26401 GemmMicrokernelTester()
26402 .mr(5)
26403 .nr(8)
26404 .kr(1)
26405 .sr(4)
26406 .m(5)
26407 .n(8)
26408 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026409 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026410 }
26411
26412 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, strided_cn) {
26413 GemmMicrokernelTester()
26414 .mr(5)
26415 .nr(8)
26416 .kr(1)
26417 .sr(4)
26418 .m(5)
26419 .n(8)
26420 .k(4)
26421 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026422 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026423 }
26424
26425 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_strided_a) {
26426 GemmMicrokernelTester()
26427 .mr(5)
26428 .nr(8)
26429 .kr(1)
26430 .sr(4)
26431 .m(5)
26432 .n(8)
26433 .k(4)
26434 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026435 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026436 }
26437
26438 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026439 for (uint32_t n = 1; n <= 8; n++) {
26440 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026441 GemmMicrokernelTester()
26442 .mr(5)
26443 .nr(8)
26444 .kr(1)
26445 .sr(4)
26446 .m(m)
26447 .n(n)
26448 .k(4)
26449 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026450 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026451 }
26452 }
26453 }
26454
26455 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
26456 for (uint32_t m = 1; m <= 5; m++) {
26457 GemmMicrokernelTester()
26458 .mr(5)
26459 .nr(8)
26460 .kr(1)
26461 .sr(4)
26462 .m(m)
26463 .n(8)
26464 .k(4)
26465 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026466 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026467 }
26468 }
26469
26470 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
26471 for (uint32_t n = 1; n <= 8; n++) {
26472 GemmMicrokernelTester()
26473 .mr(5)
26474 .nr(8)
26475 .kr(1)
26476 .sr(4)
26477 .m(5)
26478 .n(n)
26479 .k(4)
26480 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026481 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026482 }
26483 }
26484
26485 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_lt_4) {
26486 for (size_t k = 1; k < 4; k++) {
26487 GemmMicrokernelTester()
26488 .mr(5)
26489 .nr(8)
26490 .kr(1)
26491 .sr(4)
26492 .m(5)
26493 .n(8)
26494 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026495 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026496 }
26497 }
26498
26499 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_lt_4_strided_a) {
26500 for (size_t k = 1; k < 4; k++) {
26501 GemmMicrokernelTester()
26502 .mr(5)
26503 .nr(8)
26504 .kr(1)
26505 .sr(4)
26506 .m(5)
26507 .n(8)
26508 .k(k)
26509 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026510 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026511 }
26512 }
26513
26514 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
26515 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026516 for (uint32_t n = 1; n <= 8; n++) {
26517 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026518 GemmMicrokernelTester()
26519 .mr(5)
26520 .nr(8)
26521 .kr(1)
26522 .sr(4)
26523 .m(m)
26524 .n(n)
26525 .k(k)
26526 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026527 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026528 }
26529 }
26530 }
26531 }
26532
26533 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_gt_4) {
26534 for (size_t k = 5; k < 8; k++) {
26535 GemmMicrokernelTester()
26536 .mr(5)
26537 .nr(8)
26538 .kr(1)
26539 .sr(4)
26540 .m(5)
26541 .n(8)
26542 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026543 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026544 }
26545 }
26546
26547 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_gt_4_strided_a) {
26548 for (size_t k = 5; k < 8; k++) {
26549 GemmMicrokernelTester()
26550 .mr(5)
26551 .nr(8)
26552 .kr(1)
26553 .sr(4)
26554 .m(5)
26555 .n(8)
26556 .k(k)
26557 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026558 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026559 }
26560 }
26561
26562 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
26563 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026564 for (uint32_t n = 1; n <= 8; n++) {
26565 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026566 GemmMicrokernelTester()
26567 .mr(5)
26568 .nr(8)
26569 .kr(1)
26570 .sr(4)
26571 .m(m)
26572 .n(n)
26573 .k(k)
26574 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026575 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026576 }
26577 }
26578 }
26579 }
26580
26581 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_div_4) {
26582 for (size_t k = 8; k <= 40; k += 4) {
26583 GemmMicrokernelTester()
26584 .mr(5)
26585 .nr(8)
26586 .kr(1)
26587 .sr(4)
26588 .m(5)
26589 .n(8)
26590 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026591 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026592 }
26593 }
26594
26595 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_div_4_strided_a) {
26596 for (size_t k = 8; k <= 40; k += 4) {
26597 GemmMicrokernelTester()
26598 .mr(5)
26599 .nr(8)
26600 .kr(1)
26601 .sr(4)
26602 .m(5)
26603 .n(8)
26604 .k(k)
26605 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026606 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026607 }
26608 }
26609
26610 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, k_div_4_subtile) {
26611 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026612 for (uint32_t n = 1; n <= 8; n++) {
26613 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026614 GemmMicrokernelTester()
26615 .mr(5)
26616 .nr(8)
26617 .kr(1)
26618 .sr(4)
26619 .m(m)
26620 .n(n)
26621 .k(k)
26622 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026623 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026624 }
26625 }
26626 }
26627 }
26628
26629 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8) {
26630 for (uint32_t n = 9; n < 16; n++) {
26631 for (size_t k = 1; k <= 20; k += 5) {
26632 GemmMicrokernelTester()
26633 .mr(5)
26634 .nr(8)
26635 .kr(1)
26636 .sr(4)
26637 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026638 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026639 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026640 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026641 }
26642 }
26643 }
26644
26645 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
26646 for (uint32_t n = 9; n < 16; n++) {
26647 for (size_t k = 1; k <= 20; k += 5) {
26648 GemmMicrokernelTester()
26649 .mr(5)
26650 .nr(8)
26651 .kr(1)
26652 .sr(4)
26653 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026654 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026655 .k(k)
26656 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026657 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026658 }
26659 }
26660 }
26661
26662 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8_strided_a) {
26663 for (uint32_t n = 9; n < 16; n++) {
26664 for (size_t k = 1; k <= 20; k += 5) {
26665 GemmMicrokernelTester()
26666 .mr(5)
26667 .nr(8)
26668 .kr(1)
26669 .sr(4)
26670 .m(5)
26671 .n(n)
26672 .k(k)
26673 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026674 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026675 }
26676 }
26677 }
26678
26679 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
26680 for (uint32_t n = 9; n < 16; n++) {
26681 for (size_t k = 1; k <= 20; k += 5) {
26682 for (uint32_t m = 1; m <= 5; m++) {
26683 GemmMicrokernelTester()
26684 .mr(5)
26685 .nr(8)
26686 .kr(1)
26687 .sr(4)
26688 .m(m)
26689 .n(n)
26690 .k(k)
26691 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026692 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026693 }
26694 }
26695 }
26696 }
26697
26698 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8) {
26699 for (uint32_t n = 16; n <= 24; n += 8) {
26700 for (size_t k = 1; k <= 20; k += 5) {
26701 GemmMicrokernelTester()
26702 .mr(5)
26703 .nr(8)
26704 .kr(1)
26705 .sr(4)
26706 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026707 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026708 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026709 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026710 }
26711 }
26712 }
26713
26714 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
26715 for (uint32_t n = 16; n <= 24; n += 8) {
26716 for (size_t k = 1; k <= 20; k += 5) {
26717 GemmMicrokernelTester()
26718 .mr(5)
26719 .nr(8)
26720 .kr(1)
26721 .sr(4)
26722 .m(5)
26723 .n(n)
26724 .k(k)
26725 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026726 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026727 }
26728 }
26729 }
26730
26731 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8_strided_a) {
26732 for (uint32_t n = 16; n <= 24; n += 8) {
26733 for (size_t k = 1; k <= 20; k += 5) {
26734 GemmMicrokernelTester()
26735 .mr(5)
26736 .nr(8)
26737 .kr(1)
26738 .sr(4)
26739 .m(5)
26740 .n(n)
26741 .k(k)
26742 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026743 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026744 }
26745 }
26746 }
26747
26748 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8_subtile) {
26749 for (uint32_t n = 16; n <= 24; n += 8) {
26750 for (size_t k = 1; k <= 20; k += 5) {
26751 for (uint32_t m = 1; m <= 5; m++) {
26752 GemmMicrokernelTester()
26753 .mr(5)
26754 .nr(8)
26755 .kr(1)
26756 .sr(4)
26757 .m(m)
26758 .n(n)
26759 .k(k)
26760 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026761 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026762 }
26763 }
26764 }
26765 }
26766
26767 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, strided_cm_subtile) {
26768 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026769 for (uint32_t n = 1; n <= 8; n++) {
26770 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026771 GemmMicrokernelTester()
26772 .mr(5)
26773 .nr(8)
26774 .kr(1)
26775 .sr(4)
26776 .m(m)
26777 .n(n)
26778 .k(k)
26779 .cm_stride(11)
26780 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026781 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026782 }
26783 }
26784 }
26785 }
26786
26787 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, qmin) {
26788 GemmMicrokernelTester()
26789 .mr(5)
26790 .nr(8)
26791 .kr(1)
26792 .sr(4)
26793 .m(5)
26794 .n(8)
26795 .k(4)
26796 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026797 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026798 }
26799
26800 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, qmax) {
26801 GemmMicrokernelTester()
26802 .mr(5)
26803 .nr(8)
26804 .kr(1)
26805 .sr(4)
26806 .m(5)
26807 .n(8)
26808 .k(4)
26809 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026810 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026811 }
26812
26813 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_ARM, strided_cm) {
26814 GemmMicrokernelTester()
26815 .mr(5)
26816 .nr(8)
26817 .kr(1)
26818 .sr(4)
26819 .m(5)
26820 .n(8)
26821 .k(4)
26822 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026823 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026824 }
Marat Dukhan4c617792021-12-21 15:47:58 -080026825#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026826
26827
Marat Dukhan4c617792021-12-21 15:47:58 -080026828#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026829 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4) {
26830 GemmMicrokernelTester()
26831 .mr(1)
26832 .nr(8)
26833 .kr(1)
26834 .sr(4)
26835 .m(1)
26836 .n(8)
26837 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026838 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026839 }
26840
26841 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, strided_cn) {
26842 GemmMicrokernelTester()
26843 .mr(1)
26844 .nr(8)
26845 .kr(1)
26846 .sr(4)
26847 .m(1)
26848 .n(8)
26849 .k(4)
26850 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026851 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026852 }
26853
26854 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_strided_a) {
26855 GemmMicrokernelTester()
26856 .mr(1)
26857 .nr(8)
26858 .kr(1)
26859 .sr(4)
26860 .m(1)
26861 .n(8)
26862 .k(4)
26863 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026864 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026865 }
26866
26867 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026868 for (uint32_t n = 1; n <= 8; n++) {
26869 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026870 GemmMicrokernelTester()
26871 .mr(1)
26872 .nr(8)
26873 .kr(1)
26874 .sr(4)
26875 .m(m)
26876 .n(n)
26877 .k(4)
26878 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026879 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026880 }
26881 }
26882 }
26883
26884 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
26885 for (uint32_t m = 1; m <= 1; m++) {
26886 GemmMicrokernelTester()
26887 .mr(1)
26888 .nr(8)
26889 .kr(1)
26890 .sr(4)
26891 .m(m)
26892 .n(8)
26893 .k(4)
26894 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026895 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026896 }
26897 }
26898
26899 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
26900 for (uint32_t n = 1; n <= 8; n++) {
26901 GemmMicrokernelTester()
26902 .mr(1)
26903 .nr(8)
26904 .kr(1)
26905 .sr(4)
26906 .m(1)
26907 .n(n)
26908 .k(4)
26909 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026910 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026911 }
26912 }
26913
26914 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_lt_4) {
26915 for (size_t k = 1; k < 4; k++) {
26916 GemmMicrokernelTester()
26917 .mr(1)
26918 .nr(8)
26919 .kr(1)
26920 .sr(4)
26921 .m(1)
26922 .n(8)
26923 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026924 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026925 }
26926 }
26927
26928 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_lt_4_strided_a) {
26929 for (size_t k = 1; k < 4; k++) {
26930 GemmMicrokernelTester()
26931 .mr(1)
26932 .nr(8)
26933 .kr(1)
26934 .sr(4)
26935 .m(1)
26936 .n(8)
26937 .k(k)
26938 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026939 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026940 }
26941 }
26942
26943 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_lt_4_subtile) {
26944 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026945 for (uint32_t n = 1; n <= 8; n++) {
26946 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026947 GemmMicrokernelTester()
26948 .mr(1)
26949 .nr(8)
26950 .kr(1)
26951 .sr(4)
26952 .m(m)
26953 .n(n)
26954 .k(k)
26955 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026956 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026957 }
26958 }
26959 }
26960 }
26961
26962 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_gt_4) {
26963 for (size_t k = 5; k < 8; k++) {
26964 GemmMicrokernelTester()
26965 .mr(1)
26966 .nr(8)
26967 .kr(1)
26968 .sr(4)
26969 .m(1)
26970 .n(8)
26971 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026972 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026973 }
26974 }
26975
26976 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_gt_4_strided_a) {
26977 for (size_t k = 5; k < 8; k++) {
26978 GemmMicrokernelTester()
26979 .mr(1)
26980 .nr(8)
26981 .kr(1)
26982 .sr(4)
26983 .m(1)
26984 .n(8)
26985 .k(k)
26986 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026987 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026988 }
26989 }
26990
26991 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_gt_4_subtile) {
26992 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026993 for (uint32_t n = 1; n <= 8; n++) {
26994 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026995 GemmMicrokernelTester()
26996 .mr(1)
26997 .nr(8)
26998 .kr(1)
26999 .sr(4)
27000 .m(m)
27001 .n(n)
27002 .k(k)
27003 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027004 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027005 }
27006 }
27007 }
27008 }
27009
27010 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_div_4) {
27011 for (size_t k = 8; k <= 40; k += 4) {
27012 GemmMicrokernelTester()
27013 .mr(1)
27014 .nr(8)
27015 .kr(1)
27016 .sr(4)
27017 .m(1)
27018 .n(8)
27019 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027020 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027021 }
27022 }
27023
27024 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_div_4_strided_a) {
27025 for (size_t k = 8; k <= 40; k += 4) {
27026 GemmMicrokernelTester()
27027 .mr(1)
27028 .nr(8)
27029 .kr(1)
27030 .sr(4)
27031 .m(1)
27032 .n(8)
27033 .k(k)
27034 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027035 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027036 }
27037 }
27038
27039 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, k_div_4_subtile) {
27040 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027041 for (uint32_t n = 1; n <= 8; n++) {
27042 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027043 GemmMicrokernelTester()
27044 .mr(1)
27045 .nr(8)
27046 .kr(1)
27047 .sr(4)
27048 .m(m)
27049 .n(n)
27050 .k(k)
27051 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027052 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027053 }
27054 }
27055 }
27056 }
27057
27058 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8) {
27059 for (uint32_t n = 9; n < 16; n++) {
27060 for (size_t k = 1; k <= 20; k += 5) {
27061 GemmMicrokernelTester()
27062 .mr(1)
27063 .nr(8)
27064 .kr(1)
27065 .sr(4)
27066 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027067 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027068 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027069 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027070 }
27071 }
27072 }
27073
27074 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
27075 for (uint32_t n = 9; n < 16; n++) {
27076 for (size_t k = 1; k <= 20; k += 5) {
27077 GemmMicrokernelTester()
27078 .mr(1)
27079 .nr(8)
27080 .kr(1)
27081 .sr(4)
27082 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027083 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027084 .k(k)
27085 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027086 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027087 }
27088 }
27089 }
27090
27091 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8_strided_a) {
27092 for (uint32_t n = 9; n < 16; n++) {
27093 for (size_t k = 1; k <= 20; k += 5) {
27094 GemmMicrokernelTester()
27095 .mr(1)
27096 .nr(8)
27097 .kr(1)
27098 .sr(4)
27099 .m(1)
27100 .n(n)
27101 .k(k)
27102 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027103 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027104 }
27105 }
27106 }
27107
27108 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8_subtile) {
27109 for (uint32_t n = 9; n < 16; n++) {
27110 for (size_t k = 1; k <= 20; k += 5) {
27111 for (uint32_t m = 1; m <= 1; m++) {
27112 GemmMicrokernelTester()
27113 .mr(1)
27114 .nr(8)
27115 .kr(1)
27116 .sr(4)
27117 .m(m)
27118 .n(n)
27119 .k(k)
27120 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027121 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027122 }
27123 }
27124 }
27125 }
27126
27127 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_div_8) {
27128 for (uint32_t n = 16; n <= 24; n += 8) {
27129 for (size_t k = 1; k <= 20; k += 5) {
27130 GemmMicrokernelTester()
27131 .mr(1)
27132 .nr(8)
27133 .kr(1)
27134 .sr(4)
27135 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027136 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027137 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027138 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027139 }
27140 }
27141 }
27142
27143 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
27144 for (uint32_t n = 16; n <= 24; n += 8) {
27145 for (size_t k = 1; k <= 20; k += 5) {
27146 GemmMicrokernelTester()
27147 .mr(1)
27148 .nr(8)
27149 .kr(1)
27150 .sr(4)
27151 .m(1)
27152 .n(n)
27153 .k(k)
27154 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027155 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027156 }
27157 }
27158 }
27159
27160 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_div_8_strided_a) {
27161 for (uint32_t n = 16; n <= 24; n += 8) {
27162 for (size_t k = 1; k <= 20; k += 5) {
27163 GemmMicrokernelTester()
27164 .mr(1)
27165 .nr(8)
27166 .kr(1)
27167 .sr(4)
27168 .m(1)
27169 .n(n)
27170 .k(k)
27171 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027172 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027173 }
27174 }
27175 }
27176
27177 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, n_div_8_subtile) {
27178 for (uint32_t n = 16; n <= 24; n += 8) {
27179 for (size_t k = 1; k <= 20; k += 5) {
27180 for (uint32_t m = 1; m <= 1; m++) {
27181 GemmMicrokernelTester()
27182 .mr(1)
27183 .nr(8)
27184 .kr(1)
27185 .sr(4)
27186 .m(m)
27187 .n(n)
27188 .k(k)
27189 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027190 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027191 }
27192 }
27193 }
27194 }
27195
27196 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, strided_cm_subtile) {
27197 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027198 for (uint32_t n = 1; n <= 8; n++) {
27199 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027200 GemmMicrokernelTester()
27201 .mr(1)
27202 .nr(8)
27203 .kr(1)
27204 .sr(4)
27205 .m(m)
27206 .n(n)
27207 .k(k)
27208 .cm_stride(11)
27209 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027210 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027211 }
27212 }
27213 }
27214 }
27215
27216 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, qmin) {
27217 GemmMicrokernelTester()
27218 .mr(1)
27219 .nr(8)
27220 .kr(1)
27221 .sr(4)
27222 .m(1)
27223 .n(8)
27224 .k(4)
27225 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027226 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027227 }
27228
27229 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, qmax) {
27230 GemmMicrokernelTester()
27231 .mr(1)
27232 .nr(8)
27233 .kr(1)
27234 .sr(4)
27235 .m(1)
27236 .n(8)
27237 .k(4)
27238 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027239 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027240 }
27241
27242 TEST(F32_GEMMINC_MINMAX_1X8S4__WASMSIMD_X86, strided_cm) {
27243 GemmMicrokernelTester()
27244 .mr(1)
27245 .nr(8)
27246 .kr(1)
27247 .sr(4)
27248 .m(1)
27249 .n(8)
27250 .k(4)
27251 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027252 .Test(xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027253 }
Marat Dukhan4c617792021-12-21 15:47:58 -080027254#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027255
27256
Marat Dukhan4c617792021-12-21 15:47:58 -080027257#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027258 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4) {
27259 GemmMicrokernelTester()
27260 .mr(4)
27261 .nr(8)
27262 .kr(1)
27263 .sr(4)
27264 .m(4)
27265 .n(8)
27266 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027267 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027268 }
27269
27270 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, strided_cn) {
27271 GemmMicrokernelTester()
27272 .mr(4)
27273 .nr(8)
27274 .kr(1)
27275 .sr(4)
27276 .m(4)
27277 .n(8)
27278 .k(4)
27279 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027280 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027281 }
27282
27283 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_strided_a) {
27284 GemmMicrokernelTester()
27285 .mr(4)
27286 .nr(8)
27287 .kr(1)
27288 .sr(4)
27289 .m(4)
27290 .n(8)
27291 .k(4)
27292 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027293 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027294 }
27295
27296 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027297 for (uint32_t n = 1; n <= 8; n++) {
27298 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027299 GemmMicrokernelTester()
27300 .mr(4)
27301 .nr(8)
27302 .kr(1)
27303 .sr(4)
27304 .m(m)
27305 .n(n)
27306 .k(4)
27307 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027308 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027309 }
27310 }
27311 }
27312
27313 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
27314 for (uint32_t m = 1; m <= 4; m++) {
27315 GemmMicrokernelTester()
27316 .mr(4)
27317 .nr(8)
27318 .kr(1)
27319 .sr(4)
27320 .m(m)
27321 .n(8)
27322 .k(4)
27323 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027324 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027325 }
27326 }
27327
27328 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
27329 for (uint32_t n = 1; n <= 8; n++) {
27330 GemmMicrokernelTester()
27331 .mr(4)
27332 .nr(8)
27333 .kr(1)
27334 .sr(4)
27335 .m(4)
27336 .n(n)
27337 .k(4)
27338 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027339 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027340 }
27341 }
27342
27343 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_lt_4) {
27344 for (size_t k = 1; k < 4; k++) {
27345 GemmMicrokernelTester()
27346 .mr(4)
27347 .nr(8)
27348 .kr(1)
27349 .sr(4)
27350 .m(4)
27351 .n(8)
27352 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027353 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027354 }
27355 }
27356
27357 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_lt_4_strided_a) {
27358 for (size_t k = 1; k < 4; k++) {
27359 GemmMicrokernelTester()
27360 .mr(4)
27361 .nr(8)
27362 .kr(1)
27363 .sr(4)
27364 .m(4)
27365 .n(8)
27366 .k(k)
27367 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027368 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027369 }
27370 }
27371
27372 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_lt_4_subtile) {
27373 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027374 for (uint32_t n = 1; n <= 8; n++) {
27375 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027376 GemmMicrokernelTester()
27377 .mr(4)
27378 .nr(8)
27379 .kr(1)
27380 .sr(4)
27381 .m(m)
27382 .n(n)
27383 .k(k)
27384 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027385 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027386 }
27387 }
27388 }
27389 }
27390
27391 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_gt_4) {
27392 for (size_t k = 5; k < 8; k++) {
27393 GemmMicrokernelTester()
27394 .mr(4)
27395 .nr(8)
27396 .kr(1)
27397 .sr(4)
27398 .m(4)
27399 .n(8)
27400 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027401 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027402 }
27403 }
27404
27405 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_gt_4_strided_a) {
27406 for (size_t k = 5; k < 8; k++) {
27407 GemmMicrokernelTester()
27408 .mr(4)
27409 .nr(8)
27410 .kr(1)
27411 .sr(4)
27412 .m(4)
27413 .n(8)
27414 .k(k)
27415 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027416 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027417 }
27418 }
27419
27420 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_gt_4_subtile) {
27421 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027422 for (uint32_t n = 1; n <= 8; n++) {
27423 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027424 GemmMicrokernelTester()
27425 .mr(4)
27426 .nr(8)
27427 .kr(1)
27428 .sr(4)
27429 .m(m)
27430 .n(n)
27431 .k(k)
27432 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027433 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027434 }
27435 }
27436 }
27437 }
27438
27439 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_div_4) {
27440 for (size_t k = 8; k <= 40; k += 4) {
27441 GemmMicrokernelTester()
27442 .mr(4)
27443 .nr(8)
27444 .kr(1)
27445 .sr(4)
27446 .m(4)
27447 .n(8)
27448 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027449 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027450 }
27451 }
27452
27453 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_div_4_strided_a) {
27454 for (size_t k = 8; k <= 40; k += 4) {
27455 GemmMicrokernelTester()
27456 .mr(4)
27457 .nr(8)
27458 .kr(1)
27459 .sr(4)
27460 .m(4)
27461 .n(8)
27462 .k(k)
27463 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027464 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027465 }
27466 }
27467
27468 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, k_div_4_subtile) {
27469 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027470 for (uint32_t n = 1; n <= 8; n++) {
27471 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027472 GemmMicrokernelTester()
27473 .mr(4)
27474 .nr(8)
27475 .kr(1)
27476 .sr(4)
27477 .m(m)
27478 .n(n)
27479 .k(k)
27480 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027481 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027482 }
27483 }
27484 }
27485 }
27486
27487 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8) {
27488 for (uint32_t n = 9; n < 16; n++) {
27489 for (size_t k = 1; k <= 20; k += 5) {
27490 GemmMicrokernelTester()
27491 .mr(4)
27492 .nr(8)
27493 .kr(1)
27494 .sr(4)
27495 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027496 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027497 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027498 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027499 }
27500 }
27501 }
27502
27503 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
27504 for (uint32_t n = 9; n < 16; n++) {
27505 for (size_t k = 1; k <= 20; k += 5) {
27506 GemmMicrokernelTester()
27507 .mr(4)
27508 .nr(8)
27509 .kr(1)
27510 .sr(4)
27511 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027512 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027513 .k(k)
27514 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027515 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027516 }
27517 }
27518 }
27519
27520 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8_strided_a) {
27521 for (uint32_t n = 9; n < 16; n++) {
27522 for (size_t k = 1; k <= 20; k += 5) {
27523 GemmMicrokernelTester()
27524 .mr(4)
27525 .nr(8)
27526 .kr(1)
27527 .sr(4)
27528 .m(4)
27529 .n(n)
27530 .k(k)
27531 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027532 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027533 }
27534 }
27535 }
27536
27537 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8_subtile) {
27538 for (uint32_t n = 9; n < 16; n++) {
27539 for (size_t k = 1; k <= 20; k += 5) {
27540 for (uint32_t m = 1; m <= 4; m++) {
27541 GemmMicrokernelTester()
27542 .mr(4)
27543 .nr(8)
27544 .kr(1)
27545 .sr(4)
27546 .m(m)
27547 .n(n)
27548 .k(k)
27549 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027550 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027551 }
27552 }
27553 }
27554 }
27555
27556 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_div_8) {
27557 for (uint32_t n = 16; n <= 24; n += 8) {
27558 for (size_t k = 1; k <= 20; k += 5) {
27559 GemmMicrokernelTester()
27560 .mr(4)
27561 .nr(8)
27562 .kr(1)
27563 .sr(4)
27564 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027565 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027566 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027567 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027568 }
27569 }
27570 }
27571
27572 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
27573 for (uint32_t n = 16; n <= 24; n += 8) {
27574 for (size_t k = 1; k <= 20; k += 5) {
27575 GemmMicrokernelTester()
27576 .mr(4)
27577 .nr(8)
27578 .kr(1)
27579 .sr(4)
27580 .m(4)
27581 .n(n)
27582 .k(k)
27583 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027584 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027585 }
27586 }
27587 }
27588
27589 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_div_8_strided_a) {
27590 for (uint32_t n = 16; n <= 24; n += 8) {
27591 for (size_t k = 1; k <= 20; k += 5) {
27592 GemmMicrokernelTester()
27593 .mr(4)
27594 .nr(8)
27595 .kr(1)
27596 .sr(4)
27597 .m(4)
27598 .n(n)
27599 .k(k)
27600 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027601 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027602 }
27603 }
27604 }
27605
27606 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, n_div_8_subtile) {
27607 for (uint32_t n = 16; n <= 24; n += 8) {
27608 for (size_t k = 1; k <= 20; k += 5) {
27609 for (uint32_t m = 1; m <= 4; m++) {
27610 GemmMicrokernelTester()
27611 .mr(4)
27612 .nr(8)
27613 .kr(1)
27614 .sr(4)
27615 .m(m)
27616 .n(n)
27617 .k(k)
27618 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027619 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027620 }
27621 }
27622 }
27623 }
27624
27625 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, strided_cm_subtile) {
27626 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027627 for (uint32_t n = 1; n <= 8; n++) {
27628 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027629 GemmMicrokernelTester()
27630 .mr(4)
27631 .nr(8)
27632 .kr(1)
27633 .sr(4)
27634 .m(m)
27635 .n(n)
27636 .k(k)
27637 .cm_stride(11)
27638 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027639 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027640 }
27641 }
27642 }
27643 }
27644
27645 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, qmin) {
27646 GemmMicrokernelTester()
27647 .mr(4)
27648 .nr(8)
27649 .kr(1)
27650 .sr(4)
27651 .m(4)
27652 .n(8)
27653 .k(4)
27654 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027655 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027656 }
27657
27658 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, qmax) {
27659 GemmMicrokernelTester()
27660 .mr(4)
27661 .nr(8)
27662 .kr(1)
27663 .sr(4)
27664 .m(4)
27665 .n(8)
27666 .k(4)
27667 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027668 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027669 }
27670
27671 TEST(F32_GEMMINC_MINMAX_4X8S4__WASMSIMD_X86, strided_cm) {
27672 GemmMicrokernelTester()
27673 .mr(4)
27674 .nr(8)
27675 .kr(1)
27676 .sr(4)
27677 .m(4)
27678 .n(8)
27679 .k(4)
27680 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027681 .Test(xnn_f32_gemminc_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027682 }
Marat Dukhan4c617792021-12-21 15:47:58 -080027683#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027684
27685
Marat Dukhan4c617792021-12-21 15:47:58 -080027686#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027687 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4) {
27688 GemmMicrokernelTester()
27689 .mr(5)
27690 .nr(8)
27691 .kr(1)
27692 .sr(4)
27693 .m(5)
27694 .n(8)
27695 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027696 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027697 }
27698
27699 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, strided_cn) {
27700 GemmMicrokernelTester()
27701 .mr(5)
27702 .nr(8)
27703 .kr(1)
27704 .sr(4)
27705 .m(5)
27706 .n(8)
27707 .k(4)
27708 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027709 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027710 }
27711
27712 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_strided_a) {
27713 GemmMicrokernelTester()
27714 .mr(5)
27715 .nr(8)
27716 .kr(1)
27717 .sr(4)
27718 .m(5)
27719 .n(8)
27720 .k(4)
27721 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027722 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027723 }
27724
27725 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027726 for (uint32_t n = 1; n <= 8; n++) {
27727 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027728 GemmMicrokernelTester()
27729 .mr(5)
27730 .nr(8)
27731 .kr(1)
27732 .sr(4)
27733 .m(m)
27734 .n(n)
27735 .k(4)
27736 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027737 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027738 }
27739 }
27740 }
27741
27742 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
27743 for (uint32_t m = 1; m <= 5; m++) {
27744 GemmMicrokernelTester()
27745 .mr(5)
27746 .nr(8)
27747 .kr(1)
27748 .sr(4)
27749 .m(m)
27750 .n(8)
27751 .k(4)
27752 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027753 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027754 }
27755 }
27756
27757 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
27758 for (uint32_t n = 1; n <= 8; n++) {
27759 GemmMicrokernelTester()
27760 .mr(5)
27761 .nr(8)
27762 .kr(1)
27763 .sr(4)
27764 .m(5)
27765 .n(n)
27766 .k(4)
27767 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027768 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027769 }
27770 }
27771
27772 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_lt_4) {
27773 for (size_t k = 1; k < 4; k++) {
27774 GemmMicrokernelTester()
27775 .mr(5)
27776 .nr(8)
27777 .kr(1)
27778 .sr(4)
27779 .m(5)
27780 .n(8)
27781 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027782 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027783 }
27784 }
27785
27786 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_lt_4_strided_a) {
27787 for (size_t k = 1; k < 4; k++) {
27788 GemmMicrokernelTester()
27789 .mr(5)
27790 .nr(8)
27791 .kr(1)
27792 .sr(4)
27793 .m(5)
27794 .n(8)
27795 .k(k)
27796 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027797 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027798 }
27799 }
27800
27801 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_lt_4_subtile) {
27802 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027803 for (uint32_t n = 1; n <= 8; n++) {
27804 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027805 GemmMicrokernelTester()
27806 .mr(5)
27807 .nr(8)
27808 .kr(1)
27809 .sr(4)
27810 .m(m)
27811 .n(n)
27812 .k(k)
27813 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027814 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027815 }
27816 }
27817 }
27818 }
27819
27820 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_gt_4) {
27821 for (size_t k = 5; k < 8; k++) {
27822 GemmMicrokernelTester()
27823 .mr(5)
27824 .nr(8)
27825 .kr(1)
27826 .sr(4)
27827 .m(5)
27828 .n(8)
27829 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027830 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027831 }
27832 }
27833
27834 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_gt_4_strided_a) {
27835 for (size_t k = 5; k < 8; k++) {
27836 GemmMicrokernelTester()
27837 .mr(5)
27838 .nr(8)
27839 .kr(1)
27840 .sr(4)
27841 .m(5)
27842 .n(8)
27843 .k(k)
27844 .a_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027845 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027846 }
27847 }
27848
27849 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_gt_4_subtile) {
27850 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027851 for (uint32_t n = 1; n <= 8; n++) {
27852 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027853 GemmMicrokernelTester()
27854 .mr(5)
27855 .nr(8)
27856 .kr(1)
27857 .sr(4)
27858 .m(m)
27859 .n(n)
27860 .k(k)
27861 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027862 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027863 }
27864 }
27865 }
27866 }
27867
27868 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_div_4) {
27869 for (size_t k = 8; k <= 40; k += 4) {
27870 GemmMicrokernelTester()
27871 .mr(5)
27872 .nr(8)
27873 .kr(1)
27874 .sr(4)
27875 .m(5)
27876 .n(8)
27877 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027878 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027879 }
27880 }
27881
27882 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_div_4_strided_a) {
27883 for (size_t k = 8; k <= 40; k += 4) {
27884 GemmMicrokernelTester()
27885 .mr(5)
27886 .nr(8)
27887 .kr(1)
27888 .sr(4)
27889 .m(5)
27890 .n(8)
27891 .k(k)
27892 .a_stride(43)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027893 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027894 }
27895 }
27896
27897 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, k_div_4_subtile) {
27898 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027899 for (uint32_t n = 1; n <= 8; n++) {
27900 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027901 GemmMicrokernelTester()
27902 .mr(5)
27903 .nr(8)
27904 .kr(1)
27905 .sr(4)
27906 .m(m)
27907 .n(n)
27908 .k(k)
27909 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027910 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027911 }
27912 }
27913 }
27914 }
27915
27916 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8) {
27917 for (uint32_t n = 9; n < 16; n++) {
27918 for (size_t k = 1; k <= 20; k += 5) {
27919 GemmMicrokernelTester()
27920 .mr(5)
27921 .nr(8)
27922 .kr(1)
27923 .sr(4)
27924 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027925 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027926 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027927 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027928 }
27929 }
27930 }
27931
27932 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
27933 for (uint32_t n = 9; n < 16; n++) {
27934 for (size_t k = 1; k <= 20; k += 5) {
27935 GemmMicrokernelTester()
27936 .mr(5)
27937 .nr(8)
27938 .kr(1)
27939 .sr(4)
27940 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027941 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027942 .k(k)
27943 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027944 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027945 }
27946 }
27947 }
27948
27949 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8_strided_a) {
27950 for (uint32_t n = 9; n < 16; n++) {
27951 for (size_t k = 1; k <= 20; k += 5) {
27952 GemmMicrokernelTester()
27953 .mr(5)
27954 .nr(8)
27955 .kr(1)
27956 .sr(4)
27957 .m(5)
27958 .n(n)
27959 .k(k)
27960 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027961 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027962 }
27963 }
27964 }
27965
27966 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8_subtile) {
27967 for (uint32_t n = 9; n < 16; n++) {
27968 for (size_t k = 1; k <= 20; k += 5) {
27969 for (uint32_t m = 1; m <= 5; m++) {
27970 GemmMicrokernelTester()
27971 .mr(5)
27972 .nr(8)
27973 .kr(1)
27974 .sr(4)
27975 .m(m)
27976 .n(n)
27977 .k(k)
27978 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027979 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027980 }
27981 }
27982 }
27983 }
27984
27985 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_div_8) {
27986 for (uint32_t n = 16; n <= 24; n += 8) {
27987 for (size_t k = 1; k <= 20; k += 5) {
27988 GemmMicrokernelTester()
27989 .mr(5)
27990 .nr(8)
27991 .kr(1)
27992 .sr(4)
27993 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027994 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027995 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027996 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027997 }
27998 }
27999 }
28000
28001 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
28002 for (uint32_t n = 16; n <= 24; n += 8) {
28003 for (size_t k = 1; k <= 20; k += 5) {
28004 GemmMicrokernelTester()
28005 .mr(5)
28006 .nr(8)
28007 .kr(1)
28008 .sr(4)
28009 .m(5)
28010 .n(n)
28011 .k(k)
28012 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028013 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028014 }
28015 }
28016 }
28017
28018 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_div_8_strided_a) {
28019 for (uint32_t n = 16; n <= 24; n += 8) {
28020 for (size_t k = 1; k <= 20; k += 5) {
28021 GemmMicrokernelTester()
28022 .mr(5)
28023 .nr(8)
28024 .kr(1)
28025 .sr(4)
28026 .m(5)
28027 .n(n)
28028 .k(k)
28029 .a_stride(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028030 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028031 }
28032 }
28033 }
28034
28035 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, n_div_8_subtile) {
28036 for (uint32_t n = 16; n <= 24; n += 8) {
28037 for (size_t k = 1; k <= 20; k += 5) {
28038 for (uint32_t m = 1; m <= 5; m++) {
28039 GemmMicrokernelTester()
28040 .mr(5)
28041 .nr(8)
28042 .kr(1)
28043 .sr(4)
28044 .m(m)
28045 .n(n)
28046 .k(k)
28047 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028048 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028049 }
28050 }
28051 }
28052 }
28053
28054 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, strided_cm_subtile) {
28055 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028056 for (uint32_t n = 1; n <= 8; n++) {
28057 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028058 GemmMicrokernelTester()
28059 .mr(5)
28060 .nr(8)
28061 .kr(1)
28062 .sr(4)
28063 .m(m)
28064 .n(n)
28065 .k(k)
28066 .cm_stride(11)
28067 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028068 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028069 }
28070 }
28071 }
28072 }
28073
28074 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, qmin) {
28075 GemmMicrokernelTester()
28076 .mr(5)
28077 .nr(8)
28078 .kr(1)
28079 .sr(4)
28080 .m(5)
28081 .n(8)
28082 .k(4)
28083 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028084 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028085 }
28086
28087 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, qmax) {
28088 GemmMicrokernelTester()
28089 .mr(5)
28090 .nr(8)
28091 .kr(1)
28092 .sr(4)
28093 .m(5)
28094 .n(8)
28095 .k(4)
28096 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028097 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028098 }
28099
28100 TEST(F32_GEMMINC_MINMAX_5X8S4__WASMSIMD_X86, strided_cm) {
28101 GemmMicrokernelTester()
28102 .mr(5)
28103 .nr(8)
28104 .kr(1)
28105 .sr(4)
28106 .m(5)
28107 .n(8)
28108 .k(4)
28109 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028110 .Test(xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028111 }
Marat Dukhan4c617792021-12-21 15:47:58 -080028112#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028113
28114
Marat Dukhan4c617792021-12-21 15:47:58 -080028115#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhande06f492020-04-09 00:19:31 -070028116 TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028117 GemmMicrokernelTester()
28118 .mr(1)
28119 .nr(4)
28120 .kr(1)
28121 .sr(1)
28122 .m(1)
28123 .n(4)
28124 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028125 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028126 }
28127
Marat Dukhande06f492020-04-09 00:19:31 -070028128 TEST(F32_GEMMINC_MINMAX_1X4__WASM, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028129 GemmMicrokernelTester()
28130 .mr(1)
28131 .nr(4)
28132 .kr(1)
28133 .sr(1)
28134 .m(1)
28135 .n(4)
28136 .k(1)
28137 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028138 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028139 }
28140
Marat Dukhande06f492020-04-09 00:19:31 -070028141 TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028142 GemmMicrokernelTester()
28143 .mr(1)
28144 .nr(4)
28145 .kr(1)
28146 .sr(1)
28147 .m(1)
28148 .n(4)
28149 .k(1)
28150 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028151 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028152 }
28153
Marat Dukhande06f492020-04-09 00:19:31 -070028154 TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028155 for (uint32_t n = 1; n <= 4; n++) {
28156 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028157 GemmMicrokernelTester()
28158 .mr(1)
28159 .nr(4)
28160 .kr(1)
28161 .sr(1)
28162 .m(m)
28163 .n(n)
28164 .k(1)
28165 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028166 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028167 }
28168 }
28169 }
28170
Marat Dukhande06f492020-04-09 00:19:31 -070028171 TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028172 for (uint32_t m = 1; m <= 1; m++) {
28173 GemmMicrokernelTester()
28174 .mr(1)
28175 .nr(4)
28176 .kr(1)
28177 .sr(1)
28178 .m(m)
28179 .n(4)
28180 .k(1)
28181 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028182 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028183 }
28184 }
28185
Marat Dukhande06f492020-04-09 00:19:31 -070028186 TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028187 for (uint32_t n = 1; n <= 4; n++) {
28188 GemmMicrokernelTester()
28189 .mr(1)
28190 .nr(4)
28191 .kr(1)
28192 .sr(1)
28193 .m(1)
28194 .n(n)
28195 .k(1)
28196 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028197 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028198 }
28199 }
28200
Marat Dukhande06f492020-04-09 00:19:31 -070028201 TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028202 for (size_t k = 2; k < 10; k++) {
28203 GemmMicrokernelTester()
28204 .mr(1)
28205 .nr(4)
28206 .kr(1)
28207 .sr(1)
28208 .m(1)
28209 .n(4)
28210 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028211 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028212 }
28213 }
28214
Marat Dukhande06f492020-04-09 00:19:31 -070028215 TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028216 for (size_t k = 2; k < 10; k++) {
28217 GemmMicrokernelTester()
28218 .mr(1)
28219 .nr(4)
28220 .kr(1)
28221 .sr(1)
28222 .m(1)
28223 .n(4)
28224 .k(k)
28225 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028226 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028227 }
28228 }
28229
Marat Dukhande06f492020-04-09 00:19:31 -070028230 TEST(F32_GEMMINC_MINMAX_1X4__WASM, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028231 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028232 for (uint32_t n = 1; n <= 4; n++) {
28233 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028234 GemmMicrokernelTester()
28235 .mr(1)
28236 .nr(4)
28237 .kr(1)
28238 .sr(1)
28239 .m(m)
28240 .n(n)
28241 .k(k)
28242 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028243 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028244 }
28245 }
28246 }
28247 }
28248
Marat Dukhande06f492020-04-09 00:19:31 -070028249 TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028250 for (uint32_t n = 5; n < 8; n++) {
28251 for (size_t k = 1; k <= 5; k += 2) {
28252 GemmMicrokernelTester()
28253 .mr(1)
28254 .nr(4)
28255 .kr(1)
28256 .sr(1)
28257 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028258 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028259 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028260 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028261 }
28262 }
28263 }
28264
Marat Dukhande06f492020-04-09 00:19:31 -070028265 TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028266 for (uint32_t n = 5; n < 8; n++) {
28267 for (size_t k = 1; k <= 5; k += 2) {
28268 GemmMicrokernelTester()
28269 .mr(1)
28270 .nr(4)
28271 .kr(1)
28272 .sr(1)
28273 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028274 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028275 .k(k)
28276 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028277 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028278 }
28279 }
28280 }
28281
Marat Dukhande06f492020-04-09 00:19:31 -070028282 TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028283 for (uint32_t n = 5; n < 8; n++) {
28284 for (size_t k = 1; k <= 5; k += 2) {
28285 GemmMicrokernelTester()
28286 .mr(1)
28287 .nr(4)
28288 .kr(1)
28289 .sr(1)
28290 .m(1)
28291 .n(n)
28292 .k(k)
28293 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028294 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028295 }
28296 }
28297 }
28298
Marat Dukhande06f492020-04-09 00:19:31 -070028299 TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028300 for (uint32_t n = 5; n < 8; n++) {
28301 for (size_t k = 1; k <= 5; k += 2) {
28302 for (uint32_t m = 1; m <= 1; m++) {
28303 GemmMicrokernelTester()
28304 .mr(1)
28305 .nr(4)
28306 .kr(1)
28307 .sr(1)
28308 .m(m)
28309 .n(n)
28310 .k(k)
28311 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028312 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028313 }
28314 }
28315 }
28316 }
28317
Marat Dukhande06f492020-04-09 00:19:31 -070028318 TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028319 for (uint32_t n = 8; n <= 12; n += 4) {
28320 for (size_t k = 1; k <= 5; k += 2) {
28321 GemmMicrokernelTester()
28322 .mr(1)
28323 .nr(4)
28324 .kr(1)
28325 .sr(1)
28326 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028327 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028328 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028329 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028330 }
28331 }
28332 }
28333
Marat Dukhande06f492020-04-09 00:19:31 -070028334 TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028335 for (uint32_t n = 8; n <= 12; n += 4) {
28336 for (size_t k = 1; k <= 5; k += 2) {
28337 GemmMicrokernelTester()
28338 .mr(1)
28339 .nr(4)
28340 .kr(1)
28341 .sr(1)
28342 .m(1)
28343 .n(n)
28344 .k(k)
28345 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028346 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028347 }
28348 }
28349 }
28350
Marat Dukhande06f492020-04-09 00:19:31 -070028351 TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028352 for (uint32_t n = 8; n <= 12; n += 4) {
28353 for (size_t k = 1; k <= 5; k += 2) {
28354 GemmMicrokernelTester()
28355 .mr(1)
28356 .nr(4)
28357 .kr(1)
28358 .sr(1)
28359 .m(1)
28360 .n(n)
28361 .k(k)
28362 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028363 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028364 }
28365 }
28366 }
28367
Marat Dukhande06f492020-04-09 00:19:31 -070028368 TEST(F32_GEMMINC_MINMAX_1X4__WASM, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028369 for (uint32_t n = 8; n <= 12; n += 4) {
28370 for (size_t k = 1; k <= 5; k += 2) {
28371 for (uint32_t m = 1; m <= 1; m++) {
28372 GemmMicrokernelTester()
28373 .mr(1)
28374 .nr(4)
28375 .kr(1)
28376 .sr(1)
28377 .m(m)
28378 .n(n)
28379 .k(k)
28380 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028381 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028382 }
28383 }
28384 }
28385 }
28386
Marat Dukhande06f492020-04-09 00:19:31 -070028387 TEST(F32_GEMMINC_MINMAX_1X4__WASM, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028388 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028389 for (uint32_t n = 1; n <= 4; n++) {
28390 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028391 GemmMicrokernelTester()
28392 .mr(1)
28393 .nr(4)
28394 .kr(1)
28395 .sr(1)
28396 .m(m)
28397 .n(n)
28398 .k(k)
28399 .cm_stride(7)
28400 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028401 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028402 }
28403 }
28404 }
28405 }
28406
Marat Dukhande06f492020-04-09 00:19:31 -070028407 TEST(F32_GEMMINC_MINMAX_1X4__WASM, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028408 GemmMicrokernelTester()
28409 .mr(1)
28410 .nr(4)
28411 .kr(1)
28412 .sr(1)
28413 .m(1)
28414 .n(4)
28415 .k(1)
28416 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028417 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028418 }
28419
Marat Dukhande06f492020-04-09 00:19:31 -070028420 TEST(F32_GEMMINC_MINMAX_1X4__WASM, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028421 GemmMicrokernelTester()
28422 .mr(1)
28423 .nr(4)
28424 .kr(1)
28425 .sr(1)
28426 .m(1)
28427 .n(4)
28428 .k(1)
28429 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028430 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028431 }
28432
Marat Dukhande06f492020-04-09 00:19:31 -070028433 TEST(F32_GEMMINC_MINMAX_1X4__WASM, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028434 GemmMicrokernelTester()
28435 .mr(1)
28436 .nr(4)
28437 .kr(1)
28438 .sr(1)
28439 .m(1)
28440 .n(4)
28441 .k(1)
28442 .cm_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028443 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028444 }
Marat Dukhan4c617792021-12-21 15:47:58 -080028445#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1c587112020-04-08 20:04:28 -070028446
28447
Marat Dukhande06f492020-04-09 00:19:31 -070028448TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028449 GemmMicrokernelTester()
28450 .mr(1)
28451 .nr(4)
28452 .kr(1)
28453 .sr(1)
28454 .m(1)
28455 .n(4)
28456 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028457 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028458}
28459
Marat Dukhande06f492020-04-09 00:19:31 -070028460TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028461 GemmMicrokernelTester()
28462 .mr(1)
28463 .nr(4)
28464 .kr(1)
28465 .sr(1)
28466 .m(1)
28467 .n(4)
28468 .k(1)
28469 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028470 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028471}
28472
Marat Dukhande06f492020-04-09 00:19:31 -070028473TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028474 GemmMicrokernelTester()
28475 .mr(1)
28476 .nr(4)
28477 .kr(1)
28478 .sr(1)
28479 .m(1)
28480 .n(4)
28481 .k(1)
28482 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028483 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028484}
28485
Marat Dukhande06f492020-04-09 00:19:31 -070028486TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028487 for (uint32_t n = 1; n <= 4; n++) {
28488 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028489 GemmMicrokernelTester()
28490 .mr(1)
28491 .nr(4)
28492 .kr(1)
28493 .sr(1)
28494 .m(m)
28495 .n(n)
28496 .k(1)
28497 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028498 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028499 }
28500 }
28501}
28502
Marat Dukhande06f492020-04-09 00:19:31 -070028503TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028504 for (uint32_t m = 1; m <= 1; m++) {
28505 GemmMicrokernelTester()
28506 .mr(1)
28507 .nr(4)
28508 .kr(1)
28509 .sr(1)
28510 .m(m)
28511 .n(4)
28512 .k(1)
28513 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028514 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028515 }
28516}
28517
Marat Dukhande06f492020-04-09 00:19:31 -070028518TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028519 for (uint32_t n = 1; n <= 4; n++) {
28520 GemmMicrokernelTester()
28521 .mr(1)
28522 .nr(4)
28523 .kr(1)
28524 .sr(1)
28525 .m(1)
28526 .n(n)
28527 .k(1)
28528 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028529 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028530 }
28531}
28532
Marat Dukhande06f492020-04-09 00:19:31 -070028533TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028534 for (size_t k = 2; k < 10; k++) {
28535 GemmMicrokernelTester()
28536 .mr(1)
28537 .nr(4)
28538 .kr(1)
28539 .sr(1)
28540 .m(1)
28541 .n(4)
28542 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028543 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028544 }
28545}
28546
Marat Dukhande06f492020-04-09 00:19:31 -070028547TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028548 for (size_t k = 2; k < 10; k++) {
28549 GemmMicrokernelTester()
28550 .mr(1)
28551 .nr(4)
28552 .kr(1)
28553 .sr(1)
28554 .m(1)
28555 .n(4)
28556 .k(k)
28557 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028558 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028559 }
28560}
28561
Marat Dukhande06f492020-04-09 00:19:31 -070028562TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028563 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028564 for (uint32_t n = 1; n <= 4; n++) {
28565 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028566 GemmMicrokernelTester()
28567 .mr(1)
28568 .nr(4)
28569 .kr(1)
28570 .sr(1)
28571 .m(m)
28572 .n(n)
28573 .k(k)
28574 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028575 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028576 }
28577 }
28578 }
28579}
28580
Marat Dukhande06f492020-04-09 00:19:31 -070028581TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028582 for (uint32_t n = 5; n < 8; n++) {
28583 for (size_t k = 1; k <= 5; k += 2) {
28584 GemmMicrokernelTester()
28585 .mr(1)
28586 .nr(4)
28587 .kr(1)
28588 .sr(1)
28589 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028590 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028591 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028592 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028593 }
28594 }
28595}
28596
Marat Dukhande06f492020-04-09 00:19:31 -070028597TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028598 for (uint32_t n = 5; n < 8; n++) {
28599 for (size_t k = 1; k <= 5; k += 2) {
28600 GemmMicrokernelTester()
28601 .mr(1)
28602 .nr(4)
28603 .kr(1)
28604 .sr(1)
28605 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028606 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028607 .k(k)
28608 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028609 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028610 }
28611 }
28612}
28613
Marat Dukhande06f492020-04-09 00:19:31 -070028614TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028615 for (uint32_t n = 5; n < 8; n++) {
28616 for (size_t k = 1; k <= 5; k += 2) {
28617 GemmMicrokernelTester()
28618 .mr(1)
28619 .nr(4)
28620 .kr(1)
28621 .sr(1)
28622 .m(1)
28623 .n(n)
28624 .k(k)
28625 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028626 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028627 }
28628 }
28629}
28630
Marat Dukhande06f492020-04-09 00:19:31 -070028631TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028632 for (uint32_t n = 5; n < 8; n++) {
28633 for (size_t k = 1; k <= 5; k += 2) {
28634 for (uint32_t m = 1; m <= 1; m++) {
28635 GemmMicrokernelTester()
28636 .mr(1)
28637 .nr(4)
28638 .kr(1)
28639 .sr(1)
28640 .m(m)
28641 .n(n)
28642 .k(k)
28643 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028644 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028645 }
28646 }
28647 }
28648}
28649
Marat Dukhande06f492020-04-09 00:19:31 -070028650TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028651 for (uint32_t n = 8; n <= 12; n += 4) {
28652 for (size_t k = 1; k <= 5; k += 2) {
28653 GemmMicrokernelTester()
28654 .mr(1)
28655 .nr(4)
28656 .kr(1)
28657 .sr(1)
28658 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028659 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028660 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028661 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028662 }
28663 }
28664}
28665
Marat Dukhande06f492020-04-09 00:19:31 -070028666TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028667 for (uint32_t n = 8; n <= 12; n += 4) {
28668 for (size_t k = 1; k <= 5; k += 2) {
28669 GemmMicrokernelTester()
28670 .mr(1)
28671 .nr(4)
28672 .kr(1)
28673 .sr(1)
28674 .m(1)
28675 .n(n)
28676 .k(k)
28677 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028678 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028679 }
28680 }
28681}
28682
Marat Dukhande06f492020-04-09 00:19:31 -070028683TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028684 for (uint32_t n = 8; n <= 12; n += 4) {
28685 for (size_t k = 1; k <= 5; k += 2) {
28686 GemmMicrokernelTester()
28687 .mr(1)
28688 .nr(4)
28689 .kr(1)
28690 .sr(1)
28691 .m(1)
28692 .n(n)
28693 .k(k)
28694 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028695 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028696 }
28697 }
28698}
28699
Marat Dukhande06f492020-04-09 00:19:31 -070028700TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028701 for (uint32_t n = 8; n <= 12; n += 4) {
28702 for (size_t k = 1; k <= 5; k += 2) {
28703 for (uint32_t m = 1; m <= 1; m++) {
28704 GemmMicrokernelTester()
28705 .mr(1)
28706 .nr(4)
28707 .kr(1)
28708 .sr(1)
28709 .m(m)
28710 .n(n)
28711 .k(k)
28712 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028713 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028714 }
28715 }
28716 }
28717}
28718
Marat Dukhande06f492020-04-09 00:19:31 -070028719TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028720 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028721 for (uint32_t n = 1; n <= 4; n++) {
28722 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028723 GemmMicrokernelTester()
28724 .mr(1)
28725 .nr(4)
28726 .kr(1)
28727 .sr(1)
28728 .m(m)
28729 .n(n)
28730 .k(k)
28731 .cm_stride(7)
28732 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028733 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028734 }
28735 }
28736 }
28737}
28738
Marat Dukhande06f492020-04-09 00:19:31 -070028739TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028740 GemmMicrokernelTester()
28741 .mr(1)
28742 .nr(4)
28743 .kr(1)
28744 .sr(1)
28745 .m(1)
28746 .n(4)
28747 .k(1)
28748 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028749 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028750}
28751
Marat Dukhande06f492020-04-09 00:19:31 -070028752TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028753 GemmMicrokernelTester()
28754 .mr(1)
28755 .nr(4)
28756 .kr(1)
28757 .sr(1)
28758 .m(1)
28759 .n(4)
28760 .k(1)
28761 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028762 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028763}
28764
Marat Dukhande06f492020-04-09 00:19:31 -070028765TEST(F32_GEMMINC_MINMAX_1X4__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028766 GemmMicrokernelTester()
28767 .mr(1)
28768 .nr(4)
28769 .kr(1)
28770 .sr(1)
28771 .m(1)
28772 .n(4)
28773 .k(1)
28774 .cm_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028775 .Test(xnn_f32_gemminc_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028776}
28777
28778
Marat Dukhande06f492020-04-09 00:19:31 -070028779TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028780 GemmMicrokernelTester()
28781 .mr(4)
28782 .nr(4)
28783 .kr(1)
28784 .sr(1)
28785 .m(4)
28786 .n(4)
28787 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028788 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028789}
28790
Marat Dukhande06f492020-04-09 00:19:31 -070028791TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028792 GemmMicrokernelTester()
28793 .mr(4)
28794 .nr(4)
28795 .kr(1)
28796 .sr(1)
28797 .m(4)
28798 .n(4)
28799 .k(1)
28800 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028801 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028802}
28803
Marat Dukhande06f492020-04-09 00:19:31 -070028804TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028805 GemmMicrokernelTester()
28806 .mr(4)
28807 .nr(4)
28808 .kr(1)
28809 .sr(1)
28810 .m(4)
28811 .n(4)
28812 .k(1)
28813 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028814 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028815}
28816
Marat Dukhande06f492020-04-09 00:19:31 -070028817TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028818 for (uint32_t n = 1; n <= 4; n++) {
28819 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028820 GemmMicrokernelTester()
28821 .mr(4)
28822 .nr(4)
28823 .kr(1)
28824 .sr(1)
28825 .m(m)
28826 .n(n)
28827 .k(1)
28828 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028829 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028830 }
28831 }
28832}
28833
Marat Dukhande06f492020-04-09 00:19:31 -070028834TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028835 for (uint32_t m = 1; m <= 4; m++) {
28836 GemmMicrokernelTester()
28837 .mr(4)
28838 .nr(4)
28839 .kr(1)
28840 .sr(1)
28841 .m(m)
28842 .n(4)
28843 .k(1)
28844 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028845 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028846 }
28847}
28848
Marat Dukhande06f492020-04-09 00:19:31 -070028849TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028850 for (uint32_t n = 1; n <= 4; n++) {
28851 GemmMicrokernelTester()
28852 .mr(4)
28853 .nr(4)
28854 .kr(1)
28855 .sr(1)
28856 .m(4)
28857 .n(n)
28858 .k(1)
28859 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028860 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028861 }
28862}
28863
Marat Dukhande06f492020-04-09 00:19:31 -070028864TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028865 for (size_t k = 2; k < 10; k++) {
28866 GemmMicrokernelTester()
28867 .mr(4)
28868 .nr(4)
28869 .kr(1)
28870 .sr(1)
28871 .m(4)
28872 .n(4)
28873 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028874 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028875 }
28876}
28877
Marat Dukhande06f492020-04-09 00:19:31 -070028878TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_gt_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028879 for (size_t k = 2; k < 10; k++) {
28880 GemmMicrokernelTester()
28881 .mr(4)
28882 .nr(4)
28883 .kr(1)
28884 .sr(1)
28885 .m(4)
28886 .n(4)
28887 .k(k)
28888 .a_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028889 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028890 }
28891}
28892
Marat Dukhande06f492020-04-09 00:19:31 -070028893TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028894 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028895 for (uint32_t n = 1; n <= 4; n++) {
28896 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028897 GemmMicrokernelTester()
28898 .mr(4)
28899 .nr(4)
28900 .kr(1)
28901 .sr(1)
28902 .m(m)
28903 .n(n)
28904 .k(k)
28905 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028906 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028907 }
28908 }
28909 }
28910}
28911
Marat Dukhande06f492020-04-09 00:19:31 -070028912TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028913 for (uint32_t n = 5; n < 8; n++) {
28914 for (size_t k = 1; k <= 5; k += 2) {
28915 GemmMicrokernelTester()
28916 .mr(4)
28917 .nr(4)
28918 .kr(1)
28919 .sr(1)
28920 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028921 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028922 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028923 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028924 }
28925 }
28926}
28927
Marat Dukhande06f492020-04-09 00:19:31 -070028928TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028929 for (uint32_t n = 5; n < 8; n++) {
28930 for (size_t k = 1; k <= 5; k += 2) {
28931 GemmMicrokernelTester()
28932 .mr(4)
28933 .nr(4)
28934 .kr(1)
28935 .sr(1)
28936 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028937 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028938 .k(k)
28939 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028940 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028941 }
28942 }
28943}
28944
Marat Dukhande06f492020-04-09 00:19:31 -070028945TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028946 for (uint32_t n = 5; n < 8; n++) {
28947 for (size_t k = 1; k <= 5; k += 2) {
28948 GemmMicrokernelTester()
28949 .mr(4)
28950 .nr(4)
28951 .kr(1)
28952 .sr(1)
28953 .m(4)
28954 .n(n)
28955 .k(k)
28956 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028957 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028958 }
28959 }
28960}
28961
Marat Dukhande06f492020-04-09 00:19:31 -070028962TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028963 for (uint32_t n = 5; n < 8; n++) {
28964 for (size_t k = 1; k <= 5; k += 2) {
28965 for (uint32_t m = 1; m <= 4; m++) {
28966 GemmMicrokernelTester()
28967 .mr(4)
28968 .nr(4)
28969 .kr(1)
28970 .sr(1)
28971 .m(m)
28972 .n(n)
28973 .k(k)
28974 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028975 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028976 }
28977 }
28978 }
28979}
28980
Marat Dukhande06f492020-04-09 00:19:31 -070028981TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028982 for (uint32_t n = 8; n <= 12; n += 4) {
28983 for (size_t k = 1; k <= 5; k += 2) {
28984 GemmMicrokernelTester()
28985 .mr(4)
28986 .nr(4)
28987 .kr(1)
28988 .sr(1)
28989 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028990 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070028991 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070028992 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070028993 }
28994 }
28995}
28996
Marat Dukhande06f492020-04-09 00:19:31 -070028997TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070028998 for (uint32_t n = 8; n <= 12; n += 4) {
28999 for (size_t k = 1; k <= 5; k += 2) {
29000 GemmMicrokernelTester()
29001 .mr(4)
29002 .nr(4)
29003 .kr(1)
29004 .sr(1)
29005 .m(4)
29006 .n(n)
29007 .k(k)
29008 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029009 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029010 }
29011 }
29012}
29013
Marat Dukhande06f492020-04-09 00:19:31 -070029014TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029015 for (uint32_t n = 8; n <= 12; n += 4) {
29016 for (size_t k = 1; k <= 5; k += 2) {
29017 GemmMicrokernelTester()
29018 .mr(4)
29019 .nr(4)
29020 .kr(1)
29021 .sr(1)
29022 .m(4)
29023 .n(n)
29024 .k(k)
29025 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029026 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029027 }
29028 }
29029}
29030
Marat Dukhande06f492020-04-09 00:19:31 -070029031TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029032 for (uint32_t n = 8; n <= 12; n += 4) {
29033 for (size_t k = 1; k <= 5; k += 2) {
29034 for (uint32_t m = 1; m <= 4; m++) {
29035 GemmMicrokernelTester()
29036 .mr(4)
29037 .nr(4)
29038 .kr(1)
29039 .sr(1)
29040 .m(m)
29041 .n(n)
29042 .k(k)
29043 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029044 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029045 }
29046 }
29047 }
29048}
29049
Marat Dukhande06f492020-04-09 00:19:31 -070029050TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029051 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029052 for (uint32_t n = 1; n <= 4; n++) {
29053 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029054 GemmMicrokernelTester()
29055 .mr(4)
29056 .nr(4)
29057 .kr(1)
29058 .sr(1)
29059 .m(m)
29060 .n(n)
29061 .k(k)
29062 .cm_stride(7)
29063 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029064 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029065 }
29066 }
29067 }
29068}
29069
Marat Dukhande06f492020-04-09 00:19:31 -070029070TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029071 GemmMicrokernelTester()
29072 .mr(4)
29073 .nr(4)
29074 .kr(1)
29075 .sr(1)
29076 .m(4)
29077 .n(4)
29078 .k(1)
29079 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029080 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029081}
29082
Marat Dukhande06f492020-04-09 00:19:31 -070029083TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029084 GemmMicrokernelTester()
29085 .mr(4)
29086 .nr(4)
29087 .kr(1)
29088 .sr(1)
29089 .m(4)
29090 .n(4)
29091 .k(1)
29092 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029093 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029094}
29095
Marat Dukhande06f492020-04-09 00:19:31 -070029096TEST(F32_GEMMINC_MINMAX_4X4__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029097 GemmMicrokernelTester()
29098 .mr(4)
29099 .nr(4)
29100 .kr(1)
29101 .sr(1)
29102 .m(4)
29103 .n(4)
29104 .k(1)
29105 .cm_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029106 .Test(xnn_f32_gemminc_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029107}