blob: ebf2eac5a52cfdc6c787b5afdf009f5b5e14eece [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-ppmm-minmax.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
Frank Barchard447aa7b2021-12-28 14:11:40 -080016#include <xnnpack/allocator.h>
Marat Dukhan1c587112020-04-08 20:04:28 -070017#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Test suite for xnn_f32_ppmm_minmax_ukernel_4x8__neon.
// Every case configures GemmMicrokernelTester with mr = 4, nr = 8, kr = 1,
// sr = 1 and starts with TEST_REQUIRES_ARM_NEON (presumably skips the test
// when NEON is unavailable -- confirm in xnnpack/isa-checks.h).

// Full 4x8 tile with k = 1.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
}

// Full tile, k = 1, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .cn_stride(11)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
}

// Full tile, k = 1, with a_stride set to 3.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .a_stride(3)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
}

// k = 1 for every m in [1, 4] and n in [1, 8]; one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// k = 1, n = 8, m swept over [1, 4]; one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
  }
}

// k = 1, m = 4, n swept over [1, 8]; one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
  }
}

// Full tile for every k in [2, 9].
TEST(F32_PPMM_MINMAX_4X8__NEON, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
  }
}

// k in [2, 9] for every m in [1, 4] and n in [1, 8]; one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = 4, n in [9, 15], k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = 4, n in [9, 15], k in {1, 3, 5}, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = 4, n in [9, 15], k in {1, 3, 5}, with a_stride set to 7.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// n in [9, 15], k in {1, 3, 5}, m swept over [1, 4]; one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = 4, n in {16, 24} (multiples of nr), k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = 4, n in {16, 24}, k in {1, 3, 5}, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = 4, n in {16, 24}, k in {1, 3, 5}, with a_stride set to 7.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// n in {16, 24}, k in {1, 3, 5}, m swept over [1, 4]; one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// cm_stride = 11 for k in {1, 3, 5}, every m in [1, 4] and n in [1, 8];
// one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// Full tile, k = 1, with qmin set to 128.
TEST(F32_PPMM_MINMAX_4X8__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .qmin(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
}

// Full tile, k = 1, with qmax set to 128.
TEST(F32_PPMM_MINMAX_4X8__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .qmax(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
}

// Full tile, k = 1, with cm_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .cm_stride(11)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon, xnn_init_f32_minmax_scalar_params);
}
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
362
363
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Test suite for xnn_f32_ppmm_minmax_ukernel_4x8__neonfma.
// Every case configures GemmMicrokernelTester with mr = 4, nr = 8, kr = 1,
// sr = 1 and starts with TEST_REQUIRES_ARM_NEON_FMA (presumably skips the test
// when NEON FMA is unavailable -- confirm in xnnpack/isa-checks.h).

// Full 4x8 tile with k = 1.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
}

// Full tile, k = 1, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .cn_stride(11)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
}

// Full tile, k = 1, with a_stride set to 3.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .a_stride(3)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
}

// k = 1 for every m in [1, 4] and n in [1, 8]; one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// k = 1, n = 8, m swept over [1, 4]; one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}

// k = 1, m = 4, n swept over [1, 8]; one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}

// Full tile for every k in [2, 9].
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
  }
}

// k in [2, 9] for every m in [1, 4] and n in [1, 8]; one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = 4, n in [9, 15], k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = 4, n in [9, 15], k in {1, 3, 5}, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = 4, n in [9, 15], k in {1, 3, 5}, with a_stride set to 7.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// n in [9, 15], k in {1, 3, 5}, m swept over [1, 4]; one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = 4, n in {16, 24} (multiples of nr), k in {1, 3, 5}.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = 4, n in {16, 24}, k in {1, 3, 5}, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = 4, n in {16, 24}, k in {1, 3, 5}, with a_stride set to 7.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// n in {16, 24}, k in {1, 3, 5}, m swept over [1, 4]; one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// cm_stride = 11 for k in {1, 3, 5}, every m in [1, 4] and n in [1, 8];
// one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// Full tile, k = 1, with qmin set to 128.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .qmin(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
}

// Full tile, k = 1, with qmax set to 128.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .qmax(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
}

// Full tile, k = 1, with cm_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(1)
    .cm_stride(11)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma, xnn_init_f32_minmax_scalar_params);
}
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
700
701
702#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -0700703 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700704 TEST_REQUIRES_ARM_NEON;
705 GemmMicrokernelTester()
706 .mr(8)
707 .nr(8)
708 .kr(1)
709 .sr(1)
710 .m(8)
711 .n(8)
712 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700713 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700714 }
715
Marat Dukhande06f492020-04-09 00:19:31 -0700716 TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700717 TEST_REQUIRES_ARM_NEON;
718 GemmMicrokernelTester()
719 .mr(8)
720 .nr(8)
721 .kr(1)
722 .sr(1)
723 .m(8)
724 .n(8)
725 .k(1)
726 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700727 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700728 }
729
Marat Dukhande06f492020-04-09 00:19:31 -0700730 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700731 TEST_REQUIRES_ARM_NEON;
732 GemmMicrokernelTester()
733 .mr(8)
734 .nr(8)
735 .kr(1)
736 .sr(1)
737 .m(8)
738 .n(8)
739 .k(1)
740 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700741 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700742 }
743
Marat Dukhande06f492020-04-09 00:19:31 -0700744 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700745 TEST_REQUIRES_ARM_NEON;
Frank Barchard5e1a3032022-01-14 13:12:41 -0800746 for (uint32_t n = 1; n <= 8; n++) {
747 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700748 GemmMicrokernelTester()
749 .mr(8)
750 .nr(8)
751 .kr(1)
752 .sr(1)
753 .m(m)
754 .n(n)
755 .k(1)
756 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700757 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700758 }
759 }
760 }
761
Marat Dukhande06f492020-04-09 00:19:31 -0700762 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700763 TEST_REQUIRES_ARM_NEON;
764 for (uint32_t m = 1; m <= 8; m++) {
765 GemmMicrokernelTester()
766 .mr(8)
767 .nr(8)
768 .kr(1)
769 .sr(1)
770 .m(m)
771 .n(8)
772 .k(1)
773 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700774 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700775 }
776 }
777
Marat Dukhande06f492020-04-09 00:19:31 -0700778 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700779 TEST_REQUIRES_ARM_NEON;
780 for (uint32_t n = 1; n <= 8; n++) {
781 GemmMicrokernelTester()
782 .mr(8)
783 .nr(8)
784 .kr(1)
785 .sr(1)
786 .m(8)
787 .n(n)
788 .k(1)
789 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700790 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700791 }
792 }
793
Marat Dukhande06f492020-04-09 00:19:31 -0700794 TEST(F32_PPMM_MINMAX_8X8__NEON, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700795 TEST_REQUIRES_ARM_NEON;
796 for (size_t k = 2; k < 10; k++) {
797 GemmMicrokernelTester()
798 .mr(8)
799 .nr(8)
800 .kr(1)
801 .sr(1)
802 .m(8)
803 .n(8)
804 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700805 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700806 }
807 }
808
Marat Dukhande06f492020-04-09 00:19:31 -0700809 TEST(F32_PPMM_MINMAX_8X8__NEON, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700810 TEST_REQUIRES_ARM_NEON;
811 for (size_t k = 2; k < 10; k++) {
Frank Barchard5e1a3032022-01-14 13:12:41 -0800812 for (uint32_t n = 1; n <= 8; n++) {
813 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700814 GemmMicrokernelTester()
815 .mr(8)
816 .nr(8)
817 .kr(1)
818 .sr(1)
819 .m(m)
820 .n(n)
821 .k(k)
822 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700823 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700824 }
825 }
826 }
827 }
828
Marat Dukhande06f492020-04-09 00:19:31 -0700829 TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700830 TEST_REQUIRES_ARM_NEON;
831 for (uint32_t n = 9; n < 16; n++) {
832 for (size_t k = 1; k <= 5; k += 2) {
833 GemmMicrokernelTester()
834 .mr(8)
835 .nr(8)
836 .kr(1)
837 .sr(1)
838 .m(8)
Frank Barchard5e1a3032022-01-14 13:12:41 -0800839 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -0700840 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700841 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700842 }
843 }
844 }
845
Marat Dukhande06f492020-04-09 00:19:31 -0700846 TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700847 TEST_REQUIRES_ARM_NEON;
848 for (uint32_t n = 9; n < 16; n++) {
849 for (size_t k = 1; k <= 5; k += 2) {
850 GemmMicrokernelTester()
851 .mr(8)
852 .nr(8)
853 .kr(1)
854 .sr(1)
855 .m(8)
Frank Barchard5e1a3032022-01-14 13:12:41 -0800856 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -0700857 .k(k)
858 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700859 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700860 }
861 }
862 }
863
Marat Dukhande06f492020-04-09 00:19:31 -0700864 TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700865 TEST_REQUIRES_ARM_NEON;
866 for (uint32_t n = 9; n < 16; n++) {
867 for (size_t k = 1; k <= 5; k += 2) {
868 GemmMicrokernelTester()
869 .mr(8)
870 .nr(8)
871 .kr(1)
872 .sr(1)
873 .m(8)
874 .n(n)
875 .k(k)
876 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700877 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700878 }
879 }
880 }
881
Marat Dukhande06f492020-04-09 00:19:31 -0700882 TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700883 TEST_REQUIRES_ARM_NEON;
884 for (uint32_t n = 9; n < 16; n++) {
885 for (size_t k = 1; k <= 5; k += 2) {
886 for (uint32_t m = 1; m <= 8; m++) {
887 GemmMicrokernelTester()
888 .mr(8)
889 .nr(8)
890 .kr(1)
891 .sr(1)
892 .m(m)
893 .n(n)
894 .k(k)
895 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700896 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700897 }
898 }
899 }
900 }
901
Marat Dukhande06f492020-04-09 00:19:31 -0700902 TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700903 TEST_REQUIRES_ARM_NEON;
904 for (uint32_t n = 16; n <= 24; n += 8) {
905 for (size_t k = 1; k <= 5; k += 2) {
906 GemmMicrokernelTester()
907 .mr(8)
908 .nr(8)
909 .kr(1)
910 .sr(1)
911 .m(8)
Frank Barchard5e1a3032022-01-14 13:12:41 -0800912 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -0700913 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700914 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700915 }
916 }
917 }
918
Marat Dukhande06f492020-04-09 00:19:31 -0700919 TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700920 TEST_REQUIRES_ARM_NEON;
921 for (uint32_t n = 16; n <= 24; n += 8) {
922 for (size_t k = 1; k <= 5; k += 2) {
923 GemmMicrokernelTester()
924 .mr(8)
925 .nr(8)
926 .kr(1)
927 .sr(1)
928 .m(8)
929 .n(n)
930 .k(k)
931 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700932 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700933 }
934 }
935 }
936
Marat Dukhande06f492020-04-09 00:19:31 -0700937 TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700938 TEST_REQUIRES_ARM_NEON;
939 for (uint32_t n = 16; n <= 24; n += 8) {
940 for (size_t k = 1; k <= 5; k += 2) {
941 GemmMicrokernelTester()
942 .mr(8)
943 .nr(8)
944 .kr(1)
945 .sr(1)
946 .m(8)
947 .n(n)
948 .k(k)
949 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700950 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700951 }
952 }
953 }
954
Marat Dukhande06f492020-04-09 00:19:31 -0700955 TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700956 TEST_REQUIRES_ARM_NEON;
957 for (uint32_t n = 16; n <= 24; n += 8) {
958 for (size_t k = 1; k <= 5; k += 2) {
959 for (uint32_t m = 1; m <= 8; m++) {
960 GemmMicrokernelTester()
961 .mr(8)
962 .nr(8)
963 .kr(1)
964 .sr(1)
965 .m(m)
966 .n(n)
967 .k(k)
968 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700969 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700970 }
971 }
972 }
973 }
974
Marat Dukhande06f492020-04-09 00:19:31 -0700975 TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700976 TEST_REQUIRES_ARM_NEON;
977 for (size_t k = 1; k <= 5; k += 2) {
Frank Barchard5e1a3032022-01-14 13:12:41 -0800978 for (uint32_t n = 1; n <= 8; n++) {
979 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700980 GemmMicrokernelTester()
981 .mr(8)
982 .nr(8)
983 .kr(1)
984 .sr(1)
985 .m(m)
986 .n(n)
987 .k(k)
988 .cm_stride(11)
989 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700990 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700991 }
992 }
993 }
994 }
995
Marat Dukhande06f492020-04-09 00:19:31 -0700996 TEST(F32_PPMM_MINMAX_8X8__NEON, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700997 TEST_REQUIRES_ARM_NEON;
998 GemmMicrokernelTester()
999 .mr(8)
1000 .nr(8)
1001 .kr(1)
1002 .sr(1)
1003 .m(8)
1004 .n(8)
1005 .k(1)
1006 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001007 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001008 }
1009
Marat Dukhande06f492020-04-09 00:19:31 -07001010 TEST(F32_PPMM_MINMAX_8X8__NEON, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001011 TEST_REQUIRES_ARM_NEON;
1012 GemmMicrokernelTester()
1013 .mr(8)
1014 .nr(8)
1015 .kr(1)
1016 .sr(1)
1017 .m(8)
1018 .n(8)
1019 .k(1)
1020 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001021 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001022 }
1023
Marat Dukhande06f492020-04-09 00:19:31 -07001024 TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001025 TEST_REQUIRES_ARM_NEON;
1026 GemmMicrokernelTester()
1027 .mr(8)
1028 .nr(8)
1029 .kr(1)
1030 .sr(1)
1031 .m(8)
1032 .n(8)
1033 .k(1)
1034 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001035 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001036 }
1037#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1038
1039
1040#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07001041 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001042 TEST_REQUIRES_ARM_NEON_FMA;
1043 GemmMicrokernelTester()
1044 .mr(8)
1045 .nr(8)
1046 .kr(1)
1047 .sr(1)
1048 .m(8)
1049 .n(8)
1050 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001051 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001052 }
1053
Marat Dukhande06f492020-04-09 00:19:31 -07001054 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001055 TEST_REQUIRES_ARM_NEON_FMA;
1056 GemmMicrokernelTester()
1057 .mr(8)
1058 .nr(8)
1059 .kr(1)
1060 .sr(1)
1061 .m(8)
1062 .n(8)
1063 .k(1)
1064 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001065 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001066 }
1067
Marat Dukhande06f492020-04-09 00:19:31 -07001068 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001069 TEST_REQUIRES_ARM_NEON_FMA;
1070 GemmMicrokernelTester()
1071 .mr(8)
1072 .nr(8)
1073 .kr(1)
1074 .sr(1)
1075 .m(8)
1076 .n(8)
1077 .k(1)
1078 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001079 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001080 }
1081
Marat Dukhande06f492020-04-09 00:19:31 -07001082 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001083 TEST_REQUIRES_ARM_NEON_FMA;
Frank Barchard5e1a3032022-01-14 13:12:41 -08001084 for (uint32_t n = 1; n <= 8; n++) {
1085 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001086 GemmMicrokernelTester()
1087 .mr(8)
1088 .nr(8)
1089 .kr(1)
1090 .sr(1)
1091 .m(m)
1092 .n(n)
1093 .k(1)
1094 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001095 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001096 }
1097 }
1098 }
1099
Marat Dukhande06f492020-04-09 00:19:31 -07001100 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001101 TEST_REQUIRES_ARM_NEON_FMA;
1102 for (uint32_t m = 1; m <= 8; m++) {
1103 GemmMicrokernelTester()
1104 .mr(8)
1105 .nr(8)
1106 .kr(1)
1107 .sr(1)
1108 .m(m)
1109 .n(8)
1110 .k(1)
1111 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001112 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001113 }
1114 }
1115
Marat Dukhande06f492020-04-09 00:19:31 -07001116 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001117 TEST_REQUIRES_ARM_NEON_FMA;
1118 for (uint32_t n = 1; n <= 8; n++) {
1119 GemmMicrokernelTester()
1120 .mr(8)
1121 .nr(8)
1122 .kr(1)
1123 .sr(1)
1124 .m(8)
1125 .n(n)
1126 .k(1)
1127 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001128 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001129 }
1130 }
1131
Marat Dukhande06f492020-04-09 00:19:31 -07001132 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001133 TEST_REQUIRES_ARM_NEON_FMA;
1134 for (size_t k = 2; k < 10; k++) {
1135 GemmMicrokernelTester()
1136 .mr(8)
1137 .nr(8)
1138 .kr(1)
1139 .sr(1)
1140 .m(8)
1141 .n(8)
1142 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001143 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001144 }
1145 }
1146
Marat Dukhande06f492020-04-09 00:19:31 -07001147 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001148 TEST_REQUIRES_ARM_NEON_FMA;
1149 for (size_t k = 2; k < 10; k++) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08001150 for (uint32_t n = 1; n <= 8; n++) {
1151 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001152 GemmMicrokernelTester()
1153 .mr(8)
1154 .nr(8)
1155 .kr(1)
1156 .sr(1)
1157 .m(m)
1158 .n(n)
1159 .k(k)
1160 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001161 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001162 }
1163 }
1164 }
1165 }
1166
Marat Dukhande06f492020-04-09 00:19:31 -07001167 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001168 TEST_REQUIRES_ARM_NEON_FMA;
1169 for (uint32_t n = 9; n < 16; n++) {
1170 for (size_t k = 1; k <= 5; k += 2) {
1171 GemmMicrokernelTester()
1172 .mr(8)
1173 .nr(8)
1174 .kr(1)
1175 .sr(1)
1176 .m(8)
Frank Barchard5e1a3032022-01-14 13:12:41 -08001177 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001178 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001179 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001180 }
1181 }
1182 }
1183
Marat Dukhande06f492020-04-09 00:19:31 -07001184 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001185 TEST_REQUIRES_ARM_NEON_FMA;
1186 for (uint32_t n = 9; n < 16; n++) {
1187 for (size_t k = 1; k <= 5; k += 2) {
1188 GemmMicrokernelTester()
1189 .mr(8)
1190 .nr(8)
1191 .kr(1)
1192 .sr(1)
1193 .m(8)
Frank Barchard5e1a3032022-01-14 13:12:41 -08001194 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001195 .k(k)
1196 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001197 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001198 }
1199 }
1200 }
1201
Marat Dukhande06f492020-04-09 00:19:31 -07001202 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001203 TEST_REQUIRES_ARM_NEON_FMA;
1204 for (uint32_t n = 9; n < 16; n++) {
1205 for (size_t k = 1; k <= 5; k += 2) {
1206 GemmMicrokernelTester()
1207 .mr(8)
1208 .nr(8)
1209 .kr(1)
1210 .sr(1)
1211 .m(8)
1212 .n(n)
1213 .k(k)
1214 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001215 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001216 }
1217 }
1218 }
1219
Marat Dukhande06f492020-04-09 00:19:31 -07001220 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001221 TEST_REQUIRES_ARM_NEON_FMA;
1222 for (uint32_t n = 9; n < 16; n++) {
1223 for (size_t k = 1; k <= 5; k += 2) {
1224 for (uint32_t m = 1; m <= 8; m++) {
1225 GemmMicrokernelTester()
1226 .mr(8)
1227 .nr(8)
1228 .kr(1)
1229 .sr(1)
1230 .m(m)
1231 .n(n)
1232 .k(k)
1233 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001234 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001235 }
1236 }
1237 }
1238 }
1239
Marat Dukhande06f492020-04-09 00:19:31 -07001240 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001241 TEST_REQUIRES_ARM_NEON_FMA;
1242 for (uint32_t n = 16; n <= 24; n += 8) {
1243 for (size_t k = 1; k <= 5; k += 2) {
1244 GemmMicrokernelTester()
1245 .mr(8)
1246 .nr(8)
1247 .kr(1)
1248 .sr(1)
1249 .m(8)
Frank Barchard5e1a3032022-01-14 13:12:41 -08001250 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001251 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001252 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001253 }
1254 }
1255 }
1256
Marat Dukhande06f492020-04-09 00:19:31 -07001257 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001258 TEST_REQUIRES_ARM_NEON_FMA;
1259 for (uint32_t n = 16; n <= 24; n += 8) {
1260 for (size_t k = 1; k <= 5; k += 2) {
1261 GemmMicrokernelTester()
1262 .mr(8)
1263 .nr(8)
1264 .kr(1)
1265 .sr(1)
1266 .m(8)
1267 .n(n)
1268 .k(k)
1269 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001270 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001271 }
1272 }
1273 }
1274
Marat Dukhande06f492020-04-09 00:19:31 -07001275 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001276 TEST_REQUIRES_ARM_NEON_FMA;
1277 for (uint32_t n = 16; n <= 24; n += 8) {
1278 for (size_t k = 1; k <= 5; k += 2) {
1279 GemmMicrokernelTester()
1280 .mr(8)
1281 .nr(8)
1282 .kr(1)
1283 .sr(1)
1284 .m(8)
1285 .n(n)
1286 .k(k)
1287 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001288 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001289 }
1290 }
1291 }
1292
Marat Dukhande06f492020-04-09 00:19:31 -07001293 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001294 TEST_REQUIRES_ARM_NEON_FMA;
1295 for (uint32_t n = 16; n <= 24; n += 8) {
1296 for (size_t k = 1; k <= 5; k += 2) {
1297 for (uint32_t m = 1; m <= 8; m++) {
1298 GemmMicrokernelTester()
1299 .mr(8)
1300 .nr(8)
1301 .kr(1)
1302 .sr(1)
1303 .m(m)
1304 .n(n)
1305 .k(k)
1306 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001307 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001308 }
1309 }
1310 }
1311 }
1312
Marat Dukhande06f492020-04-09 00:19:31 -07001313 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001314 TEST_REQUIRES_ARM_NEON_FMA;
1315 for (size_t k = 1; k <= 5; k += 2) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08001316 for (uint32_t n = 1; n <= 8; n++) {
1317 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001318 GemmMicrokernelTester()
1319 .mr(8)
1320 .nr(8)
1321 .kr(1)
1322 .sr(1)
1323 .m(m)
1324 .n(n)
1325 .k(k)
1326 .cm_stride(11)
1327 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001328 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001329 }
1330 }
1331 }
1332 }
1333
Marat Dukhande06f492020-04-09 00:19:31 -07001334 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001335 TEST_REQUIRES_ARM_NEON_FMA;
1336 GemmMicrokernelTester()
1337 .mr(8)
1338 .nr(8)
1339 .kr(1)
1340 .sr(1)
1341 .m(8)
1342 .n(8)
1343 .k(1)
1344 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001345 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001346 }
1347
Marat Dukhande06f492020-04-09 00:19:31 -07001348 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001349 TEST_REQUIRES_ARM_NEON_FMA;
1350 GemmMicrokernelTester()
1351 .mr(8)
1352 .nr(8)
1353 .kr(1)
1354 .sr(1)
1355 .m(8)
1356 .n(8)
1357 .k(1)
1358 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001359 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001360 }
1361
Marat Dukhande06f492020-04-09 00:19:31 -07001362 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001363 TEST_REQUIRES_ARM_NEON_FMA;
1364 GemmMicrokernelTester()
1365 .mr(8)
1366 .nr(8)
1367 .kr(1)
1368 .sr(1)
1369 .m(8)
1370 .n(8)
1371 .k(1)
1372 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001373 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001374 }
1375#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1376
1377
1378#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -07001379 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001380 TEST_REQUIRES_X86_SSE;
1381 GemmMicrokernelTester()
1382 .mr(4)
1383 .nr(8)
1384 .kr(1)
1385 .sr(1)
1386 .m(4)
1387 .n(8)
1388 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001389 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001390 }
1391
Marat Dukhande06f492020-04-09 00:19:31 -07001392 TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001393 TEST_REQUIRES_X86_SSE;
1394 GemmMicrokernelTester()
1395 .mr(4)
1396 .nr(8)
1397 .kr(1)
1398 .sr(1)
1399 .m(4)
1400 .n(8)
1401 .k(1)
1402 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001403 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001404 }
1405
Marat Dukhande06f492020-04-09 00:19:31 -07001406 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001407 TEST_REQUIRES_X86_SSE;
1408 GemmMicrokernelTester()
1409 .mr(4)
1410 .nr(8)
1411 .kr(1)
1412 .sr(1)
1413 .m(4)
1414 .n(8)
1415 .k(1)
1416 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001417 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001418 }
1419
Marat Dukhande06f492020-04-09 00:19:31 -07001420 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001421 TEST_REQUIRES_X86_SSE;
Frank Barchard5e1a3032022-01-14 13:12:41 -08001422 for (uint32_t n = 1; n <= 8; n++) {
1423 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001424 GemmMicrokernelTester()
1425 .mr(4)
1426 .nr(8)
1427 .kr(1)
1428 .sr(1)
1429 .m(m)
1430 .n(n)
1431 .k(1)
1432 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001433 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001434 }
1435 }
1436 }
1437
Marat Dukhande06f492020-04-09 00:19:31 -07001438 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001439 TEST_REQUIRES_X86_SSE;
1440 for (uint32_t m = 1; m <= 4; m++) {
1441 GemmMicrokernelTester()
1442 .mr(4)
1443 .nr(8)
1444 .kr(1)
1445 .sr(1)
1446 .m(m)
1447 .n(8)
1448 .k(1)
1449 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001450 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001451 }
1452 }
1453
Marat Dukhande06f492020-04-09 00:19:31 -07001454 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001455 TEST_REQUIRES_X86_SSE;
1456 for (uint32_t n = 1; n <= 8; n++) {
1457 GemmMicrokernelTester()
1458 .mr(4)
1459 .nr(8)
1460 .kr(1)
1461 .sr(1)
1462 .m(4)
1463 .n(n)
1464 .k(1)
1465 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001466 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001467 }
1468 }
1469
Marat Dukhande06f492020-04-09 00:19:31 -07001470 TEST(F32_PPMM_MINMAX_4X8__SSE, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001471 TEST_REQUIRES_X86_SSE;
1472 for (size_t k = 2; k < 10; k++) {
1473 GemmMicrokernelTester()
1474 .mr(4)
1475 .nr(8)
1476 .kr(1)
1477 .sr(1)
1478 .m(4)
1479 .n(8)
1480 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001481 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001482 }
1483 }
1484
Marat Dukhande06f492020-04-09 00:19:31 -07001485 TEST(F32_PPMM_MINMAX_4X8__SSE, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001486 TEST_REQUIRES_X86_SSE;
1487 for (size_t k = 2; k < 10; k++) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08001488 for (uint32_t n = 1; n <= 8; n++) {
1489 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001490 GemmMicrokernelTester()
1491 .mr(4)
1492 .nr(8)
1493 .kr(1)
1494 .sr(1)
1495 .m(m)
1496 .n(n)
1497 .k(k)
1498 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001499 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001500 }
1501 }
1502 }
1503 }
1504
Marat Dukhande06f492020-04-09 00:19:31 -07001505 TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001506 TEST_REQUIRES_X86_SSE;
1507 for (uint32_t n = 9; n < 16; n++) {
1508 for (size_t k = 1; k <= 5; k += 2) {
1509 GemmMicrokernelTester()
1510 .mr(4)
1511 .nr(8)
1512 .kr(1)
1513 .sr(1)
1514 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08001515 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001516 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001517 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001518 }
1519 }
1520 }
1521
Marat Dukhande06f492020-04-09 00:19:31 -07001522 TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001523 TEST_REQUIRES_X86_SSE;
1524 for (uint32_t n = 9; n < 16; n++) {
1525 for (size_t k = 1; k <= 5; k += 2) {
1526 GemmMicrokernelTester()
1527 .mr(4)
1528 .nr(8)
1529 .kr(1)
1530 .sr(1)
1531 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08001532 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001533 .k(k)
1534 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001535 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001536 }
1537 }
1538 }
1539
Marat Dukhande06f492020-04-09 00:19:31 -07001540 TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001541 TEST_REQUIRES_X86_SSE;
1542 for (uint32_t n = 9; n < 16; n++) {
1543 for (size_t k = 1; k <= 5; k += 2) {
1544 GemmMicrokernelTester()
1545 .mr(4)
1546 .nr(8)
1547 .kr(1)
1548 .sr(1)
1549 .m(4)
1550 .n(n)
1551 .k(k)
1552 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001553 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001554 }
1555 }
1556 }
1557
Marat Dukhande06f492020-04-09 00:19:31 -07001558 TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001559 TEST_REQUIRES_X86_SSE;
1560 for (uint32_t n = 9; n < 16; n++) {
1561 for (size_t k = 1; k <= 5; k += 2) {
1562 for (uint32_t m = 1; m <= 4; m++) {
1563 GemmMicrokernelTester()
1564 .mr(4)
1565 .nr(8)
1566 .kr(1)
1567 .sr(1)
1568 .m(m)
1569 .n(n)
1570 .k(k)
1571 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001572 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001573 }
1574 }
1575 }
1576 }
1577
Marat Dukhande06f492020-04-09 00:19:31 -07001578 TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001579 TEST_REQUIRES_X86_SSE;
1580 for (uint32_t n = 16; n <= 24; n += 8) {
1581 for (size_t k = 1; k <= 5; k += 2) {
1582 GemmMicrokernelTester()
1583 .mr(4)
1584 .nr(8)
1585 .kr(1)
1586 .sr(1)
1587 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08001588 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001589 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001590 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001591 }
1592 }
1593 }
1594
Marat Dukhande06f492020-04-09 00:19:31 -07001595 TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001596 TEST_REQUIRES_X86_SSE;
1597 for (uint32_t n = 16; n <= 24; n += 8) {
1598 for (size_t k = 1; k <= 5; k += 2) {
1599 GemmMicrokernelTester()
1600 .mr(4)
1601 .nr(8)
1602 .kr(1)
1603 .sr(1)
1604 .m(4)
1605 .n(n)
1606 .k(k)
1607 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001608 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001609 }
1610 }
1611 }
1612
Marat Dukhande06f492020-04-09 00:19:31 -07001613 TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001614 TEST_REQUIRES_X86_SSE;
1615 for (uint32_t n = 16; n <= 24; n += 8) {
1616 for (size_t k = 1; k <= 5; k += 2) {
1617 GemmMicrokernelTester()
1618 .mr(4)
1619 .nr(8)
1620 .kr(1)
1621 .sr(1)
1622 .m(4)
1623 .n(n)
1624 .k(k)
1625 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001626 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001627 }
1628 }
1629 }
1630
Marat Dukhande06f492020-04-09 00:19:31 -07001631 TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001632 TEST_REQUIRES_X86_SSE;
1633 for (uint32_t n = 16; n <= 24; n += 8) {
1634 for (size_t k = 1; k <= 5; k += 2) {
1635 for (uint32_t m = 1; m <= 4; m++) {
1636 GemmMicrokernelTester()
1637 .mr(4)
1638 .nr(8)
1639 .kr(1)
1640 .sr(1)
1641 .m(m)
1642 .n(n)
1643 .k(k)
1644 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001645 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001646 }
1647 }
1648 }
1649 }
1650
Marat Dukhande06f492020-04-09 00:19:31 -07001651 TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001652 TEST_REQUIRES_X86_SSE;
1653 for (size_t k = 1; k <= 5; k += 2) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08001654 for (uint32_t n = 1; n <= 8; n++) {
1655 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001656 GemmMicrokernelTester()
1657 .mr(4)
1658 .nr(8)
1659 .kr(1)
1660 .sr(1)
1661 .m(m)
1662 .n(n)
1663 .k(k)
1664 .cm_stride(11)
1665 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001666 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001667 }
1668 }
1669 }
1670 }
1671
Marat Dukhande06f492020-04-09 00:19:31 -07001672 TEST(F32_PPMM_MINMAX_4X8__SSE, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001673 TEST_REQUIRES_X86_SSE;
1674 GemmMicrokernelTester()
1675 .mr(4)
1676 .nr(8)
1677 .kr(1)
1678 .sr(1)
1679 .m(4)
1680 .n(8)
1681 .k(1)
1682 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001683 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001684 }
1685
Marat Dukhande06f492020-04-09 00:19:31 -07001686 TEST(F32_PPMM_MINMAX_4X8__SSE, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001687 TEST_REQUIRES_X86_SSE;
1688 GemmMicrokernelTester()
1689 .mr(4)
1690 .nr(8)
1691 .kr(1)
1692 .sr(1)
1693 .m(4)
1694 .n(8)
1695 .k(1)
1696 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001697 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001698 }
1699
Marat Dukhande06f492020-04-09 00:19:31 -07001700 TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001701 TEST_REQUIRES_X86_SSE;
1702 GemmMicrokernelTester()
1703 .mr(4)
1704 .nr(8)
1705 .kr(1)
1706 .sr(1)
1707 .m(4)
1708 .n(8)
1709 .k(1)
1710 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001711 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001712 }
1713#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1714
1715
Marat Dukhan4c617792021-12-21 15:47:58 -08001716#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -08001717 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001718 GemmMicrokernelTester()
1719 .mr(4)
1720 .nr(8)
1721 .kr(1)
1722 .sr(1)
1723 .m(4)
1724 .n(8)
1725 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001726 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001727 }
1728
Frank Barchard0725b8d2020-12-07 11:07:35 -08001729 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cn) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001730 GemmMicrokernelTester()
1731 .mr(4)
1732 .nr(8)
1733 .kr(1)
1734 .sr(1)
1735 .m(4)
1736 .n(8)
1737 .k(1)
1738 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001739 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001740 }
1741
Frank Barchard0725b8d2020-12-07 11:07:35 -08001742 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_strided_a) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001743 GemmMicrokernelTester()
1744 .mr(4)
1745 .nr(8)
1746 .kr(1)
1747 .sr(1)
1748 .m(4)
1749 .n(8)
1750 .k(1)
1751 .a_stride(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001752 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001753 }
1754
Frank Barchard0725b8d2020-12-07 11:07:35 -08001755 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_subtile) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08001756 for (uint32_t n = 1; n <= 8; n++) {
1757 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001758 GemmMicrokernelTester()
1759 .mr(4)
1760 .nr(8)
1761 .kr(1)
1762 .sr(1)
1763 .m(m)
1764 .n(n)
1765 .k(1)
1766 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001767 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001768 }
1769 }
1770 }
1771
Frank Barchard0725b8d2020-12-07 11:07:35 -08001772 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_subtile_m) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001773 for (uint32_t m = 1; m <= 4; m++) {
1774 GemmMicrokernelTester()
1775 .mr(4)
1776 .nr(8)
1777 .kr(1)
1778 .sr(1)
1779 .m(m)
1780 .n(8)
1781 .k(1)
1782 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001783 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001784 }
1785 }
1786
Frank Barchard0725b8d2020-12-07 11:07:35 -08001787 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_subtile_n) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001788 for (uint32_t n = 1; n <= 8; n++) {
1789 GemmMicrokernelTester()
1790 .mr(4)
1791 .nr(8)
1792 .kr(1)
1793 .sr(1)
1794 .m(4)
1795 .n(n)
1796 .k(1)
1797 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001798 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001799 }
1800 }
1801
Frank Barchard0725b8d2020-12-07 11:07:35 -08001802 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_1) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001803 for (size_t k = 2; k < 10; k++) {
1804 GemmMicrokernelTester()
1805 .mr(4)
1806 .nr(8)
1807 .kr(1)
1808 .sr(1)
1809 .m(4)
1810 .n(8)
1811 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001812 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001813 }
1814 }
1815
Frank Barchard0725b8d2020-12-07 11:07:35 -08001816 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_1_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001817 for (size_t k = 2; k < 10; k++) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08001818 for (uint32_t n = 1; n <= 8; n++) {
1819 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001820 GemmMicrokernelTester()
1821 .mr(4)
1822 .nr(8)
1823 .kr(1)
1824 .sr(1)
1825 .m(m)
1826 .n(n)
1827 .k(k)
1828 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001829 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001830 }
1831 }
1832 }
1833 }
1834
Frank Barchard0725b8d2020-12-07 11:07:35 -08001835 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001836 for (uint32_t n = 9; n < 16; n++) {
1837 for (size_t k = 1; k <= 5; k += 2) {
1838 GemmMicrokernelTester()
1839 .mr(4)
1840 .nr(8)
1841 .kr(1)
1842 .sr(1)
1843 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08001844 .n(n)
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001845 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001846 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001847 }
1848 }
1849 }
1850
Frank Barchard0725b8d2020-12-07 11:07:35 -08001851 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001852 for (uint32_t n = 9; n < 16; n++) {
1853 for (size_t k = 1; k <= 5; k += 2) {
1854 GemmMicrokernelTester()
1855 .mr(4)
1856 .nr(8)
1857 .kr(1)
1858 .sr(1)
1859 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08001860 .n(n)
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001861 .k(k)
1862 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001863 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001864 }
1865 }
1866 }
1867
Frank Barchard0725b8d2020-12-07 11:07:35 -08001868 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001869 for (uint32_t n = 9; n < 16; n++) {
1870 for (size_t k = 1; k <= 5; k += 2) {
1871 GemmMicrokernelTester()
1872 .mr(4)
1873 .nr(8)
1874 .kr(1)
1875 .sr(1)
1876 .m(4)
1877 .n(n)
1878 .k(k)
1879 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001880 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001881 }
1882 }
1883 }
1884
Frank Barchard0725b8d2020-12-07 11:07:35 -08001885 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001886 for (uint32_t n = 9; n < 16; n++) {
1887 for (size_t k = 1; k <= 5; k += 2) {
1888 for (uint32_t m = 1; m <= 4; m++) {
1889 GemmMicrokernelTester()
1890 .mr(4)
1891 .nr(8)
1892 .kr(1)
1893 .sr(1)
1894 .m(m)
1895 .n(n)
1896 .k(k)
1897 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001898 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001899 }
1900 }
1901 }
1902 }
1903
Frank Barchard0725b8d2020-12-07 11:07:35 -08001904 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001905 for (uint32_t n = 16; n <= 24; n += 8) {
1906 for (size_t k = 1; k <= 5; k += 2) {
1907 GemmMicrokernelTester()
1908 .mr(4)
1909 .nr(8)
1910 .kr(1)
1911 .sr(1)
1912 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08001913 .n(n)
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001914 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001915 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001916 }
1917 }
1918 }
1919
Frank Barchard0725b8d2020-12-07 11:07:35 -08001920 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001921 for (uint32_t n = 16; n <= 24; n += 8) {
1922 for (size_t k = 1; k <= 5; k += 2) {
1923 GemmMicrokernelTester()
1924 .mr(4)
1925 .nr(8)
1926 .kr(1)
1927 .sr(1)
1928 .m(4)
1929 .n(n)
1930 .k(k)
1931 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001932 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001933 }
1934 }
1935 }
1936
Frank Barchard0725b8d2020-12-07 11:07:35 -08001937 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001938 for (uint32_t n = 16; n <= 24; n += 8) {
1939 for (size_t k = 1; k <= 5; k += 2) {
1940 GemmMicrokernelTester()
1941 .mr(4)
1942 .nr(8)
1943 .kr(1)
1944 .sr(1)
1945 .m(4)
1946 .n(n)
1947 .k(k)
1948 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001949 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001950 }
1951 }
1952 }
1953
Frank Barchard0725b8d2020-12-07 11:07:35 -08001954 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001955 for (uint32_t n = 16; n <= 24; n += 8) {
1956 for (size_t k = 1; k <= 5; k += 2) {
1957 for (uint32_t m = 1; m <= 4; m++) {
1958 GemmMicrokernelTester()
1959 .mr(4)
1960 .nr(8)
1961 .kr(1)
1962 .sr(1)
1963 .m(m)
1964 .n(n)
1965 .k(k)
1966 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001967 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001968 }
1969 }
1970 }
1971 }
1972
Frank Barchard0725b8d2020-12-07 11:07:35 -08001973 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001974 for (size_t k = 1; k <= 5; k += 2) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08001975 for (uint32_t n = 1; n <= 8; n++) {
1976 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001977 GemmMicrokernelTester()
1978 .mr(4)
1979 .nr(8)
1980 .kr(1)
1981 .sr(1)
1982 .m(m)
1983 .n(n)
1984 .k(k)
1985 .cm_stride(11)
1986 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08001987 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001988 }
1989 }
1990 }
1991 }
1992
Frank Barchard0725b8d2020-12-07 11:07:35 -08001993 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmin) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001994 GemmMicrokernelTester()
1995 .mr(4)
1996 .nr(8)
1997 .kr(1)
1998 .sr(1)
1999 .m(4)
2000 .n(8)
2001 .k(1)
2002 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002003 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002004 }
2005
Frank Barchard0725b8d2020-12-07 11:07:35 -08002006 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmax) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002007 GemmMicrokernelTester()
2008 .mr(4)
2009 .nr(8)
2010 .kr(1)
2011 .sr(1)
2012 .m(4)
2013 .n(8)
2014 .k(1)
2015 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002016 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002017 }
2018
Frank Barchard0725b8d2020-12-07 11:07:35 -08002019 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002020 GemmMicrokernelTester()
2021 .mr(4)
2022 .nr(8)
2023 .kr(1)
2024 .sr(1)
2025 .m(4)
2026 .n(8)
2027 .k(1)
2028 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002029 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002030 }
Marat Dukhan4c617792021-12-21 15:47:58 -08002031#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002032
2033
Marat Dukhan4c617792021-12-21 15:47:58 -08002034#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -08002035 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002036 GemmMicrokernelTester()
2037 .mr(4)
2038 .nr(8)
2039 .kr(1)
2040 .sr(1)
2041 .m(4)
2042 .n(8)
2043 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002044 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002045 }
2046
Frank Barchard0725b8d2020-12-07 11:07:35 -08002047 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cn) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002048 GemmMicrokernelTester()
2049 .mr(4)
2050 .nr(8)
2051 .kr(1)
2052 .sr(1)
2053 .m(4)
2054 .n(8)
2055 .k(1)
2056 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002057 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002058 }
2059
Frank Barchard0725b8d2020-12-07 11:07:35 -08002060 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_strided_a) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002061 GemmMicrokernelTester()
2062 .mr(4)
2063 .nr(8)
2064 .kr(1)
2065 .sr(1)
2066 .m(4)
2067 .n(8)
2068 .k(1)
2069 .a_stride(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002070 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002071 }
2072
Frank Barchard0725b8d2020-12-07 11:07:35 -08002073 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_subtile) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08002074 for (uint32_t n = 1; n <= 8; n++) {
2075 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002076 GemmMicrokernelTester()
2077 .mr(4)
2078 .nr(8)
2079 .kr(1)
2080 .sr(1)
2081 .m(m)
2082 .n(n)
2083 .k(1)
2084 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002085 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002086 }
2087 }
2088 }
2089
Frank Barchard0725b8d2020-12-07 11:07:35 -08002090 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_subtile_m) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002091 for (uint32_t m = 1; m <= 4; m++) {
2092 GemmMicrokernelTester()
2093 .mr(4)
2094 .nr(8)
2095 .kr(1)
2096 .sr(1)
2097 .m(m)
2098 .n(8)
2099 .k(1)
2100 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002101 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002102 }
2103 }
2104
Frank Barchard0725b8d2020-12-07 11:07:35 -08002105 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_subtile_n) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002106 for (uint32_t n = 1; n <= 8; n++) {
2107 GemmMicrokernelTester()
2108 .mr(4)
2109 .nr(8)
2110 .kr(1)
2111 .sr(1)
2112 .m(4)
2113 .n(n)
2114 .k(1)
2115 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002116 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002117 }
2118 }
2119
Frank Barchard0725b8d2020-12-07 11:07:35 -08002120 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_1) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002121 for (size_t k = 2; k < 10; k++) {
2122 GemmMicrokernelTester()
2123 .mr(4)
2124 .nr(8)
2125 .kr(1)
2126 .sr(1)
2127 .m(4)
2128 .n(8)
2129 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002130 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002131 }
2132 }
2133
Frank Barchard0725b8d2020-12-07 11:07:35 -08002134 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_1_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002135 for (size_t k = 2; k < 10; k++) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08002136 for (uint32_t n = 1; n <= 8; n++) {
2137 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002138 GemmMicrokernelTester()
2139 .mr(4)
2140 .nr(8)
2141 .kr(1)
2142 .sr(1)
2143 .m(m)
2144 .n(n)
2145 .k(k)
2146 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002147 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002148 }
2149 }
2150 }
2151 }
2152
Frank Barchard0725b8d2020-12-07 11:07:35 -08002153 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002154 for (uint32_t n = 9; n < 16; n++) {
2155 for (size_t k = 1; k <= 5; k += 2) {
2156 GemmMicrokernelTester()
2157 .mr(4)
2158 .nr(8)
2159 .kr(1)
2160 .sr(1)
2161 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08002162 .n(n)
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002163 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002164 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002165 }
2166 }
2167 }
2168
Frank Barchard0725b8d2020-12-07 11:07:35 -08002169 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002170 for (uint32_t n = 9; n < 16; n++) {
2171 for (size_t k = 1; k <= 5; k += 2) {
2172 GemmMicrokernelTester()
2173 .mr(4)
2174 .nr(8)
2175 .kr(1)
2176 .sr(1)
2177 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08002178 .n(n)
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002179 .k(k)
2180 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002181 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002182 }
2183 }
2184 }
2185
Frank Barchard0725b8d2020-12-07 11:07:35 -08002186 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002187 for (uint32_t n = 9; n < 16; n++) {
2188 for (size_t k = 1; k <= 5; k += 2) {
2189 GemmMicrokernelTester()
2190 .mr(4)
2191 .nr(8)
2192 .kr(1)
2193 .sr(1)
2194 .m(4)
2195 .n(n)
2196 .k(k)
2197 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002198 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002199 }
2200 }
2201 }
2202
Frank Barchard0725b8d2020-12-07 11:07:35 -08002203 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002204 for (uint32_t n = 9; n < 16; n++) {
2205 for (size_t k = 1; k <= 5; k += 2) {
2206 for (uint32_t m = 1; m <= 4; m++) {
2207 GemmMicrokernelTester()
2208 .mr(4)
2209 .nr(8)
2210 .kr(1)
2211 .sr(1)
2212 .m(m)
2213 .n(n)
2214 .k(k)
2215 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002216 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002217 }
2218 }
2219 }
2220 }
2221
Frank Barchard0725b8d2020-12-07 11:07:35 -08002222 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002223 for (uint32_t n = 16; n <= 24; n += 8) {
2224 for (size_t k = 1; k <= 5; k += 2) {
2225 GemmMicrokernelTester()
2226 .mr(4)
2227 .nr(8)
2228 .kr(1)
2229 .sr(1)
2230 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08002231 .n(n)
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002232 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002233 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002234 }
2235 }
2236 }
2237
Frank Barchard0725b8d2020-12-07 11:07:35 -08002238 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002239 for (uint32_t n = 16; n <= 24; n += 8) {
2240 for (size_t k = 1; k <= 5; k += 2) {
2241 GemmMicrokernelTester()
2242 .mr(4)
2243 .nr(8)
2244 .kr(1)
2245 .sr(1)
2246 .m(4)
2247 .n(n)
2248 .k(k)
2249 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002250 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002251 }
2252 }
2253 }
2254
Frank Barchard0725b8d2020-12-07 11:07:35 -08002255 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002256 for (uint32_t n = 16; n <= 24; n += 8) {
2257 for (size_t k = 1; k <= 5; k += 2) {
2258 GemmMicrokernelTester()
2259 .mr(4)
2260 .nr(8)
2261 .kr(1)
2262 .sr(1)
2263 .m(4)
2264 .n(n)
2265 .k(k)
2266 .a_stride(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002267 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002268 }
2269 }
2270 }
2271
Frank Barchard0725b8d2020-12-07 11:07:35 -08002272 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002273 for (uint32_t n = 16; n <= 24; n += 8) {
2274 for (size_t k = 1; k <= 5; k += 2) {
2275 for (uint32_t m = 1; m <= 4; m++) {
2276 GemmMicrokernelTester()
2277 .mr(4)
2278 .nr(8)
2279 .kr(1)
2280 .sr(1)
2281 .m(m)
2282 .n(n)
2283 .k(k)
2284 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002285 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002286 }
2287 }
2288 }
2289 }
2290
Frank Barchard0725b8d2020-12-07 11:07:35 -08002291 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002292 for (size_t k = 1; k <= 5; k += 2) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08002293 for (uint32_t n = 1; n <= 8; n++) {
2294 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002295 GemmMicrokernelTester()
2296 .mr(4)
2297 .nr(8)
2298 .kr(1)
2299 .sr(1)
2300 .m(m)
2301 .n(n)
2302 .k(k)
2303 .cm_stride(11)
2304 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002305 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002306 }
2307 }
2308 }
2309 }
2310
Frank Barchard0725b8d2020-12-07 11:07:35 -08002311 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmin) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002312 GemmMicrokernelTester()
2313 .mr(4)
2314 .nr(8)
2315 .kr(1)
2316 .sr(1)
2317 .m(4)
2318 .n(8)
2319 .k(1)
2320 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002321 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002322 }
2323
Frank Barchard0725b8d2020-12-07 11:07:35 -08002324 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmax) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002325 GemmMicrokernelTester()
2326 .mr(4)
2327 .nr(8)
2328 .kr(1)
2329 .sr(1)
2330 .m(4)
2331 .n(8)
2332 .k(1)
2333 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002334 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002335 }
2336
Frank Barchard0725b8d2020-12-07 11:07:35 -08002337 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002338 GemmMicrokernelTester()
2339 .mr(4)
2340 .nr(8)
2341 .kr(1)
2342 .sr(1)
2343 .m(4)
2344 .n(8)
2345 .k(1)
2346 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -08002347 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002348 }
Marat Dukhan4c617792021-12-21 15:47:58 -08002349#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002350
2351
Marat Dukhande06f492020-04-09 00:19:31 -07002352TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002353 GemmMicrokernelTester()
2354 .mr(4)
2355 .nr(2)
2356 .kr(1)
2357 .sr(1)
2358 .m(4)
2359 .n(2)
2360 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002361 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002362}
2363
Marat Dukhande06f492020-04-09 00:19:31 -07002364TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002365 GemmMicrokernelTester()
2366 .mr(4)
2367 .nr(2)
2368 .kr(1)
2369 .sr(1)
2370 .m(4)
2371 .n(2)
2372 .k(1)
2373 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002374 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002375}
2376
Marat Dukhande06f492020-04-09 00:19:31 -07002377TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002378 GemmMicrokernelTester()
2379 .mr(4)
2380 .nr(2)
2381 .kr(1)
2382 .sr(1)
2383 .m(4)
2384 .n(2)
2385 .k(1)
2386 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002387 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002388}
2389
Marat Dukhande06f492020-04-09 00:19:31 -07002390TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08002391 for (uint32_t n = 1; n <= 2; n++) {
2392 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002393 GemmMicrokernelTester()
2394 .mr(4)
2395 .nr(2)
2396 .kr(1)
2397 .sr(1)
2398 .m(m)
2399 .n(n)
2400 .k(1)
2401 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002402 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002403 }
2404 }
2405}
2406
Marat Dukhande06f492020-04-09 00:19:31 -07002407TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002408 for (uint32_t m = 1; m <= 4; m++) {
2409 GemmMicrokernelTester()
2410 .mr(4)
2411 .nr(2)
2412 .kr(1)
2413 .sr(1)
2414 .m(m)
2415 .n(2)
2416 .k(1)
2417 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002418 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002419 }
2420}
2421
Marat Dukhande06f492020-04-09 00:19:31 -07002422TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002423 for (uint32_t n = 1; n <= 2; n++) {
2424 GemmMicrokernelTester()
2425 .mr(4)
2426 .nr(2)
2427 .kr(1)
2428 .sr(1)
2429 .m(4)
2430 .n(n)
2431 .k(1)
2432 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002433 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002434 }
2435}
2436
Marat Dukhande06f492020-04-09 00:19:31 -07002437TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002438 for (size_t k = 2; k < 10; k++) {
2439 GemmMicrokernelTester()
2440 .mr(4)
2441 .nr(2)
2442 .kr(1)
2443 .sr(1)
2444 .m(4)
2445 .n(2)
2446 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002447 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002448 }
2449}
2450
Marat Dukhande06f492020-04-09 00:19:31 -07002451TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002452 for (size_t k = 2; k < 10; k++) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08002453 for (uint32_t n = 1; n <= 2; n++) {
2454 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002455 GemmMicrokernelTester()
2456 .mr(4)
2457 .nr(2)
2458 .kr(1)
2459 .sr(1)
2460 .m(m)
2461 .n(n)
2462 .k(k)
2463 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002464 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002465 }
2466 }
2467 }
2468}
2469
Marat Dukhande06f492020-04-09 00:19:31 -07002470TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002471 for (uint32_t n = 3; n < 4; n++) {
2472 for (size_t k = 1; k <= 5; k += 2) {
2473 GemmMicrokernelTester()
2474 .mr(4)
2475 .nr(2)
2476 .kr(1)
2477 .sr(1)
2478 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08002479 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002480 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002481 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002482 }
2483 }
2484}
2485
Marat Dukhande06f492020-04-09 00:19:31 -07002486TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002487 for (uint32_t n = 3; n < 4; n++) {
2488 for (size_t k = 1; k <= 5; k += 2) {
2489 GemmMicrokernelTester()
2490 .mr(4)
2491 .nr(2)
2492 .kr(1)
2493 .sr(1)
2494 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08002495 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002496 .k(k)
2497 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002498 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002499 }
2500 }
2501}
2502
Marat Dukhande06f492020-04-09 00:19:31 -07002503TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002504 for (uint32_t n = 3; n < 4; n++) {
2505 for (size_t k = 1; k <= 5; k += 2) {
2506 GemmMicrokernelTester()
2507 .mr(4)
2508 .nr(2)
2509 .kr(1)
2510 .sr(1)
2511 .m(4)
2512 .n(n)
2513 .k(k)
2514 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002515 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002516 }
2517 }
2518}
2519
Marat Dukhande06f492020-04-09 00:19:31 -07002520TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002521 for (uint32_t n = 3; n < 4; n++) {
2522 for (size_t k = 1; k <= 5; k += 2) {
2523 for (uint32_t m = 1; m <= 4; m++) {
2524 GemmMicrokernelTester()
2525 .mr(4)
2526 .nr(2)
2527 .kr(1)
2528 .sr(1)
2529 .m(m)
2530 .n(n)
2531 .k(k)
2532 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002533 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002534 }
2535 }
2536 }
2537}
2538
Marat Dukhande06f492020-04-09 00:19:31 -07002539TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002540 for (uint32_t n = 4; n <= 6; n += 2) {
2541 for (size_t k = 1; k <= 5; k += 2) {
2542 GemmMicrokernelTester()
2543 .mr(4)
2544 .nr(2)
2545 .kr(1)
2546 .sr(1)
2547 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08002548 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002549 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002550 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002551 }
2552 }
2553}
2554
Marat Dukhande06f492020-04-09 00:19:31 -07002555TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002556 for (uint32_t n = 4; n <= 6; n += 2) {
2557 for (size_t k = 1; k <= 5; k += 2) {
2558 GemmMicrokernelTester()
2559 .mr(4)
2560 .nr(2)
2561 .kr(1)
2562 .sr(1)
2563 .m(4)
2564 .n(n)
2565 .k(k)
2566 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002567 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002568 }
2569 }
2570}
2571
Marat Dukhande06f492020-04-09 00:19:31 -07002572TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002573 for (uint32_t n = 4; n <= 6; n += 2) {
2574 for (size_t k = 1; k <= 5; k += 2) {
2575 GemmMicrokernelTester()
2576 .mr(4)
2577 .nr(2)
2578 .kr(1)
2579 .sr(1)
2580 .m(4)
2581 .n(n)
2582 .k(k)
2583 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002584 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002585 }
2586 }
2587}
2588
Marat Dukhande06f492020-04-09 00:19:31 -07002589TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002590 for (uint32_t n = 4; n <= 6; n += 2) {
2591 for (size_t k = 1; k <= 5; k += 2) {
2592 for (uint32_t m = 1; m <= 4; m++) {
2593 GemmMicrokernelTester()
2594 .mr(4)
2595 .nr(2)
2596 .kr(1)
2597 .sr(1)
2598 .m(m)
2599 .n(n)
2600 .k(k)
2601 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002602 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002603 }
2604 }
2605 }
2606}
2607
Marat Dukhande06f492020-04-09 00:19:31 -07002608TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002609 for (size_t k = 1; k <= 5; k += 2) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08002610 for (uint32_t n = 1; n <= 2; n++) {
2611 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002612 GemmMicrokernelTester()
2613 .mr(4)
2614 .nr(2)
2615 .kr(1)
2616 .sr(1)
2617 .m(m)
2618 .n(n)
2619 .k(k)
2620 .cm_stride(5)
2621 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002622 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002623 }
2624 }
2625 }
2626}
2627
Marat Dukhande06f492020-04-09 00:19:31 -07002628TEST(F32_PPMM_MINMAX_4X2__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002629 GemmMicrokernelTester()
2630 .mr(4)
2631 .nr(2)
2632 .kr(1)
2633 .sr(1)
2634 .m(4)
2635 .n(2)
2636 .k(1)
2637 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002638 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002639}
2640
Marat Dukhande06f492020-04-09 00:19:31 -07002641TEST(F32_PPMM_MINMAX_4X2__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002642 GemmMicrokernelTester()
2643 .mr(4)
2644 .nr(2)
2645 .kr(1)
2646 .sr(1)
2647 .m(4)
2648 .n(2)
2649 .k(1)
2650 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002651 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002652}
2653
Marat Dukhande06f492020-04-09 00:19:31 -07002654TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002655 GemmMicrokernelTester()
2656 .mr(4)
2657 .nr(2)
2658 .kr(1)
2659 .sr(1)
2660 .m(4)
2661 .n(2)
2662 .k(1)
2663 .cm_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002664 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002665}
2666
2667
Marat Dukhande06f492020-04-09 00:19:31 -07002668TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002669 GemmMicrokernelTester()
2670 .mr(2)
2671 .nr(4)
2672 .kr(1)
2673 .sr(1)
2674 .m(2)
2675 .n(4)
2676 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002677 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002678}
2679
Marat Dukhande06f492020-04-09 00:19:31 -07002680TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002681 GemmMicrokernelTester()
2682 .mr(2)
2683 .nr(4)
2684 .kr(1)
2685 .sr(1)
2686 .m(2)
2687 .n(4)
2688 .k(1)
2689 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002690 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002691}
2692
Marat Dukhande06f492020-04-09 00:19:31 -07002693TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002694 GemmMicrokernelTester()
2695 .mr(2)
2696 .nr(4)
2697 .kr(1)
2698 .sr(1)
2699 .m(2)
2700 .n(4)
2701 .k(1)
2702 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002703 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002704}
2705
Marat Dukhande06f492020-04-09 00:19:31 -07002706TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08002707 for (uint32_t n = 1; n <= 4; n++) {
2708 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002709 GemmMicrokernelTester()
2710 .mr(2)
2711 .nr(4)
2712 .kr(1)
2713 .sr(1)
2714 .m(m)
2715 .n(n)
2716 .k(1)
2717 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002718 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002719 }
2720 }
2721}
2722
Marat Dukhande06f492020-04-09 00:19:31 -07002723TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002724 for (uint32_t m = 1; m <= 2; m++) {
2725 GemmMicrokernelTester()
2726 .mr(2)
2727 .nr(4)
2728 .kr(1)
2729 .sr(1)
2730 .m(m)
2731 .n(4)
2732 .k(1)
2733 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002734 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002735 }
2736}
2737
Marat Dukhande06f492020-04-09 00:19:31 -07002738TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002739 for (uint32_t n = 1; n <= 4; n++) {
2740 GemmMicrokernelTester()
2741 .mr(2)
2742 .nr(4)
2743 .kr(1)
2744 .sr(1)
2745 .m(2)
2746 .n(n)
2747 .k(1)
2748 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002749 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002750 }
2751}
2752
Marat Dukhande06f492020-04-09 00:19:31 -07002753TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002754 for (size_t k = 2; k < 10; k++) {
2755 GemmMicrokernelTester()
2756 .mr(2)
2757 .nr(4)
2758 .kr(1)
2759 .sr(1)
2760 .m(2)
2761 .n(4)
2762 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002763 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002764 }
2765}
2766
Marat Dukhande06f492020-04-09 00:19:31 -07002767TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002768 for (size_t k = 2; k < 10; k++) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08002769 for (uint32_t n = 1; n <= 4; n++) {
2770 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002771 GemmMicrokernelTester()
2772 .mr(2)
2773 .nr(4)
2774 .kr(1)
2775 .sr(1)
2776 .m(m)
2777 .n(n)
2778 .k(k)
2779 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002780 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002781 }
2782 }
2783 }
2784}
2785
Marat Dukhande06f492020-04-09 00:19:31 -07002786TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002787 for (uint32_t n = 5; n < 8; n++) {
2788 for (size_t k = 1; k <= 5; k += 2) {
2789 GemmMicrokernelTester()
2790 .mr(2)
2791 .nr(4)
2792 .kr(1)
2793 .sr(1)
2794 .m(2)
Frank Barchard5e1a3032022-01-14 13:12:41 -08002795 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002796 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002797 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002798 }
2799 }
2800}
2801
Marat Dukhande06f492020-04-09 00:19:31 -07002802TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002803 for (uint32_t n = 5; n < 8; n++) {
2804 for (size_t k = 1; k <= 5; k += 2) {
2805 GemmMicrokernelTester()
2806 .mr(2)
2807 .nr(4)
2808 .kr(1)
2809 .sr(1)
2810 .m(2)
Frank Barchard5e1a3032022-01-14 13:12:41 -08002811 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002812 .k(k)
2813 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002814 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002815 }
2816 }
2817}
2818
Marat Dukhande06f492020-04-09 00:19:31 -07002819TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002820 for (uint32_t n = 5; n < 8; n++) {
2821 for (size_t k = 1; k <= 5; k += 2) {
2822 GemmMicrokernelTester()
2823 .mr(2)
2824 .nr(4)
2825 .kr(1)
2826 .sr(1)
2827 .m(2)
2828 .n(n)
2829 .k(k)
2830 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002831 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002832 }
2833 }
2834}
2835
Marat Dukhande06f492020-04-09 00:19:31 -07002836TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002837 for (uint32_t n = 5; n < 8; n++) {
2838 for (size_t k = 1; k <= 5; k += 2) {
2839 for (uint32_t m = 1; m <= 2; m++) {
2840 GemmMicrokernelTester()
2841 .mr(2)
2842 .nr(4)
2843 .kr(1)
2844 .sr(1)
2845 .m(m)
2846 .n(n)
2847 .k(k)
2848 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002849 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002850 }
2851 }
2852 }
2853}
2854
Marat Dukhande06f492020-04-09 00:19:31 -07002855TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002856 for (uint32_t n = 8; n <= 12; n += 4) {
2857 for (size_t k = 1; k <= 5; k += 2) {
2858 GemmMicrokernelTester()
2859 .mr(2)
2860 .nr(4)
2861 .kr(1)
2862 .sr(1)
2863 .m(2)
Frank Barchard5e1a3032022-01-14 13:12:41 -08002864 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002865 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002866 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002867 }
2868 }
2869}
2870
Marat Dukhande06f492020-04-09 00:19:31 -07002871TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002872 for (uint32_t n = 8; n <= 12; n += 4) {
2873 for (size_t k = 1; k <= 5; k += 2) {
2874 GemmMicrokernelTester()
2875 .mr(2)
2876 .nr(4)
2877 .kr(1)
2878 .sr(1)
2879 .m(2)
2880 .n(n)
2881 .k(k)
2882 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002883 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002884 }
2885 }
2886}
2887
Marat Dukhande06f492020-04-09 00:19:31 -07002888TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002889 for (uint32_t n = 8; n <= 12; n += 4) {
2890 for (size_t k = 1; k <= 5; k += 2) {
2891 GemmMicrokernelTester()
2892 .mr(2)
2893 .nr(4)
2894 .kr(1)
2895 .sr(1)
2896 .m(2)
2897 .n(n)
2898 .k(k)
2899 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002900 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002901 }
2902 }
2903}
2904
Marat Dukhande06f492020-04-09 00:19:31 -07002905TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002906 for (uint32_t n = 8; n <= 12; n += 4) {
2907 for (size_t k = 1; k <= 5; k += 2) {
2908 for (uint32_t m = 1; m <= 2; m++) {
2909 GemmMicrokernelTester()
2910 .mr(2)
2911 .nr(4)
2912 .kr(1)
2913 .sr(1)
2914 .m(m)
2915 .n(n)
2916 .k(k)
2917 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002918 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002919 }
2920 }
2921 }
2922}
2923
Marat Dukhande06f492020-04-09 00:19:31 -07002924TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002925 for (size_t k = 1; k <= 5; k += 2) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08002926 for (uint32_t n = 1; n <= 4; n++) {
2927 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002928 GemmMicrokernelTester()
2929 .mr(2)
2930 .nr(4)
2931 .kr(1)
2932 .sr(1)
2933 .m(m)
2934 .n(n)
2935 .k(k)
2936 .cm_stride(7)
2937 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002938 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002939 }
2940 }
2941 }
2942}
2943
Marat Dukhande06f492020-04-09 00:19:31 -07002944TEST(F32_PPMM_MINMAX_2X4__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002945 GemmMicrokernelTester()
2946 .mr(2)
2947 .nr(4)
2948 .kr(1)
2949 .sr(1)
2950 .m(2)
2951 .n(4)
2952 .k(1)
2953 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002954 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002955}
2956
Marat Dukhande06f492020-04-09 00:19:31 -07002957TEST(F32_PPMM_MINMAX_2X4__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002958 GemmMicrokernelTester()
2959 .mr(2)
2960 .nr(4)
2961 .kr(1)
2962 .sr(1)
2963 .m(2)
2964 .n(4)
2965 .k(1)
2966 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002967 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002968}
2969
Marat Dukhande06f492020-04-09 00:19:31 -07002970TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002971 GemmMicrokernelTester()
2972 .mr(2)
2973 .nr(4)
2974 .kr(1)
2975 .sr(1)
2976 .m(2)
2977 .n(4)
2978 .k(1)
2979 .cm_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002980 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002981}
2982
2983
Marat Dukhande06f492020-04-09 00:19:31 -07002984TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002985 GemmMicrokernelTester()
2986 .mr(4)
2987 .nr(4)
2988 .kr(1)
2989 .sr(1)
2990 .m(4)
2991 .n(4)
2992 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002993 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002994}
2995
Marat Dukhande06f492020-04-09 00:19:31 -07002996TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002997 GemmMicrokernelTester()
2998 .mr(4)
2999 .nr(4)
3000 .kr(1)
3001 .sr(1)
3002 .m(4)
3003 .n(4)
3004 .k(1)
3005 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003006 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003007}
3008
Marat Dukhande06f492020-04-09 00:19:31 -07003009TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003010 GemmMicrokernelTester()
3011 .mr(4)
3012 .nr(4)
3013 .kr(1)
3014 .sr(1)
3015 .m(4)
3016 .n(4)
3017 .k(1)
3018 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003019 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003020}
3021
Marat Dukhande06f492020-04-09 00:19:31 -07003022TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08003023 for (uint32_t n = 1; n <= 4; n++) {
3024 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003025 GemmMicrokernelTester()
3026 .mr(4)
3027 .nr(4)
3028 .kr(1)
3029 .sr(1)
3030 .m(m)
3031 .n(n)
3032 .k(1)
3033 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003034 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003035 }
3036 }
3037}
3038
Marat Dukhande06f492020-04-09 00:19:31 -07003039TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003040 for (uint32_t m = 1; m <= 4; m++) {
3041 GemmMicrokernelTester()
3042 .mr(4)
3043 .nr(4)
3044 .kr(1)
3045 .sr(1)
3046 .m(m)
3047 .n(4)
3048 .k(1)
3049 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003050 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003051 }
3052}
3053
Marat Dukhande06f492020-04-09 00:19:31 -07003054TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003055 for (uint32_t n = 1; n <= 4; n++) {
3056 GemmMicrokernelTester()
3057 .mr(4)
3058 .nr(4)
3059 .kr(1)
3060 .sr(1)
3061 .m(4)
3062 .n(n)
3063 .k(1)
3064 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003065 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003066 }
3067}
3068
Marat Dukhande06f492020-04-09 00:19:31 -07003069TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003070 for (size_t k = 2; k < 10; k++) {
3071 GemmMicrokernelTester()
3072 .mr(4)
3073 .nr(4)
3074 .kr(1)
3075 .sr(1)
3076 .m(4)
3077 .n(4)
3078 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003079 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003080 }
3081}
3082
Marat Dukhande06f492020-04-09 00:19:31 -07003083TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003084 for (size_t k = 2; k < 10; k++) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08003085 for (uint32_t n = 1; n <= 4; n++) {
3086 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003087 GemmMicrokernelTester()
3088 .mr(4)
3089 .nr(4)
3090 .kr(1)
3091 .sr(1)
3092 .m(m)
3093 .n(n)
3094 .k(k)
3095 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003096 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003097 }
3098 }
3099 }
3100}
3101
Marat Dukhande06f492020-04-09 00:19:31 -07003102TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003103 for (uint32_t n = 5; n < 8; n++) {
3104 for (size_t k = 1; k <= 5; k += 2) {
3105 GemmMicrokernelTester()
3106 .mr(4)
3107 .nr(4)
3108 .kr(1)
3109 .sr(1)
3110 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08003111 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003112 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003113 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003114 }
3115 }
3116}
3117
Marat Dukhande06f492020-04-09 00:19:31 -07003118TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003119 for (uint32_t n = 5; n < 8; n++) {
3120 for (size_t k = 1; k <= 5; k += 2) {
3121 GemmMicrokernelTester()
3122 .mr(4)
3123 .nr(4)
3124 .kr(1)
3125 .sr(1)
3126 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08003127 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003128 .k(k)
3129 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003130 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003131 }
3132 }
3133}
3134
Marat Dukhande06f492020-04-09 00:19:31 -07003135TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003136 for (uint32_t n = 5; n < 8; n++) {
3137 for (size_t k = 1; k <= 5; k += 2) {
3138 GemmMicrokernelTester()
3139 .mr(4)
3140 .nr(4)
3141 .kr(1)
3142 .sr(1)
3143 .m(4)
3144 .n(n)
3145 .k(k)
3146 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003147 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003148 }
3149 }
3150}
3151
Marat Dukhande06f492020-04-09 00:19:31 -07003152TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003153 for (uint32_t n = 5; n < 8; n++) {
3154 for (size_t k = 1; k <= 5; k += 2) {
3155 for (uint32_t m = 1; m <= 4; m++) {
3156 GemmMicrokernelTester()
3157 .mr(4)
3158 .nr(4)
3159 .kr(1)
3160 .sr(1)
3161 .m(m)
3162 .n(n)
3163 .k(k)
3164 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003165 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003166 }
3167 }
3168 }
3169}
3170
Marat Dukhande06f492020-04-09 00:19:31 -07003171TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003172 for (uint32_t n = 8; n <= 12; n += 4) {
3173 for (size_t k = 1; k <= 5; k += 2) {
3174 GemmMicrokernelTester()
3175 .mr(4)
3176 .nr(4)
3177 .kr(1)
3178 .sr(1)
3179 .m(4)
Frank Barchard5e1a3032022-01-14 13:12:41 -08003180 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003181 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003182 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003183 }
3184 }
3185}
3186
Marat Dukhande06f492020-04-09 00:19:31 -07003187TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003188 for (uint32_t n = 8; n <= 12; n += 4) {
3189 for (size_t k = 1; k <= 5; k += 2) {
3190 GemmMicrokernelTester()
3191 .mr(4)
3192 .nr(4)
3193 .kr(1)
3194 .sr(1)
3195 .m(4)
3196 .n(n)
3197 .k(k)
3198 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003199 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003200 }
3201 }
3202}
3203
Marat Dukhande06f492020-04-09 00:19:31 -07003204TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003205 for (uint32_t n = 8; n <= 12; n += 4) {
3206 for (size_t k = 1; k <= 5; k += 2) {
3207 GemmMicrokernelTester()
3208 .mr(4)
3209 .nr(4)
3210 .kr(1)
3211 .sr(1)
3212 .m(4)
3213 .n(n)
3214 .k(k)
3215 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003216 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003217 }
3218 }
3219}
3220
Marat Dukhande06f492020-04-09 00:19:31 -07003221TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003222 for (uint32_t n = 8; n <= 12; n += 4) {
3223 for (size_t k = 1; k <= 5; k += 2) {
3224 for (uint32_t m = 1; m <= 4; m++) {
3225 GemmMicrokernelTester()
3226 .mr(4)
3227 .nr(4)
3228 .kr(1)
3229 .sr(1)
3230 .m(m)
3231 .n(n)
3232 .k(k)
3233 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003234 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003235 }
3236 }
3237 }
3238}
3239
Marat Dukhande06f492020-04-09 00:19:31 -07003240TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003241 for (size_t k = 1; k <= 5; k += 2) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08003242 for (uint32_t n = 1; n <= 4; n++) {
3243 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003244 GemmMicrokernelTester()
3245 .mr(4)
3246 .nr(4)
3247 .kr(1)
3248 .sr(1)
3249 .m(m)
3250 .n(n)
3251 .k(k)
3252 .cm_stride(7)
3253 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003254 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003255 }
3256 }
3257 }
3258}
3259
Marat Dukhande06f492020-04-09 00:19:31 -07003260TEST(F32_PPMM_MINMAX_4X4__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003261 GemmMicrokernelTester()
3262 .mr(4)
3263 .nr(4)
3264 .kr(1)
3265 .sr(1)
3266 .m(4)
3267 .n(4)
3268 .k(1)
3269 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003270 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003271}
3272
Marat Dukhande06f492020-04-09 00:19:31 -07003273TEST(F32_PPMM_MINMAX_4X4__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003274 GemmMicrokernelTester()
3275 .mr(4)
3276 .nr(4)
3277 .kr(1)
3278 .sr(1)
3279 .m(4)
3280 .n(4)
3281 .k(1)
3282 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003283 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003284}
3285
Marat Dukhande06f492020-04-09 00:19:31 -07003286TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003287 GemmMicrokernelTester()
3288 .mr(4)
3289 .nr(4)
3290 .kr(1)
3291 .sr(1)
3292 .m(4)
3293 .n(4)
3294 .k(1)
3295 .cm_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003296 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003297}
3298
3299
Marat Dukhande06f492020-04-09 00:19:31 -07003300TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003301 GemmMicrokernelTester()
3302 .mr(3)
3303 .nr(3)
3304 .kr(1)
3305 .sr(1)
3306 .m(3)
3307 .n(3)
3308 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003309 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003310}
3311
Marat Dukhande06f492020-04-09 00:19:31 -07003312TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003313 GemmMicrokernelTester()
3314 .mr(3)
3315 .nr(3)
3316 .kr(1)
3317 .sr(1)
3318 .m(3)
3319 .n(3)
3320 .k(1)
3321 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003322 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003323}
3324
Marat Dukhande06f492020-04-09 00:19:31 -07003325TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003326 GemmMicrokernelTester()
3327 .mr(3)
3328 .nr(3)
3329 .kr(1)
3330 .sr(1)
3331 .m(3)
3332 .n(3)
3333 .k(1)
3334 .a_stride(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003335 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003336}
3337
Marat Dukhande06f492020-04-09 00:19:31 -07003338TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08003339 for (uint32_t n = 1; n <= 3; n++) {
3340 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003341 GemmMicrokernelTester()
3342 .mr(3)
3343 .nr(3)
3344 .kr(1)
3345 .sr(1)
3346 .m(m)
3347 .n(n)
3348 .k(1)
3349 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003350 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003351 }
3352 }
3353}
3354
Marat Dukhande06f492020-04-09 00:19:31 -07003355TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003356 for (uint32_t m = 1; m <= 3; m++) {
3357 GemmMicrokernelTester()
3358 .mr(3)
3359 .nr(3)
3360 .kr(1)
3361 .sr(1)
3362 .m(m)
3363 .n(3)
3364 .k(1)
3365 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003366 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003367 }
3368}
3369
Marat Dukhande06f492020-04-09 00:19:31 -07003370TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003371 for (uint32_t n = 1; n <= 3; n++) {
3372 GemmMicrokernelTester()
3373 .mr(3)
3374 .nr(3)
3375 .kr(1)
3376 .sr(1)
3377 .m(3)
3378 .n(n)
3379 .k(1)
3380 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003381 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003382 }
3383}
3384
Marat Dukhande06f492020-04-09 00:19:31 -07003385TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003386 for (size_t k = 2; k < 10; k++) {
3387 GemmMicrokernelTester()
3388 .mr(3)
3389 .nr(3)
3390 .kr(1)
3391 .sr(1)
3392 .m(3)
3393 .n(3)
3394 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003395 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003396 }
3397}
3398
Marat Dukhande06f492020-04-09 00:19:31 -07003399TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003400 for (size_t k = 2; k < 10; k++) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08003401 for (uint32_t n = 1; n <= 3; n++) {
3402 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003403 GemmMicrokernelTester()
3404 .mr(3)
3405 .nr(3)
3406 .kr(1)
3407 .sr(1)
3408 .m(m)
3409 .n(n)
3410 .k(k)
3411 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003412 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003413 }
3414 }
3415 }
3416}
3417
Marat Dukhande06f492020-04-09 00:19:31 -07003418TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003419 for (uint32_t n = 4; n < 6; n++) {
3420 for (size_t k = 1; k <= 5; k += 2) {
3421 GemmMicrokernelTester()
3422 .mr(3)
3423 .nr(3)
3424 .kr(1)
3425 .sr(1)
3426 .m(3)
Frank Barchard5e1a3032022-01-14 13:12:41 -08003427 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003428 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003429 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003430 }
3431 }
3432}
3433
Marat Dukhande06f492020-04-09 00:19:31 -07003434TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003435 for (uint32_t n = 4; n < 6; n++) {
3436 for (size_t k = 1; k <= 5; k += 2) {
3437 GemmMicrokernelTester()
3438 .mr(3)
3439 .nr(3)
3440 .kr(1)
3441 .sr(1)
3442 .m(3)
Frank Barchard5e1a3032022-01-14 13:12:41 -08003443 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003444 .k(k)
3445 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003446 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003447 }
3448 }
3449}
3450
Marat Dukhande06f492020-04-09 00:19:31 -07003451TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003452 for (uint32_t n = 4; n < 6; n++) {
3453 for (size_t k = 1; k <= 5; k += 2) {
3454 GemmMicrokernelTester()
3455 .mr(3)
3456 .nr(3)
3457 .kr(1)
3458 .sr(1)
3459 .m(3)
3460 .n(n)
3461 .k(k)
3462 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003463 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003464 }
3465 }
3466}
3467
Marat Dukhande06f492020-04-09 00:19:31 -07003468TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003469 for (uint32_t n = 4; n < 6; n++) {
3470 for (size_t k = 1; k <= 5; k += 2) {
3471 for (uint32_t m = 1; m <= 3; m++) {
3472 GemmMicrokernelTester()
3473 .mr(3)
3474 .nr(3)
3475 .kr(1)
3476 .sr(1)
3477 .m(m)
3478 .n(n)
3479 .k(k)
3480 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003481 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003482 }
3483 }
3484 }
3485}
3486
Marat Dukhande06f492020-04-09 00:19:31 -07003487TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003488 for (uint32_t n = 6; n <= 9; n += 3) {
3489 for (size_t k = 1; k <= 5; k += 2) {
3490 GemmMicrokernelTester()
3491 .mr(3)
3492 .nr(3)
3493 .kr(1)
3494 .sr(1)
3495 .m(3)
Frank Barchard5e1a3032022-01-14 13:12:41 -08003496 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003497 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003498 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003499 }
3500 }
3501}
3502
Marat Dukhande06f492020-04-09 00:19:31 -07003503TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003504 for (uint32_t n = 6; n <= 9; n += 3) {
3505 for (size_t k = 1; k <= 5; k += 2) {
3506 GemmMicrokernelTester()
3507 .mr(3)
3508 .nr(3)
3509 .kr(1)
3510 .sr(1)
3511 .m(3)
3512 .n(n)
3513 .k(k)
3514 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003515 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003516 }
3517 }
3518}
3519
Marat Dukhande06f492020-04-09 00:19:31 -07003520TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003521 for (uint32_t n = 6; n <= 9; n += 3) {
3522 for (size_t k = 1; k <= 5; k += 2) {
3523 GemmMicrokernelTester()
3524 .mr(3)
3525 .nr(3)
3526 .kr(1)
3527 .sr(1)
3528 .m(3)
3529 .n(n)
3530 .k(k)
3531 .a_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003532 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003533 }
3534 }
3535}
3536
Marat Dukhande06f492020-04-09 00:19:31 -07003537TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003538 for (uint32_t n = 6; n <= 9; n += 3) {
3539 for (size_t k = 1; k <= 5; k += 2) {
3540 for (uint32_t m = 1; m <= 3; m++) {
3541 GemmMicrokernelTester()
3542 .mr(3)
3543 .nr(3)
3544 .kr(1)
3545 .sr(1)
3546 .m(m)
3547 .n(n)
3548 .k(k)
3549 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003550 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003551 }
3552 }
3553 }
3554}
3555
Marat Dukhande06f492020-04-09 00:19:31 -07003556TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003557 for (size_t k = 1; k <= 5; k += 2) {
Frank Barchard5e1a3032022-01-14 13:12:41 -08003558 for (uint32_t n = 1; n <= 3; n++) {
3559 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003560 GemmMicrokernelTester()
3561 .mr(3)
3562 .nr(3)
3563 .kr(1)
3564 .sr(1)
3565 .m(m)
3566 .n(n)
3567 .k(k)
3568 .cm_stride(5)
3569 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003570 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003571 }
3572 }
3573 }
3574}
3575
Marat Dukhande06f492020-04-09 00:19:31 -07003576TEST(F32_PPMM_MINMAX_3X3__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003577 GemmMicrokernelTester()
3578 .mr(3)
3579 .nr(3)
3580 .kr(1)
3581 .sr(1)
3582 .m(3)
3583 .n(3)
3584 .k(1)
3585 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003586 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003587}
3588
Marat Dukhande06f492020-04-09 00:19:31 -07003589TEST(F32_PPMM_MINMAX_3X3__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003590 GemmMicrokernelTester()
3591 .mr(3)
3592 .nr(3)
3593 .kr(1)
3594 .sr(1)
3595 .m(3)
3596 .n(3)
3597 .k(1)
3598 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003599 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003600}
3601
Marat Dukhande06f492020-04-09 00:19:31 -07003602TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003603 GemmMicrokernelTester()
3604 .mr(3)
3605 .nr(3)
3606 .kr(1)
3607 .sr(1)
3608 .m(3)
3609 .n(3)
3610 .k(1)
3611 .cm_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003612 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003613}