blob: 24e7751486052ecd1ffdc02a3360719b0a07d7f1 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-ppmm-minmax.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
19#include <xnnpack/gemm.h>
20#include <xnnpack/igemm.h>
21#include <xnnpack/ppmm.h>
22#include "gemm-microkernel-tester.h"
23
24
25#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070026 TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027 TEST_REQUIRES_ARM_NEON;
28 GemmMicrokernelTester()
29 .mr(4)
30 .nr(8)
31 .kr(1)
32 .sr(1)
33 .m(4)
34 .n(8)
35 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -070036 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -070037 }
38
Marat Dukhande06f492020-04-09 00:19:31 -070039 TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070040 TEST_REQUIRES_ARM_NEON;
41 GemmMicrokernelTester()
42 .mr(4)
43 .nr(8)
44 .kr(1)
45 .sr(1)
46 .m(4)
47 .n(8)
48 .k(1)
49 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -070050 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -070051 }
52
Marat Dukhande06f492020-04-09 00:19:31 -070053 TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070054 TEST_REQUIRES_ARM_NEON;
55 GemmMicrokernelTester()
56 .mr(4)
57 .nr(8)
58 .kr(1)
59 .sr(1)
60 .m(4)
61 .n(8)
62 .k(1)
63 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -070064 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -070065 }
66
Marat Dukhande06f492020-04-09 00:19:31 -070067 TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070068 TEST_REQUIRES_ARM_NEON;
69 for (uint32_t m = 1; m <= 4; m++) {
70 for (uint32_t n = 1; n <= 8; n++) {
71 GemmMicrokernelTester()
72 .mr(4)
73 .nr(8)
74 .kr(1)
75 .sr(1)
76 .m(m)
77 .n(n)
78 .k(1)
79 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -070080 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -070081 }
82 }
83 }
84
Marat Dukhande06f492020-04-09 00:19:31 -070085 TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070086 TEST_REQUIRES_ARM_NEON;
87 for (uint32_t m = 1; m <= 4; m++) {
88 GemmMicrokernelTester()
89 .mr(4)
90 .nr(8)
91 .kr(1)
92 .sr(1)
93 .m(m)
94 .n(8)
95 .k(1)
96 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -070097 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -070098 }
99 }
100
Marat Dukhande06f492020-04-09 00:19:31 -0700101 TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700102 TEST_REQUIRES_ARM_NEON;
103 for (uint32_t n = 1; n <= 8; n++) {
104 GemmMicrokernelTester()
105 .mr(4)
106 .nr(8)
107 .kr(1)
108 .sr(1)
109 .m(4)
110 .n(n)
111 .k(1)
112 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700113 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700114 }
115 }
116
Marat Dukhande06f492020-04-09 00:19:31 -0700117 TEST(F32_PPMM_MINMAX_4X8__NEON, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700118 TEST_REQUIRES_ARM_NEON;
119 for (size_t k = 2; k < 10; k++) {
120 GemmMicrokernelTester()
121 .mr(4)
122 .nr(8)
123 .kr(1)
124 .sr(1)
125 .m(4)
126 .n(8)
127 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700128 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700129 }
130 }
131
Marat Dukhande06f492020-04-09 00:19:31 -0700132 TEST(F32_PPMM_MINMAX_4X8__NEON, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700133 TEST_REQUIRES_ARM_NEON;
134 for (size_t k = 2; k < 10; k++) {
135 for (uint32_t m = 1; m <= 4; m++) {
136 for (uint32_t n = 1; n <= 8; n++) {
137 GemmMicrokernelTester()
138 .mr(4)
139 .nr(8)
140 .kr(1)
141 .sr(1)
142 .m(m)
143 .n(n)
144 .k(k)
145 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700146 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700147 }
148 }
149 }
150 }
151
Marat Dukhande06f492020-04-09 00:19:31 -0700152 TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700153 TEST_REQUIRES_ARM_NEON;
154 for (uint32_t n = 9; n < 16; n++) {
155 for (size_t k = 1; k <= 5; k += 2) {
156 GemmMicrokernelTester()
157 .mr(4)
158 .nr(8)
159 .kr(1)
160 .sr(1)
161 .m(4)
162 .n(8)
163 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700164 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700165 }
166 }
167 }
168
Marat Dukhande06f492020-04-09 00:19:31 -0700169 TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700170 TEST_REQUIRES_ARM_NEON;
171 for (uint32_t n = 9; n < 16; n++) {
172 for (size_t k = 1; k <= 5; k += 2) {
173 GemmMicrokernelTester()
174 .mr(4)
175 .nr(8)
176 .kr(1)
177 .sr(1)
178 .m(4)
179 .n(8)
180 .k(k)
181 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700182 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700183 }
184 }
185 }
186
Marat Dukhande06f492020-04-09 00:19:31 -0700187 TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700188 TEST_REQUIRES_ARM_NEON;
189 for (uint32_t n = 9; n < 16; n++) {
190 for (size_t k = 1; k <= 5; k += 2) {
191 GemmMicrokernelTester()
192 .mr(4)
193 .nr(8)
194 .kr(1)
195 .sr(1)
196 .m(4)
197 .n(n)
198 .k(k)
199 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700200 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700201 }
202 }
203 }
204
Marat Dukhande06f492020-04-09 00:19:31 -0700205 TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700206 TEST_REQUIRES_ARM_NEON;
207 for (uint32_t n = 9; n < 16; n++) {
208 for (size_t k = 1; k <= 5; k += 2) {
209 for (uint32_t m = 1; m <= 4; m++) {
210 GemmMicrokernelTester()
211 .mr(4)
212 .nr(8)
213 .kr(1)
214 .sr(1)
215 .m(m)
216 .n(n)
217 .k(k)
218 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700219 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700220 }
221 }
222 }
223 }
224
Marat Dukhande06f492020-04-09 00:19:31 -0700225 TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700226 TEST_REQUIRES_ARM_NEON;
227 for (uint32_t n = 16; n <= 24; n += 8) {
228 for (size_t k = 1; k <= 5; k += 2) {
229 GemmMicrokernelTester()
230 .mr(4)
231 .nr(8)
232 .kr(1)
233 .sr(1)
234 .m(4)
235 .n(8)
236 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700237 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700238 }
239 }
240 }
241
Marat Dukhande06f492020-04-09 00:19:31 -0700242 TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700243 TEST_REQUIRES_ARM_NEON;
244 for (uint32_t n = 16; n <= 24; n += 8) {
245 for (size_t k = 1; k <= 5; k += 2) {
246 GemmMicrokernelTester()
247 .mr(4)
248 .nr(8)
249 .kr(1)
250 .sr(1)
251 .m(4)
252 .n(n)
253 .k(k)
254 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700255 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700256 }
257 }
258 }
259
Marat Dukhande06f492020-04-09 00:19:31 -0700260 TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700261 TEST_REQUIRES_ARM_NEON;
262 for (uint32_t n = 16; n <= 24; n += 8) {
263 for (size_t k = 1; k <= 5; k += 2) {
264 GemmMicrokernelTester()
265 .mr(4)
266 .nr(8)
267 .kr(1)
268 .sr(1)
269 .m(4)
270 .n(n)
271 .k(k)
272 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700273 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700274 }
275 }
276 }
277
Marat Dukhande06f492020-04-09 00:19:31 -0700278 TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700279 TEST_REQUIRES_ARM_NEON;
280 for (uint32_t n = 16; n <= 24; n += 8) {
281 for (size_t k = 1; k <= 5; k += 2) {
282 for (uint32_t m = 1; m <= 4; m++) {
283 GemmMicrokernelTester()
284 .mr(4)
285 .nr(8)
286 .kr(1)
287 .sr(1)
288 .m(m)
289 .n(n)
290 .k(k)
291 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700292 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700293 }
294 }
295 }
296 }
297
Marat Dukhande06f492020-04-09 00:19:31 -0700298 TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700299 TEST_REQUIRES_ARM_NEON;
300 for (size_t k = 1; k <= 5; k += 2) {
301 for (uint32_t m = 1; m <= 4; m++) {
302 for (uint32_t n = 1; n <= 8; n++) {
303 GemmMicrokernelTester()
304 .mr(4)
305 .nr(8)
306 .kr(1)
307 .sr(1)
308 .m(m)
309 .n(n)
310 .k(k)
311 .cm_stride(11)
312 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700313 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700314 }
315 }
316 }
317 }
318
Marat Dukhande06f492020-04-09 00:19:31 -0700319 TEST(F32_PPMM_MINMAX_4X8__NEON, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700320 TEST_REQUIRES_ARM_NEON;
321 GemmMicrokernelTester()
322 .mr(4)
323 .nr(8)
324 .kr(1)
325 .sr(1)
326 .m(4)
327 .n(8)
328 .k(1)
329 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -0700330 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700331 }
332
Marat Dukhande06f492020-04-09 00:19:31 -0700333 TEST(F32_PPMM_MINMAX_4X8__NEON, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700334 TEST_REQUIRES_ARM_NEON;
335 GemmMicrokernelTester()
336 .mr(4)
337 .nr(8)
338 .kr(1)
339 .sr(1)
340 .m(4)
341 .n(8)
342 .k(1)
343 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -0700344 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700345 }
346
Marat Dukhande06f492020-04-09 00:19:31 -0700347 TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700348 TEST_REQUIRES_ARM_NEON;
349 GemmMicrokernelTester()
350 .mr(4)
351 .nr(8)
352 .kr(1)
353 .sr(1)
354 .m(4)
355 .n(8)
356 .k(1)
357 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700358 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700359 }
360#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
361
362
363#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -0700364 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700365 TEST_REQUIRES_ARM_NEON_FMA;
366 GemmMicrokernelTester()
367 .mr(4)
368 .nr(8)
369 .kr(1)
370 .sr(1)
371 .m(4)
372 .n(8)
373 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700374 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700375 }
376
Marat Dukhande06f492020-04-09 00:19:31 -0700377 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700378 TEST_REQUIRES_ARM_NEON_FMA;
379 GemmMicrokernelTester()
380 .mr(4)
381 .nr(8)
382 .kr(1)
383 .sr(1)
384 .m(4)
385 .n(8)
386 .k(1)
387 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700388 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700389 }
390
Marat Dukhande06f492020-04-09 00:19:31 -0700391 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700392 TEST_REQUIRES_ARM_NEON_FMA;
393 GemmMicrokernelTester()
394 .mr(4)
395 .nr(8)
396 .kr(1)
397 .sr(1)
398 .m(4)
399 .n(8)
400 .k(1)
401 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -0700402 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700403 }
404
Marat Dukhande06f492020-04-09 00:19:31 -0700405 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700406 TEST_REQUIRES_ARM_NEON_FMA;
407 for (uint32_t m = 1; m <= 4; m++) {
408 for (uint32_t n = 1; n <= 8; n++) {
409 GemmMicrokernelTester()
410 .mr(4)
411 .nr(8)
412 .kr(1)
413 .sr(1)
414 .m(m)
415 .n(n)
416 .k(1)
417 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700418 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700419 }
420 }
421 }
422
Marat Dukhande06f492020-04-09 00:19:31 -0700423 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700424 TEST_REQUIRES_ARM_NEON_FMA;
425 for (uint32_t m = 1; m <= 4; m++) {
426 GemmMicrokernelTester()
427 .mr(4)
428 .nr(8)
429 .kr(1)
430 .sr(1)
431 .m(m)
432 .n(8)
433 .k(1)
434 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700435 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700436 }
437 }
438
Marat Dukhande06f492020-04-09 00:19:31 -0700439 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700440 TEST_REQUIRES_ARM_NEON_FMA;
441 for (uint32_t n = 1; n <= 8; n++) {
442 GemmMicrokernelTester()
443 .mr(4)
444 .nr(8)
445 .kr(1)
446 .sr(1)
447 .m(4)
448 .n(n)
449 .k(1)
450 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700451 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700452 }
453 }
454
Marat Dukhande06f492020-04-09 00:19:31 -0700455 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700456 TEST_REQUIRES_ARM_NEON_FMA;
457 for (size_t k = 2; k < 10; k++) {
458 GemmMicrokernelTester()
459 .mr(4)
460 .nr(8)
461 .kr(1)
462 .sr(1)
463 .m(4)
464 .n(8)
465 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700466 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700467 }
468 }
469
Marat Dukhande06f492020-04-09 00:19:31 -0700470 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700471 TEST_REQUIRES_ARM_NEON_FMA;
472 for (size_t k = 2; k < 10; k++) {
473 for (uint32_t m = 1; m <= 4; m++) {
474 for (uint32_t n = 1; n <= 8; n++) {
475 GemmMicrokernelTester()
476 .mr(4)
477 .nr(8)
478 .kr(1)
479 .sr(1)
480 .m(m)
481 .n(n)
482 .k(k)
483 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700484 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700485 }
486 }
487 }
488 }
489
Marat Dukhande06f492020-04-09 00:19:31 -0700490 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700491 TEST_REQUIRES_ARM_NEON_FMA;
492 for (uint32_t n = 9; n < 16; n++) {
493 for (size_t k = 1; k <= 5; k += 2) {
494 GemmMicrokernelTester()
495 .mr(4)
496 .nr(8)
497 .kr(1)
498 .sr(1)
499 .m(4)
500 .n(8)
501 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700502 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700503 }
504 }
505 }
506
Marat Dukhande06f492020-04-09 00:19:31 -0700507 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700508 TEST_REQUIRES_ARM_NEON_FMA;
509 for (uint32_t n = 9; n < 16; n++) {
510 for (size_t k = 1; k <= 5; k += 2) {
511 GemmMicrokernelTester()
512 .mr(4)
513 .nr(8)
514 .kr(1)
515 .sr(1)
516 .m(4)
517 .n(8)
518 .k(k)
519 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700520 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700521 }
522 }
523 }
524
Marat Dukhande06f492020-04-09 00:19:31 -0700525 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700526 TEST_REQUIRES_ARM_NEON_FMA;
527 for (uint32_t n = 9; n < 16; n++) {
528 for (size_t k = 1; k <= 5; k += 2) {
529 GemmMicrokernelTester()
530 .mr(4)
531 .nr(8)
532 .kr(1)
533 .sr(1)
534 .m(4)
535 .n(n)
536 .k(k)
537 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700538 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700539 }
540 }
541 }
542
Marat Dukhande06f492020-04-09 00:19:31 -0700543 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700544 TEST_REQUIRES_ARM_NEON_FMA;
545 for (uint32_t n = 9; n < 16; n++) {
546 for (size_t k = 1; k <= 5; k += 2) {
547 for (uint32_t m = 1; m <= 4; m++) {
548 GemmMicrokernelTester()
549 .mr(4)
550 .nr(8)
551 .kr(1)
552 .sr(1)
553 .m(m)
554 .n(n)
555 .k(k)
556 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700557 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700558 }
559 }
560 }
561 }
562
Marat Dukhande06f492020-04-09 00:19:31 -0700563 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700564 TEST_REQUIRES_ARM_NEON_FMA;
565 for (uint32_t n = 16; n <= 24; n += 8) {
566 for (size_t k = 1; k <= 5; k += 2) {
567 GemmMicrokernelTester()
568 .mr(4)
569 .nr(8)
570 .kr(1)
571 .sr(1)
572 .m(4)
573 .n(8)
574 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700575 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700576 }
577 }
578 }
579
Marat Dukhande06f492020-04-09 00:19:31 -0700580 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700581 TEST_REQUIRES_ARM_NEON_FMA;
582 for (uint32_t n = 16; n <= 24; n += 8) {
583 for (size_t k = 1; k <= 5; k += 2) {
584 GemmMicrokernelTester()
585 .mr(4)
586 .nr(8)
587 .kr(1)
588 .sr(1)
589 .m(4)
590 .n(n)
591 .k(k)
592 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700593 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700594 }
595 }
596 }
597
Marat Dukhande06f492020-04-09 00:19:31 -0700598 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700599 TEST_REQUIRES_ARM_NEON_FMA;
600 for (uint32_t n = 16; n <= 24; n += 8) {
601 for (size_t k = 1; k <= 5; k += 2) {
602 GemmMicrokernelTester()
603 .mr(4)
604 .nr(8)
605 .kr(1)
606 .sr(1)
607 .m(4)
608 .n(n)
609 .k(k)
610 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700611 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700612 }
613 }
614 }
615
Marat Dukhande06f492020-04-09 00:19:31 -0700616 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700617 TEST_REQUIRES_ARM_NEON_FMA;
618 for (uint32_t n = 16; n <= 24; n += 8) {
619 for (size_t k = 1; k <= 5; k += 2) {
620 for (uint32_t m = 1; m <= 4; m++) {
621 GemmMicrokernelTester()
622 .mr(4)
623 .nr(8)
624 .kr(1)
625 .sr(1)
626 .m(m)
627 .n(n)
628 .k(k)
629 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700630 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700631 }
632 }
633 }
634 }
635
Marat Dukhande06f492020-04-09 00:19:31 -0700636 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700637 TEST_REQUIRES_ARM_NEON_FMA;
638 for (size_t k = 1; k <= 5; k += 2) {
639 for (uint32_t m = 1; m <= 4; m++) {
640 for (uint32_t n = 1; n <= 8; n++) {
641 GemmMicrokernelTester()
642 .mr(4)
643 .nr(8)
644 .kr(1)
645 .sr(1)
646 .m(m)
647 .n(n)
648 .k(k)
649 .cm_stride(11)
650 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700651 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700652 }
653 }
654 }
655 }
656
Marat Dukhande06f492020-04-09 00:19:31 -0700657 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700658 TEST_REQUIRES_ARM_NEON_FMA;
659 GemmMicrokernelTester()
660 .mr(4)
661 .nr(8)
662 .kr(1)
663 .sr(1)
664 .m(4)
665 .n(8)
666 .k(1)
667 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -0700668 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700669 }
670
Marat Dukhande06f492020-04-09 00:19:31 -0700671 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700672 TEST_REQUIRES_ARM_NEON_FMA;
673 GemmMicrokernelTester()
674 .mr(4)
675 .nr(8)
676 .kr(1)
677 .sr(1)
678 .m(4)
679 .n(8)
680 .k(1)
681 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -0700682 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700683 }
684
Marat Dukhande06f492020-04-09 00:19:31 -0700685 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700686 TEST_REQUIRES_ARM_NEON_FMA;
687 GemmMicrokernelTester()
688 .mr(4)
689 .nr(8)
690 .kr(1)
691 .sr(1)
692 .m(4)
693 .n(8)
694 .k(1)
695 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700696 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700697 }
698#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
699
700
701#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -0700702 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700703 TEST_REQUIRES_ARM_NEON;
704 GemmMicrokernelTester()
705 .mr(8)
706 .nr(8)
707 .kr(1)
708 .sr(1)
709 .m(8)
710 .n(8)
711 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700712 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700713 }
714
Marat Dukhande06f492020-04-09 00:19:31 -0700715 TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700716 TEST_REQUIRES_ARM_NEON;
717 GemmMicrokernelTester()
718 .mr(8)
719 .nr(8)
720 .kr(1)
721 .sr(1)
722 .m(8)
723 .n(8)
724 .k(1)
725 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700726 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700727 }
728
Marat Dukhande06f492020-04-09 00:19:31 -0700729 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700730 TEST_REQUIRES_ARM_NEON;
731 GemmMicrokernelTester()
732 .mr(8)
733 .nr(8)
734 .kr(1)
735 .sr(1)
736 .m(8)
737 .n(8)
738 .k(1)
739 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -0700740 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700741 }
742
Marat Dukhande06f492020-04-09 00:19:31 -0700743 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700744 TEST_REQUIRES_ARM_NEON;
745 for (uint32_t m = 1; m <= 8; m++) {
746 for (uint32_t n = 1; n <= 8; n++) {
747 GemmMicrokernelTester()
748 .mr(8)
749 .nr(8)
750 .kr(1)
751 .sr(1)
752 .m(m)
753 .n(n)
754 .k(1)
755 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700756 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700757 }
758 }
759 }
760
Marat Dukhande06f492020-04-09 00:19:31 -0700761 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700762 TEST_REQUIRES_ARM_NEON;
763 for (uint32_t m = 1; m <= 8; m++) {
764 GemmMicrokernelTester()
765 .mr(8)
766 .nr(8)
767 .kr(1)
768 .sr(1)
769 .m(m)
770 .n(8)
771 .k(1)
772 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700773 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700774 }
775 }
776
Marat Dukhande06f492020-04-09 00:19:31 -0700777 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700778 TEST_REQUIRES_ARM_NEON;
779 for (uint32_t n = 1; n <= 8; n++) {
780 GemmMicrokernelTester()
781 .mr(8)
782 .nr(8)
783 .kr(1)
784 .sr(1)
785 .m(8)
786 .n(n)
787 .k(1)
788 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700789 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700790 }
791 }
792
Marat Dukhande06f492020-04-09 00:19:31 -0700793 TEST(F32_PPMM_MINMAX_8X8__NEON, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700794 TEST_REQUIRES_ARM_NEON;
795 for (size_t k = 2; k < 10; k++) {
796 GemmMicrokernelTester()
797 .mr(8)
798 .nr(8)
799 .kr(1)
800 .sr(1)
801 .m(8)
802 .n(8)
803 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700804 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700805 }
806 }
807
Marat Dukhande06f492020-04-09 00:19:31 -0700808 TEST(F32_PPMM_MINMAX_8X8__NEON, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700809 TEST_REQUIRES_ARM_NEON;
810 for (size_t k = 2; k < 10; k++) {
811 for (uint32_t m = 1; m <= 8; m++) {
812 for (uint32_t n = 1; n <= 8; n++) {
813 GemmMicrokernelTester()
814 .mr(8)
815 .nr(8)
816 .kr(1)
817 .sr(1)
818 .m(m)
819 .n(n)
820 .k(k)
821 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700822 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700823 }
824 }
825 }
826 }
827
Marat Dukhande06f492020-04-09 00:19:31 -0700828 TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700829 TEST_REQUIRES_ARM_NEON;
830 for (uint32_t n = 9; n < 16; n++) {
831 for (size_t k = 1; k <= 5; k += 2) {
832 GemmMicrokernelTester()
833 .mr(8)
834 .nr(8)
835 .kr(1)
836 .sr(1)
837 .m(8)
838 .n(8)
839 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700840 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700841 }
842 }
843 }
844
Marat Dukhande06f492020-04-09 00:19:31 -0700845 TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700846 TEST_REQUIRES_ARM_NEON;
847 for (uint32_t n = 9; n < 16; n++) {
848 for (size_t k = 1; k <= 5; k += 2) {
849 GemmMicrokernelTester()
850 .mr(8)
851 .nr(8)
852 .kr(1)
853 .sr(1)
854 .m(8)
855 .n(8)
856 .k(k)
857 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700858 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700859 }
860 }
861 }
862
Marat Dukhande06f492020-04-09 00:19:31 -0700863 TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700864 TEST_REQUIRES_ARM_NEON;
865 for (uint32_t n = 9; n < 16; n++) {
866 for (size_t k = 1; k <= 5; k += 2) {
867 GemmMicrokernelTester()
868 .mr(8)
869 .nr(8)
870 .kr(1)
871 .sr(1)
872 .m(8)
873 .n(n)
874 .k(k)
875 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700876 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700877 }
878 }
879 }
880
Marat Dukhande06f492020-04-09 00:19:31 -0700881 TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700882 TEST_REQUIRES_ARM_NEON;
883 for (uint32_t n = 9; n < 16; n++) {
884 for (size_t k = 1; k <= 5; k += 2) {
885 for (uint32_t m = 1; m <= 8; m++) {
886 GemmMicrokernelTester()
887 .mr(8)
888 .nr(8)
889 .kr(1)
890 .sr(1)
891 .m(m)
892 .n(n)
893 .k(k)
894 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700895 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700896 }
897 }
898 }
899 }
900
Marat Dukhande06f492020-04-09 00:19:31 -0700901 TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700902 TEST_REQUIRES_ARM_NEON;
903 for (uint32_t n = 16; n <= 24; n += 8) {
904 for (size_t k = 1; k <= 5; k += 2) {
905 GemmMicrokernelTester()
906 .mr(8)
907 .nr(8)
908 .kr(1)
909 .sr(1)
910 .m(8)
911 .n(8)
912 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700913 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700914 }
915 }
916 }
917
Marat Dukhande06f492020-04-09 00:19:31 -0700918 TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700919 TEST_REQUIRES_ARM_NEON;
920 for (uint32_t n = 16; n <= 24; n += 8) {
921 for (size_t k = 1; k <= 5; k += 2) {
922 GemmMicrokernelTester()
923 .mr(8)
924 .nr(8)
925 .kr(1)
926 .sr(1)
927 .m(8)
928 .n(n)
929 .k(k)
930 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700931 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700932 }
933 }
934 }
935
Marat Dukhande06f492020-04-09 00:19:31 -0700936 TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700937 TEST_REQUIRES_ARM_NEON;
938 for (uint32_t n = 16; n <= 24; n += 8) {
939 for (size_t k = 1; k <= 5; k += 2) {
940 GemmMicrokernelTester()
941 .mr(8)
942 .nr(8)
943 .kr(1)
944 .sr(1)
945 .m(8)
946 .n(n)
947 .k(k)
948 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700949 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700950 }
951 }
952 }
953
Marat Dukhande06f492020-04-09 00:19:31 -0700954 TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700955 TEST_REQUIRES_ARM_NEON;
956 for (uint32_t n = 16; n <= 24; n += 8) {
957 for (size_t k = 1; k <= 5; k += 2) {
958 for (uint32_t m = 1; m <= 8; m++) {
959 GemmMicrokernelTester()
960 .mr(8)
961 .nr(8)
962 .kr(1)
963 .sr(1)
964 .m(m)
965 .n(n)
966 .k(k)
967 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700968 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700969 }
970 }
971 }
972 }
973
Marat Dukhande06f492020-04-09 00:19:31 -0700974 TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700975 TEST_REQUIRES_ARM_NEON;
976 for (size_t k = 1; k <= 5; k += 2) {
977 for (uint32_t m = 1; m <= 8; m++) {
978 for (uint32_t n = 1; n <= 8; n++) {
979 GemmMicrokernelTester()
980 .mr(8)
981 .nr(8)
982 .kr(1)
983 .sr(1)
984 .m(m)
985 .n(n)
986 .k(k)
987 .cm_stride(11)
988 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700989 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700990 }
991 }
992 }
993 }
994
Marat Dukhande06f492020-04-09 00:19:31 -0700995 TEST(F32_PPMM_MINMAX_8X8__NEON, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700996 TEST_REQUIRES_ARM_NEON;
997 GemmMicrokernelTester()
998 .mr(8)
999 .nr(8)
1000 .kr(1)
1001 .sr(1)
1002 .m(8)
1003 .n(8)
1004 .k(1)
1005 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001006 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -07001007 }
1008
Marat Dukhande06f492020-04-09 00:19:31 -07001009 TEST(F32_PPMM_MINMAX_8X8__NEON, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001010 TEST_REQUIRES_ARM_NEON;
1011 GemmMicrokernelTester()
1012 .mr(8)
1013 .nr(8)
1014 .kr(1)
1015 .sr(1)
1016 .m(8)
1017 .n(8)
1018 .k(1)
1019 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001020 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -07001021 }
1022
Marat Dukhande06f492020-04-09 00:19:31 -07001023 TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001024 TEST_REQUIRES_ARM_NEON;
1025 GemmMicrokernelTester()
1026 .mr(8)
1027 .nr(8)
1028 .kr(1)
1029 .sr(1)
1030 .m(8)
1031 .n(8)
1032 .k(1)
1033 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001034 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -07001035 }
1036#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1037
1038
1039#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07001040 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001041 TEST_REQUIRES_ARM_NEON_FMA;
1042 GemmMicrokernelTester()
1043 .mr(8)
1044 .nr(8)
1045 .kr(1)
1046 .sr(1)
1047 .m(8)
1048 .n(8)
1049 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001050 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001051 }
1052
Marat Dukhande06f492020-04-09 00:19:31 -07001053 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001054 TEST_REQUIRES_ARM_NEON_FMA;
1055 GemmMicrokernelTester()
1056 .mr(8)
1057 .nr(8)
1058 .kr(1)
1059 .sr(1)
1060 .m(8)
1061 .n(8)
1062 .k(1)
1063 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001064 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001065 }
1066
Marat Dukhande06f492020-04-09 00:19:31 -07001067 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001068 TEST_REQUIRES_ARM_NEON_FMA;
1069 GemmMicrokernelTester()
1070 .mr(8)
1071 .nr(8)
1072 .kr(1)
1073 .sr(1)
1074 .m(8)
1075 .n(8)
1076 .k(1)
1077 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -07001078 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001079 }
1080
Marat Dukhande06f492020-04-09 00:19:31 -07001081 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001082 TEST_REQUIRES_ARM_NEON_FMA;
1083 for (uint32_t m = 1; m <= 8; m++) {
1084 for (uint32_t n = 1; n <= 8; n++) {
1085 GemmMicrokernelTester()
1086 .mr(8)
1087 .nr(8)
1088 .kr(1)
1089 .sr(1)
1090 .m(m)
1091 .n(n)
1092 .k(1)
1093 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001094 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001095 }
1096 }
1097 }
1098
Marat Dukhande06f492020-04-09 00:19:31 -07001099 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001100 TEST_REQUIRES_ARM_NEON_FMA;
1101 for (uint32_t m = 1; m <= 8; m++) {
1102 GemmMicrokernelTester()
1103 .mr(8)
1104 .nr(8)
1105 .kr(1)
1106 .sr(1)
1107 .m(m)
1108 .n(8)
1109 .k(1)
1110 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001111 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001112 }
1113 }
1114
Marat Dukhande06f492020-04-09 00:19:31 -07001115 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001116 TEST_REQUIRES_ARM_NEON_FMA;
1117 for (uint32_t n = 1; n <= 8; n++) {
1118 GemmMicrokernelTester()
1119 .mr(8)
1120 .nr(8)
1121 .kr(1)
1122 .sr(1)
1123 .m(8)
1124 .n(n)
1125 .k(1)
1126 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001127 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001128 }
1129 }
1130
Marat Dukhande06f492020-04-09 00:19:31 -07001131 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001132 TEST_REQUIRES_ARM_NEON_FMA;
1133 for (size_t k = 2; k < 10; k++) {
1134 GemmMicrokernelTester()
1135 .mr(8)
1136 .nr(8)
1137 .kr(1)
1138 .sr(1)
1139 .m(8)
1140 .n(8)
1141 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001142 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001143 }
1144 }
1145
Marat Dukhande06f492020-04-09 00:19:31 -07001146 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001147 TEST_REQUIRES_ARM_NEON_FMA;
1148 for (size_t k = 2; k < 10; k++) {
1149 for (uint32_t m = 1; m <= 8; m++) {
1150 for (uint32_t n = 1; n <= 8; n++) {
1151 GemmMicrokernelTester()
1152 .mr(8)
1153 .nr(8)
1154 .kr(1)
1155 .sr(1)
1156 .m(m)
1157 .n(n)
1158 .k(k)
1159 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001160 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001161 }
1162 }
1163 }
1164 }
1165
Marat Dukhande06f492020-04-09 00:19:31 -07001166 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001167 TEST_REQUIRES_ARM_NEON_FMA;
1168 for (uint32_t n = 9; n < 16; n++) {
1169 for (size_t k = 1; k <= 5; k += 2) {
1170 GemmMicrokernelTester()
1171 .mr(8)
1172 .nr(8)
1173 .kr(1)
1174 .sr(1)
1175 .m(8)
1176 .n(8)
1177 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001178 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001179 }
1180 }
1181 }
1182
Marat Dukhande06f492020-04-09 00:19:31 -07001183 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001184 TEST_REQUIRES_ARM_NEON_FMA;
1185 for (uint32_t n = 9; n < 16; n++) {
1186 for (size_t k = 1; k <= 5; k += 2) {
1187 GemmMicrokernelTester()
1188 .mr(8)
1189 .nr(8)
1190 .kr(1)
1191 .sr(1)
1192 .m(8)
1193 .n(8)
1194 .k(k)
1195 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001196 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001197 }
1198 }
1199 }
1200
Marat Dukhande06f492020-04-09 00:19:31 -07001201 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001202 TEST_REQUIRES_ARM_NEON_FMA;
1203 for (uint32_t n = 9; n < 16; n++) {
1204 for (size_t k = 1; k <= 5; k += 2) {
1205 GemmMicrokernelTester()
1206 .mr(8)
1207 .nr(8)
1208 .kr(1)
1209 .sr(1)
1210 .m(8)
1211 .n(n)
1212 .k(k)
1213 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07001214 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001215 }
1216 }
1217 }
1218
Marat Dukhande06f492020-04-09 00:19:31 -07001219 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001220 TEST_REQUIRES_ARM_NEON_FMA;
1221 for (uint32_t n = 9; n < 16; n++) {
1222 for (size_t k = 1; k <= 5; k += 2) {
1223 for (uint32_t m = 1; m <= 8; m++) {
1224 GemmMicrokernelTester()
1225 .mr(8)
1226 .nr(8)
1227 .kr(1)
1228 .sr(1)
1229 .m(m)
1230 .n(n)
1231 .k(k)
1232 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001233 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001234 }
1235 }
1236 }
1237 }
1238
Marat Dukhande06f492020-04-09 00:19:31 -07001239 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001240 TEST_REQUIRES_ARM_NEON_FMA;
1241 for (uint32_t n = 16; n <= 24; n += 8) {
1242 for (size_t k = 1; k <= 5; k += 2) {
1243 GemmMicrokernelTester()
1244 .mr(8)
1245 .nr(8)
1246 .kr(1)
1247 .sr(1)
1248 .m(8)
1249 .n(8)
1250 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001251 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001252 }
1253 }
1254 }
1255
Marat Dukhande06f492020-04-09 00:19:31 -07001256 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001257 TEST_REQUIRES_ARM_NEON_FMA;
1258 for (uint32_t n = 16; n <= 24; n += 8) {
1259 for (size_t k = 1; k <= 5; k += 2) {
1260 GemmMicrokernelTester()
1261 .mr(8)
1262 .nr(8)
1263 .kr(1)
1264 .sr(1)
1265 .m(8)
1266 .n(n)
1267 .k(k)
1268 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001269 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001270 }
1271 }
1272 }
1273
Marat Dukhande06f492020-04-09 00:19:31 -07001274 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001275 TEST_REQUIRES_ARM_NEON_FMA;
1276 for (uint32_t n = 16; n <= 24; n += 8) {
1277 for (size_t k = 1; k <= 5; k += 2) {
1278 GemmMicrokernelTester()
1279 .mr(8)
1280 .nr(8)
1281 .kr(1)
1282 .sr(1)
1283 .m(8)
1284 .n(n)
1285 .k(k)
1286 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07001287 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001288 }
1289 }
1290 }
1291
Marat Dukhande06f492020-04-09 00:19:31 -07001292 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001293 TEST_REQUIRES_ARM_NEON_FMA;
1294 for (uint32_t n = 16; n <= 24; n += 8) {
1295 for (size_t k = 1; k <= 5; k += 2) {
1296 for (uint32_t m = 1; m <= 8; m++) {
1297 GemmMicrokernelTester()
1298 .mr(8)
1299 .nr(8)
1300 .kr(1)
1301 .sr(1)
1302 .m(m)
1303 .n(n)
1304 .k(k)
1305 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001306 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001307 }
1308 }
1309 }
1310 }
1311
Marat Dukhande06f492020-04-09 00:19:31 -07001312 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001313 TEST_REQUIRES_ARM_NEON_FMA;
1314 for (size_t k = 1; k <= 5; k += 2) {
1315 for (uint32_t m = 1; m <= 8; m++) {
1316 for (uint32_t n = 1; n <= 8; n++) {
1317 GemmMicrokernelTester()
1318 .mr(8)
1319 .nr(8)
1320 .kr(1)
1321 .sr(1)
1322 .m(m)
1323 .n(n)
1324 .k(k)
1325 .cm_stride(11)
1326 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001327 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001328 }
1329 }
1330 }
1331 }
1332
Marat Dukhande06f492020-04-09 00:19:31 -07001333 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001334 TEST_REQUIRES_ARM_NEON_FMA;
1335 GemmMicrokernelTester()
1336 .mr(8)
1337 .nr(8)
1338 .kr(1)
1339 .sr(1)
1340 .m(8)
1341 .n(8)
1342 .k(1)
1343 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001344 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001345 }
1346
Marat Dukhande06f492020-04-09 00:19:31 -07001347 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001348 TEST_REQUIRES_ARM_NEON_FMA;
1349 GemmMicrokernelTester()
1350 .mr(8)
1351 .nr(8)
1352 .kr(1)
1353 .sr(1)
1354 .m(8)
1355 .n(8)
1356 .k(1)
1357 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001358 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001359 }
1360
Marat Dukhande06f492020-04-09 00:19:31 -07001361 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001362 TEST_REQUIRES_ARM_NEON_FMA;
1363 GemmMicrokernelTester()
1364 .mr(8)
1365 .nr(8)
1366 .kr(1)
1367 .sr(1)
1368 .m(8)
1369 .n(8)
1370 .k(1)
1371 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001372 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001373 }
1374#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1375
1376
1377#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -07001378 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001379 TEST_REQUIRES_X86_SSE;
1380 GemmMicrokernelTester()
1381 .mr(4)
1382 .nr(8)
1383 .kr(1)
1384 .sr(1)
1385 .m(4)
1386 .n(8)
1387 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001388 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001389 }
1390
Marat Dukhande06f492020-04-09 00:19:31 -07001391 TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001392 TEST_REQUIRES_X86_SSE;
1393 GemmMicrokernelTester()
1394 .mr(4)
1395 .nr(8)
1396 .kr(1)
1397 .sr(1)
1398 .m(4)
1399 .n(8)
1400 .k(1)
1401 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001402 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001403 }
1404
Marat Dukhande06f492020-04-09 00:19:31 -07001405 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001406 TEST_REQUIRES_X86_SSE;
1407 GemmMicrokernelTester()
1408 .mr(4)
1409 .nr(8)
1410 .kr(1)
1411 .sr(1)
1412 .m(4)
1413 .n(8)
1414 .k(1)
1415 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -07001416 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001417 }
1418
Marat Dukhande06f492020-04-09 00:19:31 -07001419 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001420 TEST_REQUIRES_X86_SSE;
1421 for (uint32_t m = 1; m <= 4; m++) {
1422 for (uint32_t n = 1; n <= 8; n++) {
1423 GemmMicrokernelTester()
1424 .mr(4)
1425 .nr(8)
1426 .kr(1)
1427 .sr(1)
1428 .m(m)
1429 .n(n)
1430 .k(1)
1431 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001432 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001433 }
1434 }
1435 }
1436
Marat Dukhande06f492020-04-09 00:19:31 -07001437 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001438 TEST_REQUIRES_X86_SSE;
1439 for (uint32_t m = 1; m <= 4; m++) {
1440 GemmMicrokernelTester()
1441 .mr(4)
1442 .nr(8)
1443 .kr(1)
1444 .sr(1)
1445 .m(m)
1446 .n(8)
1447 .k(1)
1448 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001449 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001450 }
1451 }
1452
Marat Dukhande06f492020-04-09 00:19:31 -07001453 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001454 TEST_REQUIRES_X86_SSE;
1455 for (uint32_t n = 1; n <= 8; n++) {
1456 GemmMicrokernelTester()
1457 .mr(4)
1458 .nr(8)
1459 .kr(1)
1460 .sr(1)
1461 .m(4)
1462 .n(n)
1463 .k(1)
1464 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001465 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001466 }
1467 }
1468
Marat Dukhande06f492020-04-09 00:19:31 -07001469 TEST(F32_PPMM_MINMAX_4X8__SSE, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001470 TEST_REQUIRES_X86_SSE;
1471 for (size_t k = 2; k < 10; k++) {
1472 GemmMicrokernelTester()
1473 .mr(4)
1474 .nr(8)
1475 .kr(1)
1476 .sr(1)
1477 .m(4)
1478 .n(8)
1479 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001480 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001481 }
1482 }
1483
Marat Dukhande06f492020-04-09 00:19:31 -07001484 TEST(F32_PPMM_MINMAX_4X8__SSE, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001485 TEST_REQUIRES_X86_SSE;
1486 for (size_t k = 2; k < 10; k++) {
1487 for (uint32_t m = 1; m <= 4; m++) {
1488 for (uint32_t n = 1; n <= 8; n++) {
1489 GemmMicrokernelTester()
1490 .mr(4)
1491 .nr(8)
1492 .kr(1)
1493 .sr(1)
1494 .m(m)
1495 .n(n)
1496 .k(k)
1497 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001498 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001499 }
1500 }
1501 }
1502 }
1503
Marat Dukhande06f492020-04-09 00:19:31 -07001504 TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001505 TEST_REQUIRES_X86_SSE;
1506 for (uint32_t n = 9; n < 16; n++) {
1507 for (size_t k = 1; k <= 5; k += 2) {
1508 GemmMicrokernelTester()
1509 .mr(4)
1510 .nr(8)
1511 .kr(1)
1512 .sr(1)
1513 .m(4)
1514 .n(8)
1515 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001516 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001517 }
1518 }
1519 }
1520
Marat Dukhande06f492020-04-09 00:19:31 -07001521 TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001522 TEST_REQUIRES_X86_SSE;
1523 for (uint32_t n = 9; n < 16; n++) {
1524 for (size_t k = 1; k <= 5; k += 2) {
1525 GemmMicrokernelTester()
1526 .mr(4)
1527 .nr(8)
1528 .kr(1)
1529 .sr(1)
1530 .m(4)
1531 .n(8)
1532 .k(k)
1533 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001534 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001535 }
1536 }
1537 }
1538
Marat Dukhande06f492020-04-09 00:19:31 -07001539 TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001540 TEST_REQUIRES_X86_SSE;
1541 for (uint32_t n = 9; n < 16; n++) {
1542 for (size_t k = 1; k <= 5; k += 2) {
1543 GemmMicrokernelTester()
1544 .mr(4)
1545 .nr(8)
1546 .kr(1)
1547 .sr(1)
1548 .m(4)
1549 .n(n)
1550 .k(k)
1551 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07001552 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001553 }
1554 }
1555 }
1556
Marat Dukhande06f492020-04-09 00:19:31 -07001557 TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001558 TEST_REQUIRES_X86_SSE;
1559 for (uint32_t n = 9; n < 16; n++) {
1560 for (size_t k = 1; k <= 5; k += 2) {
1561 for (uint32_t m = 1; m <= 4; m++) {
1562 GemmMicrokernelTester()
1563 .mr(4)
1564 .nr(8)
1565 .kr(1)
1566 .sr(1)
1567 .m(m)
1568 .n(n)
1569 .k(k)
1570 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001571 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001572 }
1573 }
1574 }
1575 }
1576
Marat Dukhande06f492020-04-09 00:19:31 -07001577 TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001578 TEST_REQUIRES_X86_SSE;
1579 for (uint32_t n = 16; n <= 24; n += 8) {
1580 for (size_t k = 1; k <= 5; k += 2) {
1581 GemmMicrokernelTester()
1582 .mr(4)
1583 .nr(8)
1584 .kr(1)
1585 .sr(1)
1586 .m(4)
1587 .n(8)
1588 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001589 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001590 }
1591 }
1592 }
1593
Marat Dukhande06f492020-04-09 00:19:31 -07001594 TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001595 TEST_REQUIRES_X86_SSE;
1596 for (uint32_t n = 16; n <= 24; n += 8) {
1597 for (size_t k = 1; k <= 5; k += 2) {
1598 GemmMicrokernelTester()
1599 .mr(4)
1600 .nr(8)
1601 .kr(1)
1602 .sr(1)
1603 .m(4)
1604 .n(n)
1605 .k(k)
1606 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001607 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001608 }
1609 }
1610 }
1611
Marat Dukhande06f492020-04-09 00:19:31 -07001612 TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001613 TEST_REQUIRES_X86_SSE;
1614 for (uint32_t n = 16; n <= 24; n += 8) {
1615 for (size_t k = 1; k <= 5; k += 2) {
1616 GemmMicrokernelTester()
1617 .mr(4)
1618 .nr(8)
1619 .kr(1)
1620 .sr(1)
1621 .m(4)
1622 .n(n)
1623 .k(k)
1624 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07001625 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001626 }
1627 }
1628 }
1629
Marat Dukhande06f492020-04-09 00:19:31 -07001630 TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001631 TEST_REQUIRES_X86_SSE;
1632 for (uint32_t n = 16; n <= 24; n += 8) {
1633 for (size_t k = 1; k <= 5; k += 2) {
1634 for (uint32_t m = 1; m <= 4; m++) {
1635 GemmMicrokernelTester()
1636 .mr(4)
1637 .nr(8)
1638 .kr(1)
1639 .sr(1)
1640 .m(m)
1641 .n(n)
1642 .k(k)
1643 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001644 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001645 }
1646 }
1647 }
1648 }
1649
Marat Dukhande06f492020-04-09 00:19:31 -07001650 TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001651 TEST_REQUIRES_X86_SSE;
1652 for (size_t k = 1; k <= 5; k += 2) {
1653 for (uint32_t m = 1; m <= 4; m++) {
1654 for (uint32_t n = 1; n <= 8; n++) {
1655 GemmMicrokernelTester()
1656 .mr(4)
1657 .nr(8)
1658 .kr(1)
1659 .sr(1)
1660 .m(m)
1661 .n(n)
1662 .k(k)
1663 .cm_stride(11)
1664 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001665 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001666 }
1667 }
1668 }
1669 }
1670
Marat Dukhande06f492020-04-09 00:19:31 -07001671 TEST(F32_PPMM_MINMAX_4X8__SSE, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001672 TEST_REQUIRES_X86_SSE;
1673 GemmMicrokernelTester()
1674 .mr(4)
1675 .nr(8)
1676 .kr(1)
1677 .sr(1)
1678 .m(4)
1679 .n(8)
1680 .k(1)
1681 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001682 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001683 }
1684
Marat Dukhande06f492020-04-09 00:19:31 -07001685 TEST(F32_PPMM_MINMAX_4X8__SSE, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001686 TEST_REQUIRES_X86_SSE;
1687 GemmMicrokernelTester()
1688 .mr(4)
1689 .nr(8)
1690 .kr(1)
1691 .sr(1)
1692 .m(4)
1693 .n(8)
1694 .k(1)
1695 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001696 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001697 }
1698
Marat Dukhande06f492020-04-09 00:19:31 -07001699 TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001700 TEST_REQUIRES_X86_SSE;
1701 GemmMicrokernelTester()
1702 .mr(4)
1703 .nr(8)
1704 .kr(1)
1705 .sr(1)
1706 .m(4)
1707 .n(8)
1708 .k(1)
1709 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001710 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001711 }
1712#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1713
1714
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001715#if XNN_ARCH_WASMSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -08001716 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001717 GemmMicrokernelTester()
1718 .mr(4)
1719 .nr(8)
1720 .kr(1)
1721 .sr(1)
1722 .m(4)
1723 .n(8)
1724 .k(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001725 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001726 }
1727
Frank Barchard0725b8d2020-12-07 11:07:35 -08001728 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cn) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001729 GemmMicrokernelTester()
1730 .mr(4)
1731 .nr(8)
1732 .kr(1)
1733 .sr(1)
1734 .m(4)
1735 .n(8)
1736 .k(1)
1737 .cn_stride(11)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001738 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001739 }
1740
Frank Barchard0725b8d2020-12-07 11:07:35 -08001741 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_strided_a) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001742 GemmMicrokernelTester()
1743 .mr(4)
1744 .nr(8)
1745 .kr(1)
1746 .sr(1)
1747 .m(4)
1748 .n(8)
1749 .k(1)
1750 .a_stride(3)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001751 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001752 }
1753
Frank Barchard0725b8d2020-12-07 11:07:35 -08001754 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001755 for (uint32_t m = 1; m <= 4; m++) {
1756 for (uint32_t n = 1; n <= 8; n++) {
1757 GemmMicrokernelTester()
1758 .mr(4)
1759 .nr(8)
1760 .kr(1)
1761 .sr(1)
1762 .m(m)
1763 .n(n)
1764 .k(1)
1765 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001766 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001767 }
1768 }
1769 }
1770
Frank Barchard0725b8d2020-12-07 11:07:35 -08001771 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_subtile_m) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001772 for (uint32_t m = 1; m <= 4; m++) {
1773 GemmMicrokernelTester()
1774 .mr(4)
1775 .nr(8)
1776 .kr(1)
1777 .sr(1)
1778 .m(m)
1779 .n(8)
1780 .k(1)
1781 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001782 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001783 }
1784 }
1785
Frank Barchard0725b8d2020-12-07 11:07:35 -08001786 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_subtile_n) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001787 for (uint32_t n = 1; n <= 8; n++) {
1788 GemmMicrokernelTester()
1789 .mr(4)
1790 .nr(8)
1791 .kr(1)
1792 .sr(1)
1793 .m(4)
1794 .n(n)
1795 .k(1)
1796 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001797 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001798 }
1799 }
1800
Frank Barchard0725b8d2020-12-07 11:07:35 -08001801 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_1) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001802 for (size_t k = 2; k < 10; k++) {
1803 GemmMicrokernelTester()
1804 .mr(4)
1805 .nr(8)
1806 .kr(1)
1807 .sr(1)
1808 .m(4)
1809 .n(8)
1810 .k(k)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001811 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001812 }
1813 }
1814
Frank Barchard0725b8d2020-12-07 11:07:35 -08001815 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_1_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001816 for (size_t k = 2; k < 10; k++) {
1817 for (uint32_t m = 1; m <= 4; m++) {
1818 for (uint32_t n = 1; n <= 8; n++) {
1819 GemmMicrokernelTester()
1820 .mr(4)
1821 .nr(8)
1822 .kr(1)
1823 .sr(1)
1824 .m(m)
1825 .n(n)
1826 .k(k)
1827 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001828 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001829 }
1830 }
1831 }
1832 }
1833
Frank Barchard0725b8d2020-12-07 11:07:35 -08001834 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001835 for (uint32_t n = 9; n < 16; n++) {
1836 for (size_t k = 1; k <= 5; k += 2) {
1837 GemmMicrokernelTester()
1838 .mr(4)
1839 .nr(8)
1840 .kr(1)
1841 .sr(1)
1842 .m(4)
1843 .n(8)
1844 .k(k)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001845 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001846 }
1847 }
1848 }
1849
Frank Barchard0725b8d2020-12-07 11:07:35 -08001850 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001851 for (uint32_t n = 9; n < 16; n++) {
1852 for (size_t k = 1; k <= 5; k += 2) {
1853 GemmMicrokernelTester()
1854 .mr(4)
1855 .nr(8)
1856 .kr(1)
1857 .sr(1)
1858 .m(4)
1859 .n(8)
1860 .k(k)
1861 .cn_stride(11)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001862 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001863 }
1864 }
1865 }
1866
Frank Barchard0725b8d2020-12-07 11:07:35 -08001867 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001868 for (uint32_t n = 9; n < 16; n++) {
1869 for (size_t k = 1; k <= 5; k += 2) {
1870 GemmMicrokernelTester()
1871 .mr(4)
1872 .nr(8)
1873 .kr(1)
1874 .sr(1)
1875 .m(4)
1876 .n(n)
1877 .k(k)
1878 .a_stride(7)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001879 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001880 }
1881 }
1882 }
1883
Frank Barchard0725b8d2020-12-07 11:07:35 -08001884 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001885 for (uint32_t n = 9; n < 16; n++) {
1886 for (size_t k = 1; k <= 5; k += 2) {
1887 for (uint32_t m = 1; m <= 4; m++) {
1888 GemmMicrokernelTester()
1889 .mr(4)
1890 .nr(8)
1891 .kr(1)
1892 .sr(1)
1893 .m(m)
1894 .n(n)
1895 .k(k)
1896 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001897 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001898 }
1899 }
1900 }
1901 }
1902
Frank Barchard0725b8d2020-12-07 11:07:35 -08001903 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001904 for (uint32_t n = 16; n <= 24; n += 8) {
1905 for (size_t k = 1; k <= 5; k += 2) {
1906 GemmMicrokernelTester()
1907 .mr(4)
1908 .nr(8)
1909 .kr(1)
1910 .sr(1)
1911 .m(4)
1912 .n(8)
1913 .k(k)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001914 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001915 }
1916 }
1917 }
1918
Frank Barchard0725b8d2020-12-07 11:07:35 -08001919 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001920 for (uint32_t n = 16; n <= 24; n += 8) {
1921 for (size_t k = 1; k <= 5; k += 2) {
1922 GemmMicrokernelTester()
1923 .mr(4)
1924 .nr(8)
1925 .kr(1)
1926 .sr(1)
1927 .m(4)
1928 .n(n)
1929 .k(k)
1930 .cn_stride(11)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001931 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001932 }
1933 }
1934 }
1935
Frank Barchard0725b8d2020-12-07 11:07:35 -08001936 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001937 for (uint32_t n = 16; n <= 24; n += 8) {
1938 for (size_t k = 1; k <= 5; k += 2) {
1939 GemmMicrokernelTester()
1940 .mr(4)
1941 .nr(8)
1942 .kr(1)
1943 .sr(1)
1944 .m(4)
1945 .n(n)
1946 .k(k)
1947 .a_stride(7)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001948 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001949 }
1950 }
1951 }
1952
Frank Barchard0725b8d2020-12-07 11:07:35 -08001953 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001954 for (uint32_t n = 16; n <= 24; n += 8) {
1955 for (size_t k = 1; k <= 5; k += 2) {
1956 for (uint32_t m = 1; m <= 4; m++) {
1957 GemmMicrokernelTester()
1958 .mr(4)
1959 .nr(8)
1960 .kr(1)
1961 .sr(1)
1962 .m(m)
1963 .n(n)
1964 .k(k)
1965 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001966 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001967 }
1968 }
1969 }
1970 }
1971
Frank Barchard0725b8d2020-12-07 11:07:35 -08001972 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001973 for (size_t k = 1; k <= 5; k += 2) {
1974 for (uint32_t m = 1; m <= 4; m++) {
1975 for (uint32_t n = 1; n <= 8; n++) {
1976 GemmMicrokernelTester()
1977 .mr(4)
1978 .nr(8)
1979 .kr(1)
1980 .sr(1)
1981 .m(m)
1982 .n(n)
1983 .k(k)
1984 .cm_stride(11)
1985 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08001986 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001987 }
1988 }
1989 }
1990 }
1991
Frank Barchard0725b8d2020-12-07 11:07:35 -08001992 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmin) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07001993 GemmMicrokernelTester()
1994 .mr(4)
1995 .nr(8)
1996 .kr(1)
1997 .sr(1)
1998 .m(4)
1999 .n(8)
2000 .k(1)
2001 .qmin(128)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002002 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002003 }
2004
Frank Barchard0725b8d2020-12-07 11:07:35 -08002005 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmax) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002006 GemmMicrokernelTester()
2007 .mr(4)
2008 .nr(8)
2009 .kr(1)
2010 .sr(1)
2011 .m(4)
2012 .n(8)
2013 .k(1)
2014 .qmax(128)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002015 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002016 }
2017
Frank Barchard0725b8d2020-12-07 11:07:35 -08002018 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002019 GemmMicrokernelTester()
2020 .mr(4)
2021 .nr(8)
2022 .kr(1)
2023 .sr(1)
2024 .m(4)
2025 .n(8)
2026 .k(1)
2027 .cm_stride(11)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002028 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002029 }
2030#endif // XNN_ARCH_WASMSIMD
2031
2032
2033#if XNN_ARCH_WASMSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -08002034 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002035 GemmMicrokernelTester()
2036 .mr(4)
2037 .nr(8)
2038 .kr(1)
2039 .sr(1)
2040 .m(4)
2041 .n(8)
2042 .k(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002043 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002044 }
2045
Frank Barchard0725b8d2020-12-07 11:07:35 -08002046 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cn) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002047 GemmMicrokernelTester()
2048 .mr(4)
2049 .nr(8)
2050 .kr(1)
2051 .sr(1)
2052 .m(4)
2053 .n(8)
2054 .k(1)
2055 .cn_stride(11)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002056 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002057 }
2058
Frank Barchard0725b8d2020-12-07 11:07:35 -08002059 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_strided_a) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002060 GemmMicrokernelTester()
2061 .mr(4)
2062 .nr(8)
2063 .kr(1)
2064 .sr(1)
2065 .m(4)
2066 .n(8)
2067 .k(1)
2068 .a_stride(3)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002069 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002070 }
2071
Frank Barchard0725b8d2020-12-07 11:07:35 -08002072 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002073 for (uint32_t m = 1; m <= 4; m++) {
2074 for (uint32_t n = 1; n <= 8; n++) {
2075 GemmMicrokernelTester()
2076 .mr(4)
2077 .nr(8)
2078 .kr(1)
2079 .sr(1)
2080 .m(m)
2081 .n(n)
2082 .k(1)
2083 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002084 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002085 }
2086 }
2087 }
2088
Frank Barchard0725b8d2020-12-07 11:07:35 -08002089 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_subtile_m) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002090 for (uint32_t m = 1; m <= 4; m++) {
2091 GemmMicrokernelTester()
2092 .mr(4)
2093 .nr(8)
2094 .kr(1)
2095 .sr(1)
2096 .m(m)
2097 .n(8)
2098 .k(1)
2099 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002100 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002101 }
2102 }
2103
Frank Barchard0725b8d2020-12-07 11:07:35 -08002104 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_subtile_n) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002105 for (uint32_t n = 1; n <= 8; n++) {
2106 GemmMicrokernelTester()
2107 .mr(4)
2108 .nr(8)
2109 .kr(1)
2110 .sr(1)
2111 .m(4)
2112 .n(n)
2113 .k(1)
2114 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002115 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002116 }
2117 }
2118
Frank Barchard0725b8d2020-12-07 11:07:35 -08002119 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_1) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002120 for (size_t k = 2; k < 10; k++) {
2121 GemmMicrokernelTester()
2122 .mr(4)
2123 .nr(8)
2124 .kr(1)
2125 .sr(1)
2126 .m(4)
2127 .n(8)
2128 .k(k)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002129 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002130 }
2131 }
2132
Frank Barchard0725b8d2020-12-07 11:07:35 -08002133 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_1_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002134 for (size_t k = 2; k < 10; k++) {
2135 for (uint32_t m = 1; m <= 4; m++) {
2136 for (uint32_t n = 1; n <= 8; n++) {
2137 GemmMicrokernelTester()
2138 .mr(4)
2139 .nr(8)
2140 .kr(1)
2141 .sr(1)
2142 .m(m)
2143 .n(n)
2144 .k(k)
2145 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002146 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002147 }
2148 }
2149 }
2150 }
2151
Frank Barchard0725b8d2020-12-07 11:07:35 -08002152 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002153 for (uint32_t n = 9; n < 16; n++) {
2154 for (size_t k = 1; k <= 5; k += 2) {
2155 GemmMicrokernelTester()
2156 .mr(4)
2157 .nr(8)
2158 .kr(1)
2159 .sr(1)
2160 .m(4)
2161 .n(8)
2162 .k(k)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002163 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002164 }
2165 }
2166 }
2167
Frank Barchard0725b8d2020-12-07 11:07:35 -08002168 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002169 for (uint32_t n = 9; n < 16; n++) {
2170 for (size_t k = 1; k <= 5; k += 2) {
2171 GemmMicrokernelTester()
2172 .mr(4)
2173 .nr(8)
2174 .kr(1)
2175 .sr(1)
2176 .m(4)
2177 .n(8)
2178 .k(k)
2179 .cn_stride(11)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002180 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002181 }
2182 }
2183 }
2184
Frank Barchard0725b8d2020-12-07 11:07:35 -08002185 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002186 for (uint32_t n = 9; n < 16; n++) {
2187 for (size_t k = 1; k <= 5; k += 2) {
2188 GemmMicrokernelTester()
2189 .mr(4)
2190 .nr(8)
2191 .kr(1)
2192 .sr(1)
2193 .m(4)
2194 .n(n)
2195 .k(k)
2196 .a_stride(7)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002197 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002198 }
2199 }
2200 }
2201
Frank Barchard0725b8d2020-12-07 11:07:35 -08002202 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002203 for (uint32_t n = 9; n < 16; n++) {
2204 for (size_t k = 1; k <= 5; k += 2) {
2205 for (uint32_t m = 1; m <= 4; m++) {
2206 GemmMicrokernelTester()
2207 .mr(4)
2208 .nr(8)
2209 .kr(1)
2210 .sr(1)
2211 .m(m)
2212 .n(n)
2213 .k(k)
2214 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002215 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002216 }
2217 }
2218 }
2219 }
2220
Frank Barchard0725b8d2020-12-07 11:07:35 -08002221 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002222 for (uint32_t n = 16; n <= 24; n += 8) {
2223 for (size_t k = 1; k <= 5; k += 2) {
2224 GemmMicrokernelTester()
2225 .mr(4)
2226 .nr(8)
2227 .kr(1)
2228 .sr(1)
2229 .m(4)
2230 .n(8)
2231 .k(k)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002232 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002233 }
2234 }
2235 }
2236
Frank Barchard0725b8d2020-12-07 11:07:35 -08002237 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002238 for (uint32_t n = 16; n <= 24; n += 8) {
2239 for (size_t k = 1; k <= 5; k += 2) {
2240 GemmMicrokernelTester()
2241 .mr(4)
2242 .nr(8)
2243 .kr(1)
2244 .sr(1)
2245 .m(4)
2246 .n(n)
2247 .k(k)
2248 .cn_stride(11)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002249 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002250 }
2251 }
2252 }
2253
Frank Barchard0725b8d2020-12-07 11:07:35 -08002254 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002255 for (uint32_t n = 16; n <= 24; n += 8) {
2256 for (size_t k = 1; k <= 5; k += 2) {
2257 GemmMicrokernelTester()
2258 .mr(4)
2259 .nr(8)
2260 .kr(1)
2261 .sr(1)
2262 .m(4)
2263 .n(n)
2264 .k(k)
2265 .a_stride(7)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002266 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002267 }
2268 }
2269 }
2270
Frank Barchard0725b8d2020-12-07 11:07:35 -08002271 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002272 for (uint32_t n = 16; n <= 24; n += 8) {
2273 for (size_t k = 1; k <= 5; k += 2) {
2274 for (uint32_t m = 1; m <= 4; m++) {
2275 GemmMicrokernelTester()
2276 .mr(4)
2277 .nr(8)
2278 .kr(1)
2279 .sr(1)
2280 .m(m)
2281 .n(n)
2282 .k(k)
2283 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002284 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002285 }
2286 }
2287 }
2288 }
2289
Frank Barchard0725b8d2020-12-07 11:07:35 -08002290 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002291 for (size_t k = 1; k <= 5; k += 2) {
2292 for (uint32_t m = 1; m <= 4; m++) {
2293 for (uint32_t n = 1; n <= 8; n++) {
2294 GemmMicrokernelTester()
2295 .mr(4)
2296 .nr(8)
2297 .kr(1)
2298 .sr(1)
2299 .m(m)
2300 .n(n)
2301 .k(k)
2302 .cm_stride(11)
2303 .iterations(1)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002304 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002305 }
2306 }
2307 }
2308 }
2309
Frank Barchard0725b8d2020-12-07 11:07:35 -08002310 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmin) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002311 GemmMicrokernelTester()
2312 .mr(4)
2313 .nr(8)
2314 .kr(1)
2315 .sr(1)
2316 .m(4)
2317 .n(8)
2318 .k(1)
2319 .qmin(128)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002320 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002321 }
2322
Frank Barchard0725b8d2020-12-07 11:07:35 -08002323 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmax) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002324 GemmMicrokernelTester()
2325 .mr(4)
2326 .nr(8)
2327 .kr(1)
2328 .sr(1)
2329 .m(4)
2330 .n(8)
2331 .k(1)
2332 .qmax(128)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002333 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002334 }
2335
Frank Barchard0725b8d2020-12-07 11:07:35 -08002336 TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm) {
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002337 GemmMicrokernelTester()
2338 .mr(4)
2339 .nr(8)
2340 .kr(1)
2341 .sr(1)
2342 .m(4)
2343 .n(8)
2344 .k(1)
2345 .cm_stride(11)
Frank Barchard0725b8d2020-12-07 11:07:35 -08002346 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
Marat Dukhan0d0d8822020-07-23 23:37:56 -07002347 }
2348#endif // XNN_ARCH_WASMSIMD
2349
2350
Marat Dukhande06f492020-04-09 00:19:31 -07002351TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002352 GemmMicrokernelTester()
2353 .mr(4)
2354 .nr(2)
2355 .kr(1)
2356 .sr(1)
2357 .m(4)
2358 .n(2)
2359 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002360 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002361}
2362
Marat Dukhande06f492020-04-09 00:19:31 -07002363TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002364 GemmMicrokernelTester()
2365 .mr(4)
2366 .nr(2)
2367 .kr(1)
2368 .sr(1)
2369 .m(4)
2370 .n(2)
2371 .k(1)
2372 .cn_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07002373 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002374}
2375
Marat Dukhande06f492020-04-09 00:19:31 -07002376TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002377 GemmMicrokernelTester()
2378 .mr(4)
2379 .nr(2)
2380 .kr(1)
2381 .sr(1)
2382 .m(4)
2383 .n(2)
2384 .k(1)
2385 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -07002386 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002387}
2388
Marat Dukhande06f492020-04-09 00:19:31 -07002389TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002390 for (uint32_t m = 1; m <= 4; m++) {
2391 for (uint32_t n = 1; n <= 2; n++) {
2392 GemmMicrokernelTester()
2393 .mr(4)
2394 .nr(2)
2395 .kr(1)
2396 .sr(1)
2397 .m(m)
2398 .n(n)
2399 .k(1)
2400 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002401 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002402 }
2403 }
2404}
2405
Marat Dukhande06f492020-04-09 00:19:31 -07002406TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002407 for (uint32_t m = 1; m <= 4; m++) {
2408 GemmMicrokernelTester()
2409 .mr(4)
2410 .nr(2)
2411 .kr(1)
2412 .sr(1)
2413 .m(m)
2414 .n(2)
2415 .k(1)
2416 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002417 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002418 }
2419}
2420
Marat Dukhande06f492020-04-09 00:19:31 -07002421TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002422 for (uint32_t n = 1; n <= 2; n++) {
2423 GemmMicrokernelTester()
2424 .mr(4)
2425 .nr(2)
2426 .kr(1)
2427 .sr(1)
2428 .m(4)
2429 .n(n)
2430 .k(1)
2431 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002432 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002433 }
2434}
2435
Marat Dukhande06f492020-04-09 00:19:31 -07002436TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002437 for (size_t k = 2; k < 10; k++) {
2438 GemmMicrokernelTester()
2439 .mr(4)
2440 .nr(2)
2441 .kr(1)
2442 .sr(1)
2443 .m(4)
2444 .n(2)
2445 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002446 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002447 }
2448}
2449
Marat Dukhande06f492020-04-09 00:19:31 -07002450TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002451 for (size_t k = 2; k < 10; k++) {
2452 for (uint32_t m = 1; m <= 4; m++) {
2453 for (uint32_t n = 1; n <= 2; n++) {
2454 GemmMicrokernelTester()
2455 .mr(4)
2456 .nr(2)
2457 .kr(1)
2458 .sr(1)
2459 .m(m)
2460 .n(n)
2461 .k(k)
2462 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002463 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002464 }
2465 }
2466 }
2467}
2468
Marat Dukhande06f492020-04-09 00:19:31 -07002469TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002470 for (uint32_t n = 3; n < 4; n++) {
2471 for (size_t k = 1; k <= 5; k += 2) {
2472 GemmMicrokernelTester()
2473 .mr(4)
2474 .nr(2)
2475 .kr(1)
2476 .sr(1)
2477 .m(4)
2478 .n(2)
2479 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002480 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002481 }
2482 }
2483}
2484
Marat Dukhande06f492020-04-09 00:19:31 -07002485TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002486 for (uint32_t n = 3; n < 4; n++) {
2487 for (size_t k = 1; k <= 5; k += 2) {
2488 GemmMicrokernelTester()
2489 .mr(4)
2490 .nr(2)
2491 .kr(1)
2492 .sr(1)
2493 .m(4)
2494 .n(2)
2495 .k(k)
2496 .cn_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07002497 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002498 }
2499 }
2500}
2501
Marat Dukhande06f492020-04-09 00:19:31 -07002502TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002503 for (uint32_t n = 3; n < 4; n++) {
2504 for (size_t k = 1; k <= 5; k += 2) {
2505 GemmMicrokernelTester()
2506 .mr(4)
2507 .nr(2)
2508 .kr(1)
2509 .sr(1)
2510 .m(4)
2511 .n(n)
2512 .k(k)
2513 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002514 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002515 }
2516 }
2517}
2518
Marat Dukhande06f492020-04-09 00:19:31 -07002519TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002520 for (uint32_t n = 3; n < 4; n++) {
2521 for (size_t k = 1; k <= 5; k += 2) {
2522 for (uint32_t m = 1; m <= 4; m++) {
2523 GemmMicrokernelTester()
2524 .mr(4)
2525 .nr(2)
2526 .kr(1)
2527 .sr(1)
2528 .m(m)
2529 .n(n)
2530 .k(k)
2531 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002532 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002533 }
2534 }
2535 }
2536}
2537
Marat Dukhande06f492020-04-09 00:19:31 -07002538TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002539 for (uint32_t n = 4; n <= 6; n += 2) {
2540 for (size_t k = 1; k <= 5; k += 2) {
2541 GemmMicrokernelTester()
2542 .mr(4)
2543 .nr(2)
2544 .kr(1)
2545 .sr(1)
2546 .m(4)
2547 .n(2)
2548 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002549 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002550 }
2551 }
2552}
2553
Marat Dukhande06f492020-04-09 00:19:31 -07002554TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002555 for (uint32_t n = 4; n <= 6; n += 2) {
2556 for (size_t k = 1; k <= 5; k += 2) {
2557 GemmMicrokernelTester()
2558 .mr(4)
2559 .nr(2)
2560 .kr(1)
2561 .sr(1)
2562 .m(4)
2563 .n(n)
2564 .k(k)
2565 .cn_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07002566 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002567 }
2568 }
2569}
2570
Marat Dukhande06f492020-04-09 00:19:31 -07002571TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002572 for (uint32_t n = 4; n <= 6; n += 2) {
2573 for (size_t k = 1; k <= 5; k += 2) {
2574 GemmMicrokernelTester()
2575 .mr(4)
2576 .nr(2)
2577 .kr(1)
2578 .sr(1)
2579 .m(4)
2580 .n(n)
2581 .k(k)
2582 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002583 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002584 }
2585 }
2586}
2587
Marat Dukhande06f492020-04-09 00:19:31 -07002588TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002589 for (uint32_t n = 4; n <= 6; n += 2) {
2590 for (size_t k = 1; k <= 5; k += 2) {
2591 for (uint32_t m = 1; m <= 4; m++) {
2592 GemmMicrokernelTester()
2593 .mr(4)
2594 .nr(2)
2595 .kr(1)
2596 .sr(1)
2597 .m(m)
2598 .n(n)
2599 .k(k)
2600 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002601 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002602 }
2603 }
2604 }
2605}
2606
Marat Dukhande06f492020-04-09 00:19:31 -07002607TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002608 for (size_t k = 1; k <= 5; k += 2) {
2609 for (uint32_t m = 1; m <= 4; m++) {
2610 for (uint32_t n = 1; n <= 2; n++) {
2611 GemmMicrokernelTester()
2612 .mr(4)
2613 .nr(2)
2614 .kr(1)
2615 .sr(1)
2616 .m(m)
2617 .n(n)
2618 .k(k)
2619 .cm_stride(5)
2620 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002621 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002622 }
2623 }
2624 }
2625}
2626
Marat Dukhande06f492020-04-09 00:19:31 -07002627TEST(F32_PPMM_MINMAX_4X2__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002628 GemmMicrokernelTester()
2629 .mr(4)
2630 .nr(2)
2631 .kr(1)
2632 .sr(1)
2633 .m(4)
2634 .n(2)
2635 .k(1)
2636 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07002637 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002638}
2639
Marat Dukhande06f492020-04-09 00:19:31 -07002640TEST(F32_PPMM_MINMAX_4X2__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002641 GemmMicrokernelTester()
2642 .mr(4)
2643 .nr(2)
2644 .kr(1)
2645 .sr(1)
2646 .m(4)
2647 .n(2)
2648 .k(1)
2649 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07002650 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002651}
2652
Marat Dukhande06f492020-04-09 00:19:31 -07002653TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002654 GemmMicrokernelTester()
2655 .mr(4)
2656 .nr(2)
2657 .kr(1)
2658 .sr(1)
2659 .m(4)
2660 .n(2)
2661 .k(1)
2662 .cm_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07002663 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002664}
2665
2666
Marat Dukhande06f492020-04-09 00:19:31 -07002667TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002668 GemmMicrokernelTester()
2669 .mr(2)
2670 .nr(4)
2671 .kr(1)
2672 .sr(1)
2673 .m(2)
2674 .n(4)
2675 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002676 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002677}
2678
Marat Dukhande06f492020-04-09 00:19:31 -07002679TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002680 GemmMicrokernelTester()
2681 .mr(2)
2682 .nr(4)
2683 .kr(1)
2684 .sr(1)
2685 .m(2)
2686 .n(4)
2687 .k(1)
2688 .cn_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002689 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002690}
2691
Marat Dukhande06f492020-04-09 00:19:31 -07002692TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002693 GemmMicrokernelTester()
2694 .mr(2)
2695 .nr(4)
2696 .kr(1)
2697 .sr(1)
2698 .m(2)
2699 .n(4)
2700 .k(1)
2701 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -07002702 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002703}
2704
Marat Dukhande06f492020-04-09 00:19:31 -07002705TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002706 for (uint32_t m = 1; m <= 2; m++) {
2707 for (uint32_t n = 1; n <= 4; n++) {
2708 GemmMicrokernelTester()
2709 .mr(2)
2710 .nr(4)
2711 .kr(1)
2712 .sr(1)
2713 .m(m)
2714 .n(n)
2715 .k(1)
2716 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002717 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002718 }
2719 }
2720}
2721
Marat Dukhande06f492020-04-09 00:19:31 -07002722TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002723 for (uint32_t m = 1; m <= 2; m++) {
2724 GemmMicrokernelTester()
2725 .mr(2)
2726 .nr(4)
2727 .kr(1)
2728 .sr(1)
2729 .m(m)
2730 .n(4)
2731 .k(1)
2732 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002733 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002734 }
2735}
2736
Marat Dukhande06f492020-04-09 00:19:31 -07002737TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002738 for (uint32_t n = 1; n <= 4; n++) {
2739 GemmMicrokernelTester()
2740 .mr(2)
2741 .nr(4)
2742 .kr(1)
2743 .sr(1)
2744 .m(2)
2745 .n(n)
2746 .k(1)
2747 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002748 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002749 }
2750}
2751
Marat Dukhande06f492020-04-09 00:19:31 -07002752TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002753 for (size_t k = 2; k < 10; k++) {
2754 GemmMicrokernelTester()
2755 .mr(2)
2756 .nr(4)
2757 .kr(1)
2758 .sr(1)
2759 .m(2)
2760 .n(4)
2761 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002762 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002763 }
2764}
2765
Marat Dukhande06f492020-04-09 00:19:31 -07002766TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002767 for (size_t k = 2; k < 10; k++) {
2768 for (uint32_t m = 1; m <= 2; m++) {
2769 for (uint32_t n = 1; n <= 4; n++) {
2770 GemmMicrokernelTester()
2771 .mr(2)
2772 .nr(4)
2773 .kr(1)
2774 .sr(1)
2775 .m(m)
2776 .n(n)
2777 .k(k)
2778 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002779 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002780 }
2781 }
2782 }
2783}
2784
Marat Dukhande06f492020-04-09 00:19:31 -07002785TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002786 for (uint32_t n = 5; n < 8; n++) {
2787 for (size_t k = 1; k <= 5; k += 2) {
2788 GemmMicrokernelTester()
2789 .mr(2)
2790 .nr(4)
2791 .kr(1)
2792 .sr(1)
2793 .m(2)
2794 .n(4)
2795 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002796 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002797 }
2798 }
2799}
2800
Marat Dukhande06f492020-04-09 00:19:31 -07002801TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002802 for (uint32_t n = 5; n < 8; n++) {
2803 for (size_t k = 1; k <= 5; k += 2) {
2804 GemmMicrokernelTester()
2805 .mr(2)
2806 .nr(4)
2807 .kr(1)
2808 .sr(1)
2809 .m(2)
2810 .n(4)
2811 .k(k)
2812 .cn_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002813 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002814 }
2815 }
2816}
2817
Marat Dukhande06f492020-04-09 00:19:31 -07002818TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002819 for (uint32_t n = 5; n < 8; n++) {
2820 for (size_t k = 1; k <= 5; k += 2) {
2821 GemmMicrokernelTester()
2822 .mr(2)
2823 .nr(4)
2824 .kr(1)
2825 .sr(1)
2826 .m(2)
2827 .n(n)
2828 .k(k)
2829 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002830 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002831 }
2832 }
2833}
2834
Marat Dukhande06f492020-04-09 00:19:31 -07002835TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002836 for (uint32_t n = 5; n < 8; n++) {
2837 for (size_t k = 1; k <= 5; k += 2) {
2838 for (uint32_t m = 1; m <= 2; m++) {
2839 GemmMicrokernelTester()
2840 .mr(2)
2841 .nr(4)
2842 .kr(1)
2843 .sr(1)
2844 .m(m)
2845 .n(n)
2846 .k(k)
2847 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002848 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002849 }
2850 }
2851 }
2852}
2853
Marat Dukhande06f492020-04-09 00:19:31 -07002854TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002855 for (uint32_t n = 8; n <= 12; n += 4) {
2856 for (size_t k = 1; k <= 5; k += 2) {
2857 GemmMicrokernelTester()
2858 .mr(2)
2859 .nr(4)
2860 .kr(1)
2861 .sr(1)
2862 .m(2)
2863 .n(4)
2864 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002865 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002866 }
2867 }
2868}
2869
Marat Dukhande06f492020-04-09 00:19:31 -07002870TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002871 for (uint32_t n = 8; n <= 12; n += 4) {
2872 for (size_t k = 1; k <= 5; k += 2) {
2873 GemmMicrokernelTester()
2874 .mr(2)
2875 .nr(4)
2876 .kr(1)
2877 .sr(1)
2878 .m(2)
2879 .n(n)
2880 .k(k)
2881 .cn_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002882 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002883 }
2884 }
2885}
2886
Marat Dukhande06f492020-04-09 00:19:31 -07002887TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002888 for (uint32_t n = 8; n <= 12; n += 4) {
2889 for (size_t k = 1; k <= 5; k += 2) {
2890 GemmMicrokernelTester()
2891 .mr(2)
2892 .nr(4)
2893 .kr(1)
2894 .sr(1)
2895 .m(2)
2896 .n(n)
2897 .k(k)
2898 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002899 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002900 }
2901 }
2902}
2903
Marat Dukhande06f492020-04-09 00:19:31 -07002904TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002905 for (uint32_t n = 8; n <= 12; n += 4) {
2906 for (size_t k = 1; k <= 5; k += 2) {
2907 for (uint32_t m = 1; m <= 2; m++) {
2908 GemmMicrokernelTester()
2909 .mr(2)
2910 .nr(4)
2911 .kr(1)
2912 .sr(1)
2913 .m(m)
2914 .n(n)
2915 .k(k)
2916 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002917 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002918 }
2919 }
2920 }
2921}
2922
Marat Dukhande06f492020-04-09 00:19:31 -07002923TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002924 for (size_t k = 1; k <= 5; k += 2) {
2925 for (uint32_t m = 1; m <= 2; m++) {
2926 for (uint32_t n = 1; n <= 4; n++) {
2927 GemmMicrokernelTester()
2928 .mr(2)
2929 .nr(4)
2930 .kr(1)
2931 .sr(1)
2932 .m(m)
2933 .n(n)
2934 .k(k)
2935 .cm_stride(7)
2936 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002937 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002938 }
2939 }
2940 }
2941}
2942
Marat Dukhande06f492020-04-09 00:19:31 -07002943TEST(F32_PPMM_MINMAX_2X4__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002944 GemmMicrokernelTester()
2945 .mr(2)
2946 .nr(4)
2947 .kr(1)
2948 .sr(1)
2949 .m(2)
2950 .n(4)
2951 .k(1)
2952 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07002953 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002954}
2955
Marat Dukhande06f492020-04-09 00:19:31 -07002956TEST(F32_PPMM_MINMAX_2X4__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002957 GemmMicrokernelTester()
2958 .mr(2)
2959 .nr(4)
2960 .kr(1)
2961 .sr(1)
2962 .m(2)
2963 .n(4)
2964 .k(1)
2965 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07002966 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002967}
2968
Marat Dukhande06f492020-04-09 00:19:31 -07002969TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002970 GemmMicrokernelTester()
2971 .mr(2)
2972 .nr(4)
2973 .kr(1)
2974 .sr(1)
2975 .m(2)
2976 .n(4)
2977 .k(1)
2978 .cm_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002979 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002980}
2981
2982
Marat Dukhande06f492020-04-09 00:19:31 -07002983TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002984 GemmMicrokernelTester()
2985 .mr(4)
2986 .nr(4)
2987 .kr(1)
2988 .sr(1)
2989 .m(4)
2990 .n(4)
2991 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002992 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002993}
2994
Marat Dukhande06f492020-04-09 00:19:31 -07002995TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002996 GemmMicrokernelTester()
2997 .mr(4)
2998 .nr(4)
2999 .kr(1)
3000 .sr(1)
3001 .m(4)
3002 .n(4)
3003 .k(1)
3004 .cn_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07003005 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003006}
3007
Marat Dukhande06f492020-04-09 00:19:31 -07003008TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003009 GemmMicrokernelTester()
3010 .mr(4)
3011 .nr(4)
3012 .kr(1)
3013 .sr(1)
3014 .m(4)
3015 .n(4)
3016 .k(1)
3017 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -07003018 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003019}
3020
Marat Dukhande06f492020-04-09 00:19:31 -07003021TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003022 for (uint32_t m = 1; m <= 4; m++) {
3023 for (uint32_t n = 1; n <= 4; n++) {
3024 GemmMicrokernelTester()
3025 .mr(4)
3026 .nr(4)
3027 .kr(1)
3028 .sr(1)
3029 .m(m)
3030 .n(n)
3031 .k(1)
3032 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003033 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003034 }
3035 }
3036}
3037
Marat Dukhande06f492020-04-09 00:19:31 -07003038TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003039 for (uint32_t m = 1; m <= 4; m++) {
3040 GemmMicrokernelTester()
3041 .mr(4)
3042 .nr(4)
3043 .kr(1)
3044 .sr(1)
3045 .m(m)
3046 .n(4)
3047 .k(1)
3048 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003049 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003050 }
3051}
3052
Marat Dukhande06f492020-04-09 00:19:31 -07003053TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003054 for (uint32_t n = 1; n <= 4; n++) {
3055 GemmMicrokernelTester()
3056 .mr(4)
3057 .nr(4)
3058 .kr(1)
3059 .sr(1)
3060 .m(4)
3061 .n(n)
3062 .k(1)
3063 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003064 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003065 }
3066}
3067
Marat Dukhande06f492020-04-09 00:19:31 -07003068TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003069 for (size_t k = 2; k < 10; k++) {
3070 GemmMicrokernelTester()
3071 .mr(4)
3072 .nr(4)
3073 .kr(1)
3074 .sr(1)
3075 .m(4)
3076 .n(4)
3077 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07003078 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003079 }
3080}
3081
Marat Dukhande06f492020-04-09 00:19:31 -07003082TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003083 for (size_t k = 2; k < 10; k++) {
3084 for (uint32_t m = 1; m <= 4; m++) {
3085 for (uint32_t n = 1; n <= 4; n++) {
3086 GemmMicrokernelTester()
3087 .mr(4)
3088 .nr(4)
3089 .kr(1)
3090 .sr(1)
3091 .m(m)
3092 .n(n)
3093 .k(k)
3094 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003095 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003096 }
3097 }
3098 }
3099}
3100
Marat Dukhande06f492020-04-09 00:19:31 -07003101TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003102 for (uint32_t n = 5; n < 8; n++) {
3103 for (size_t k = 1; k <= 5; k += 2) {
3104 GemmMicrokernelTester()
3105 .mr(4)
3106 .nr(4)
3107 .kr(1)
3108 .sr(1)
3109 .m(4)
3110 .n(4)
3111 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07003112 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003113 }
3114 }
3115}
3116
Marat Dukhande06f492020-04-09 00:19:31 -07003117TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003118 for (uint32_t n = 5; n < 8; n++) {
3119 for (size_t k = 1; k <= 5; k += 2) {
3120 GemmMicrokernelTester()
3121 .mr(4)
3122 .nr(4)
3123 .kr(1)
3124 .sr(1)
3125 .m(4)
3126 .n(4)
3127 .k(k)
3128 .cn_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07003129 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003130 }
3131 }
3132}
3133
Marat Dukhande06f492020-04-09 00:19:31 -07003134TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003135 for (uint32_t n = 5; n < 8; n++) {
3136 for (size_t k = 1; k <= 5; k += 2) {
3137 GemmMicrokernelTester()
3138 .mr(4)
3139 .nr(4)
3140 .kr(1)
3141 .sr(1)
3142 .m(4)
3143 .n(n)
3144 .k(k)
3145 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07003146 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003147 }
3148 }
3149}
3150
Marat Dukhande06f492020-04-09 00:19:31 -07003151TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003152 for (uint32_t n = 5; n < 8; n++) {
3153 for (size_t k = 1; k <= 5; k += 2) {
3154 for (uint32_t m = 1; m <= 4; m++) {
3155 GemmMicrokernelTester()
3156 .mr(4)
3157 .nr(4)
3158 .kr(1)
3159 .sr(1)
3160 .m(m)
3161 .n(n)
3162 .k(k)
3163 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003164 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003165 }
3166 }
3167 }
3168}
3169
Marat Dukhande06f492020-04-09 00:19:31 -07003170TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003171 for (uint32_t n = 8; n <= 12; n += 4) {
3172 for (size_t k = 1; k <= 5; k += 2) {
3173 GemmMicrokernelTester()
3174 .mr(4)
3175 .nr(4)
3176 .kr(1)
3177 .sr(1)
3178 .m(4)
3179 .n(4)
3180 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07003181 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003182 }
3183 }
3184}
3185
Marat Dukhande06f492020-04-09 00:19:31 -07003186TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003187 for (uint32_t n = 8; n <= 12; n += 4) {
3188 for (size_t k = 1; k <= 5; k += 2) {
3189 GemmMicrokernelTester()
3190 .mr(4)
3191 .nr(4)
3192 .kr(1)
3193 .sr(1)
3194 .m(4)
3195 .n(n)
3196 .k(k)
3197 .cn_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07003198 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003199 }
3200 }
3201}
3202
Marat Dukhande06f492020-04-09 00:19:31 -07003203TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003204 for (uint32_t n = 8; n <= 12; n += 4) {
3205 for (size_t k = 1; k <= 5; k += 2) {
3206 GemmMicrokernelTester()
3207 .mr(4)
3208 .nr(4)
3209 .kr(1)
3210 .sr(1)
3211 .m(4)
3212 .n(n)
3213 .k(k)
3214 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07003215 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003216 }
3217 }
3218}
3219
Marat Dukhande06f492020-04-09 00:19:31 -07003220TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003221 for (uint32_t n = 8; n <= 12; n += 4) {
3222 for (size_t k = 1; k <= 5; k += 2) {
3223 for (uint32_t m = 1; m <= 4; m++) {
3224 GemmMicrokernelTester()
3225 .mr(4)
3226 .nr(4)
3227 .kr(1)
3228 .sr(1)
3229 .m(m)
3230 .n(n)
3231 .k(k)
3232 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003233 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003234 }
3235 }
3236 }
3237}
3238
Marat Dukhande06f492020-04-09 00:19:31 -07003239TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003240 for (size_t k = 1; k <= 5; k += 2) {
3241 for (uint32_t m = 1; m <= 4; m++) {
3242 for (uint32_t n = 1; n <= 4; n++) {
3243 GemmMicrokernelTester()
3244 .mr(4)
3245 .nr(4)
3246 .kr(1)
3247 .sr(1)
3248 .m(m)
3249 .n(n)
3250 .k(k)
3251 .cm_stride(7)
3252 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003253 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003254 }
3255 }
3256 }
3257}
3258
Marat Dukhande06f492020-04-09 00:19:31 -07003259TEST(F32_PPMM_MINMAX_4X4__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003260 GemmMicrokernelTester()
3261 .mr(4)
3262 .nr(4)
3263 .kr(1)
3264 .sr(1)
3265 .m(4)
3266 .n(4)
3267 .k(1)
3268 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07003269 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003270}
3271
Marat Dukhande06f492020-04-09 00:19:31 -07003272TEST(F32_PPMM_MINMAX_4X4__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003273 GemmMicrokernelTester()
3274 .mr(4)
3275 .nr(4)
3276 .kr(1)
3277 .sr(1)
3278 .m(4)
3279 .n(4)
3280 .k(1)
3281 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07003282 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003283}
3284
Marat Dukhande06f492020-04-09 00:19:31 -07003285TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003286 GemmMicrokernelTester()
3287 .mr(4)
3288 .nr(4)
3289 .kr(1)
3290 .sr(1)
3291 .m(4)
3292 .n(4)
3293 .k(1)
3294 .cm_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07003295 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003296}
3297
3298
Marat Dukhande06f492020-04-09 00:19:31 -07003299TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003300 GemmMicrokernelTester()
3301 .mr(3)
3302 .nr(3)
3303 .kr(1)
3304 .sr(1)
3305 .m(3)
3306 .n(3)
3307 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003308 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003309}
3310
Marat Dukhande06f492020-04-09 00:19:31 -07003311TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003312 GemmMicrokernelTester()
3313 .mr(3)
3314 .nr(3)
3315 .kr(1)
3316 .sr(1)
3317 .m(3)
3318 .n(3)
3319 .k(1)
3320 .cn_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07003321 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003322}
3323
Marat Dukhande06f492020-04-09 00:19:31 -07003324TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003325 GemmMicrokernelTester()
3326 .mr(3)
3327 .nr(3)
3328 .kr(1)
3329 .sr(1)
3330 .m(3)
3331 .n(3)
3332 .k(1)
3333 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -07003334 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003335}
3336
Marat Dukhande06f492020-04-09 00:19:31 -07003337TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003338 for (uint32_t m = 1; m <= 3; m++) {
3339 for (uint32_t n = 1; n <= 3; n++) {
3340 GemmMicrokernelTester()
3341 .mr(3)
3342 .nr(3)
3343 .kr(1)
3344 .sr(1)
3345 .m(m)
3346 .n(n)
3347 .k(1)
3348 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003349 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003350 }
3351 }
3352}
3353
Marat Dukhande06f492020-04-09 00:19:31 -07003354TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003355 for (uint32_t m = 1; m <= 3; m++) {
3356 GemmMicrokernelTester()
3357 .mr(3)
3358 .nr(3)
3359 .kr(1)
3360 .sr(1)
3361 .m(m)
3362 .n(3)
3363 .k(1)
3364 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003365 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003366 }
3367}
3368
Marat Dukhande06f492020-04-09 00:19:31 -07003369TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003370 for (uint32_t n = 1; n <= 3; n++) {
3371 GemmMicrokernelTester()
3372 .mr(3)
3373 .nr(3)
3374 .kr(1)
3375 .sr(1)
3376 .m(3)
3377 .n(n)
3378 .k(1)
3379 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003380 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003381 }
3382}
3383
Marat Dukhande06f492020-04-09 00:19:31 -07003384TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003385 for (size_t k = 2; k < 10; k++) {
3386 GemmMicrokernelTester()
3387 .mr(3)
3388 .nr(3)
3389 .kr(1)
3390 .sr(1)
3391 .m(3)
3392 .n(3)
3393 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07003394 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003395 }
3396}
3397
Marat Dukhande06f492020-04-09 00:19:31 -07003398TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003399 for (size_t k = 2; k < 10; k++) {
3400 for (uint32_t m = 1; m <= 3; m++) {
3401 for (uint32_t n = 1; n <= 3; n++) {
3402 GemmMicrokernelTester()
3403 .mr(3)
3404 .nr(3)
3405 .kr(1)
3406 .sr(1)
3407 .m(m)
3408 .n(n)
3409 .k(k)
3410 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003411 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003412 }
3413 }
3414 }
3415}
3416
Marat Dukhande06f492020-04-09 00:19:31 -07003417TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003418 for (uint32_t n = 4; n < 6; n++) {
3419 for (size_t k = 1; k <= 5; k += 2) {
3420 GemmMicrokernelTester()
3421 .mr(3)
3422 .nr(3)
3423 .kr(1)
3424 .sr(1)
3425 .m(3)
3426 .n(3)
3427 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07003428 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003429 }
3430 }
3431}
3432
Marat Dukhande06f492020-04-09 00:19:31 -07003433TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003434 for (uint32_t n = 4; n < 6; n++) {
3435 for (size_t k = 1; k <= 5; k += 2) {
3436 GemmMicrokernelTester()
3437 .mr(3)
3438 .nr(3)
3439 .kr(1)
3440 .sr(1)
3441 .m(3)
3442 .n(3)
3443 .k(k)
3444 .cn_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07003445 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003446 }
3447 }
3448}
3449
Marat Dukhande06f492020-04-09 00:19:31 -07003450TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003451 for (uint32_t n = 4; n < 6; n++) {
3452 for (size_t k = 1; k <= 5; k += 2) {
3453 GemmMicrokernelTester()
3454 .mr(3)
3455 .nr(3)
3456 .kr(1)
3457 .sr(1)
3458 .m(3)
3459 .n(n)
3460 .k(k)
3461 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07003462 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003463 }
3464 }
3465}
3466
Marat Dukhande06f492020-04-09 00:19:31 -07003467TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003468 for (uint32_t n = 4; n < 6; n++) {
3469 for (size_t k = 1; k <= 5; k += 2) {
3470 for (uint32_t m = 1; m <= 3; m++) {
3471 GemmMicrokernelTester()
3472 .mr(3)
3473 .nr(3)
3474 .kr(1)
3475 .sr(1)
3476 .m(m)
3477 .n(n)
3478 .k(k)
3479 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003480 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003481 }
3482 }
3483 }
3484}
3485
Marat Dukhande06f492020-04-09 00:19:31 -07003486TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003487 for (uint32_t n = 6; n <= 9; n += 3) {
3488 for (size_t k = 1; k <= 5; k += 2) {
3489 GemmMicrokernelTester()
3490 .mr(3)
3491 .nr(3)
3492 .kr(1)
3493 .sr(1)
3494 .m(3)
3495 .n(3)
3496 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07003497 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003498 }
3499 }
3500}
3501
Marat Dukhande06f492020-04-09 00:19:31 -07003502TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003503 for (uint32_t n = 6; n <= 9; n += 3) {
3504 for (size_t k = 1; k <= 5; k += 2) {
3505 GemmMicrokernelTester()
3506 .mr(3)
3507 .nr(3)
3508 .kr(1)
3509 .sr(1)
3510 .m(3)
3511 .n(n)
3512 .k(k)
3513 .cn_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07003514 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003515 }
3516 }
3517}
3518
Marat Dukhande06f492020-04-09 00:19:31 -07003519TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003520 for (uint32_t n = 6; n <= 9; n += 3) {
3521 for (size_t k = 1; k <= 5; k += 2) {
3522 GemmMicrokernelTester()
3523 .mr(3)
3524 .nr(3)
3525 .kr(1)
3526 .sr(1)
3527 .m(3)
3528 .n(n)
3529 .k(k)
3530 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07003531 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003532 }
3533 }
3534}
3535
Marat Dukhande06f492020-04-09 00:19:31 -07003536TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003537 for (uint32_t n = 6; n <= 9; n += 3) {
3538 for (size_t k = 1; k <= 5; k += 2) {
3539 for (uint32_t m = 1; m <= 3; m++) {
3540 GemmMicrokernelTester()
3541 .mr(3)
3542 .nr(3)
3543 .kr(1)
3544 .sr(1)
3545 .m(m)
3546 .n(n)
3547 .k(k)
3548 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003549 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003550 }
3551 }
3552 }
3553}
3554
Marat Dukhande06f492020-04-09 00:19:31 -07003555TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003556 for (size_t k = 1; k <= 5; k += 2) {
3557 for (uint32_t m = 1; m <= 3; m++) {
3558 for (uint32_t n = 1; n <= 3; n++) {
3559 GemmMicrokernelTester()
3560 .mr(3)
3561 .nr(3)
3562 .kr(1)
3563 .sr(1)
3564 .m(m)
3565 .n(n)
3566 .k(k)
3567 .cm_stride(5)
3568 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003569 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003570 }
3571 }
3572 }
3573}
3574
Marat Dukhande06f492020-04-09 00:19:31 -07003575TEST(F32_PPMM_MINMAX_3X3__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003576 GemmMicrokernelTester()
3577 .mr(3)
3578 .nr(3)
3579 .kr(1)
3580 .sr(1)
3581 .m(3)
3582 .n(3)
3583 .k(1)
3584 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07003585 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003586}
3587
Marat Dukhande06f492020-04-09 00:19:31 -07003588TEST(F32_PPMM_MINMAX_3X3__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003589 GemmMicrokernelTester()
3590 .mr(3)
3591 .nr(3)
3592 .kr(1)
3593 .sr(1)
3594 .m(3)
3595 .n(3)
3596 .k(1)
3597 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07003598 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003599}
3600
Marat Dukhande06f492020-04-09 00:19:31 -07003601TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003602 GemmMicrokernelTester()
3603 .mr(3)
3604 .nr(3)
3605 .kr(1)
3606 .sr(1)
3607 .m(3)
3608 .n(3)
3609 .k(1)
3610 .cm_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07003611 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003612}