blob: f5fdc9b6653aacd300a107fc807517c4b4a52890 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-ppmm-minmax.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
19#include <xnnpack/gemm.h>
20#include <xnnpack/igemm.h>
21#include <xnnpack/ppmm.h>
22#include "gemm-microkernel-tester.h"
23
24
25#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070026 TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070027 TEST_REQUIRES_ARM_NEON;
28 GemmMicrokernelTester()
29 .mr(4)
30 .nr(8)
31 .kr(1)
32 .sr(1)
33 .m(4)
34 .n(8)
35 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -070036 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -070037 }
38
Marat Dukhande06f492020-04-09 00:19:31 -070039 TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070040 TEST_REQUIRES_ARM_NEON;
41 GemmMicrokernelTester()
42 .mr(4)
43 .nr(8)
44 .kr(1)
45 .sr(1)
46 .m(4)
47 .n(8)
48 .k(1)
49 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -070050 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -070051 }
52
Marat Dukhande06f492020-04-09 00:19:31 -070053 TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -070054 TEST_REQUIRES_ARM_NEON;
55 GemmMicrokernelTester()
56 .mr(4)
57 .nr(8)
58 .kr(1)
59 .sr(1)
60 .m(4)
61 .n(8)
62 .k(1)
63 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -070064 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -070065 }
66
Marat Dukhande06f492020-04-09 00:19:31 -070067 TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070068 TEST_REQUIRES_ARM_NEON;
69 for (uint32_t m = 1; m <= 4; m++) {
70 for (uint32_t n = 1; n <= 8; n++) {
71 GemmMicrokernelTester()
72 .mr(4)
73 .nr(8)
74 .kr(1)
75 .sr(1)
76 .m(m)
77 .n(n)
78 .k(1)
79 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -070080 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -070081 }
82 }
83 }
84
Marat Dukhande06f492020-04-09 00:19:31 -070085 TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070086 TEST_REQUIRES_ARM_NEON;
87 for (uint32_t m = 1; m <= 4; m++) {
88 GemmMicrokernelTester()
89 .mr(4)
90 .nr(8)
91 .kr(1)
92 .sr(1)
93 .m(m)
94 .n(8)
95 .k(1)
96 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -070097 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -070098 }
99 }
100
Marat Dukhande06f492020-04-09 00:19:31 -0700101 TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700102 TEST_REQUIRES_ARM_NEON;
103 for (uint32_t n = 1; n <= 8; n++) {
104 GemmMicrokernelTester()
105 .mr(4)
106 .nr(8)
107 .kr(1)
108 .sr(1)
109 .m(4)
110 .n(n)
111 .k(1)
112 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700113 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700114 }
115 }
116
Marat Dukhande06f492020-04-09 00:19:31 -0700117 TEST(F32_PPMM_MINMAX_4X8__NEON, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700118 TEST_REQUIRES_ARM_NEON;
119 for (size_t k = 2; k < 10; k++) {
120 GemmMicrokernelTester()
121 .mr(4)
122 .nr(8)
123 .kr(1)
124 .sr(1)
125 .m(4)
126 .n(8)
127 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700128 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700129 }
130 }
131
Marat Dukhande06f492020-04-09 00:19:31 -0700132 TEST(F32_PPMM_MINMAX_4X8__NEON, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700133 TEST_REQUIRES_ARM_NEON;
134 for (size_t k = 2; k < 10; k++) {
135 for (uint32_t m = 1; m <= 4; m++) {
136 for (uint32_t n = 1; n <= 8; n++) {
137 GemmMicrokernelTester()
138 .mr(4)
139 .nr(8)
140 .kr(1)
141 .sr(1)
142 .m(m)
143 .n(n)
144 .k(k)
145 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700146 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700147 }
148 }
149 }
150 }
151
Marat Dukhande06f492020-04-09 00:19:31 -0700152 TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700153 TEST_REQUIRES_ARM_NEON;
154 for (uint32_t n = 9; n < 16; n++) {
155 for (size_t k = 1; k <= 5; k += 2) {
156 GemmMicrokernelTester()
157 .mr(4)
158 .nr(8)
159 .kr(1)
160 .sr(1)
161 .m(4)
162 .n(8)
163 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700164 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700165 }
166 }
167 }
168
Marat Dukhande06f492020-04-09 00:19:31 -0700169 TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700170 TEST_REQUIRES_ARM_NEON;
171 for (uint32_t n = 9; n < 16; n++) {
172 for (size_t k = 1; k <= 5; k += 2) {
173 GemmMicrokernelTester()
174 .mr(4)
175 .nr(8)
176 .kr(1)
177 .sr(1)
178 .m(4)
179 .n(8)
180 .k(k)
181 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700182 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700183 }
184 }
185 }
186
Marat Dukhande06f492020-04-09 00:19:31 -0700187 TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700188 TEST_REQUIRES_ARM_NEON;
189 for (uint32_t n = 9; n < 16; n++) {
190 for (size_t k = 1; k <= 5; k += 2) {
191 GemmMicrokernelTester()
192 .mr(4)
193 .nr(8)
194 .kr(1)
195 .sr(1)
196 .m(4)
197 .n(n)
198 .k(k)
199 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700200 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700201 }
202 }
203 }
204
Marat Dukhande06f492020-04-09 00:19:31 -0700205 TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700206 TEST_REQUIRES_ARM_NEON;
207 for (uint32_t n = 9; n < 16; n++) {
208 for (size_t k = 1; k <= 5; k += 2) {
209 for (uint32_t m = 1; m <= 4; m++) {
210 GemmMicrokernelTester()
211 .mr(4)
212 .nr(8)
213 .kr(1)
214 .sr(1)
215 .m(m)
216 .n(n)
217 .k(k)
218 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700219 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700220 }
221 }
222 }
223 }
224
Marat Dukhande06f492020-04-09 00:19:31 -0700225 TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700226 TEST_REQUIRES_ARM_NEON;
227 for (uint32_t n = 16; n <= 24; n += 8) {
228 for (size_t k = 1; k <= 5; k += 2) {
229 GemmMicrokernelTester()
230 .mr(4)
231 .nr(8)
232 .kr(1)
233 .sr(1)
234 .m(4)
235 .n(8)
236 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700237 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700238 }
239 }
240 }
241
Marat Dukhande06f492020-04-09 00:19:31 -0700242 TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700243 TEST_REQUIRES_ARM_NEON;
244 for (uint32_t n = 16; n <= 24; n += 8) {
245 for (size_t k = 1; k <= 5; k += 2) {
246 GemmMicrokernelTester()
247 .mr(4)
248 .nr(8)
249 .kr(1)
250 .sr(1)
251 .m(4)
252 .n(n)
253 .k(k)
254 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700255 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700256 }
257 }
258 }
259
Marat Dukhande06f492020-04-09 00:19:31 -0700260 TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700261 TEST_REQUIRES_ARM_NEON;
262 for (uint32_t n = 16; n <= 24; n += 8) {
263 for (size_t k = 1; k <= 5; k += 2) {
264 GemmMicrokernelTester()
265 .mr(4)
266 .nr(8)
267 .kr(1)
268 .sr(1)
269 .m(4)
270 .n(n)
271 .k(k)
272 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700273 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700274 }
275 }
276 }
277
Marat Dukhande06f492020-04-09 00:19:31 -0700278 TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700279 TEST_REQUIRES_ARM_NEON;
280 for (uint32_t n = 16; n <= 24; n += 8) {
281 for (size_t k = 1; k <= 5; k += 2) {
282 for (uint32_t m = 1; m <= 4; m++) {
283 GemmMicrokernelTester()
284 .mr(4)
285 .nr(8)
286 .kr(1)
287 .sr(1)
288 .m(m)
289 .n(n)
290 .k(k)
291 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700292 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700293 }
294 }
295 }
296 }
297
Marat Dukhande06f492020-04-09 00:19:31 -0700298 TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700299 TEST_REQUIRES_ARM_NEON;
300 for (size_t k = 1; k <= 5; k += 2) {
301 for (uint32_t m = 1; m <= 4; m++) {
302 for (uint32_t n = 1; n <= 8; n++) {
303 GemmMicrokernelTester()
304 .mr(4)
305 .nr(8)
306 .kr(1)
307 .sr(1)
308 .m(m)
309 .n(n)
310 .k(k)
311 .cm_stride(11)
312 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700313 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700314 }
315 }
316 }
317 }
318
Marat Dukhande06f492020-04-09 00:19:31 -0700319 TEST(F32_PPMM_MINMAX_4X8__NEON, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700320 TEST_REQUIRES_ARM_NEON;
321 GemmMicrokernelTester()
322 .mr(4)
323 .nr(8)
324 .kr(1)
325 .sr(1)
326 .m(4)
327 .n(8)
328 .k(1)
329 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -0700330 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700331 }
332
Marat Dukhande06f492020-04-09 00:19:31 -0700333 TEST(F32_PPMM_MINMAX_4X8__NEON, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700334 TEST_REQUIRES_ARM_NEON;
335 GemmMicrokernelTester()
336 .mr(4)
337 .nr(8)
338 .kr(1)
339 .sr(1)
340 .m(4)
341 .n(8)
342 .k(1)
343 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -0700344 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700345 }
346
Marat Dukhande06f492020-04-09 00:19:31 -0700347 TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700348 TEST_REQUIRES_ARM_NEON;
349 GemmMicrokernelTester()
350 .mr(4)
351 .nr(8)
352 .kr(1)
353 .sr(1)
354 .m(4)
355 .n(8)
356 .k(1)
357 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700358 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700359 }
360#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
361
362
363#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -0700364 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700365 TEST_REQUIRES_ARM_NEON_FMA;
366 GemmMicrokernelTester()
367 .mr(4)
368 .nr(8)
369 .kr(1)
370 .sr(1)
371 .m(4)
372 .n(8)
373 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700374 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700375 }
376
Marat Dukhande06f492020-04-09 00:19:31 -0700377 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700378 TEST_REQUIRES_ARM_NEON_FMA;
379 GemmMicrokernelTester()
380 .mr(4)
381 .nr(8)
382 .kr(1)
383 .sr(1)
384 .m(4)
385 .n(8)
386 .k(1)
387 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700388 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700389 }
390
Marat Dukhande06f492020-04-09 00:19:31 -0700391 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700392 TEST_REQUIRES_ARM_NEON_FMA;
393 GemmMicrokernelTester()
394 .mr(4)
395 .nr(8)
396 .kr(1)
397 .sr(1)
398 .m(4)
399 .n(8)
400 .k(1)
401 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -0700402 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700403 }
404
Marat Dukhande06f492020-04-09 00:19:31 -0700405 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700406 TEST_REQUIRES_ARM_NEON_FMA;
407 for (uint32_t m = 1; m <= 4; m++) {
408 for (uint32_t n = 1; n <= 8; n++) {
409 GemmMicrokernelTester()
410 .mr(4)
411 .nr(8)
412 .kr(1)
413 .sr(1)
414 .m(m)
415 .n(n)
416 .k(1)
417 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700418 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700419 }
420 }
421 }
422
Marat Dukhande06f492020-04-09 00:19:31 -0700423 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700424 TEST_REQUIRES_ARM_NEON_FMA;
425 for (uint32_t m = 1; m <= 4; m++) {
426 GemmMicrokernelTester()
427 .mr(4)
428 .nr(8)
429 .kr(1)
430 .sr(1)
431 .m(m)
432 .n(8)
433 .k(1)
434 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700435 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700436 }
437 }
438
Marat Dukhande06f492020-04-09 00:19:31 -0700439 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700440 TEST_REQUIRES_ARM_NEON_FMA;
441 for (uint32_t n = 1; n <= 8; n++) {
442 GemmMicrokernelTester()
443 .mr(4)
444 .nr(8)
445 .kr(1)
446 .sr(1)
447 .m(4)
448 .n(n)
449 .k(1)
450 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700451 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700452 }
453 }
454
Marat Dukhande06f492020-04-09 00:19:31 -0700455 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700456 TEST_REQUIRES_ARM_NEON_FMA;
457 for (size_t k = 2; k < 10; k++) {
458 GemmMicrokernelTester()
459 .mr(4)
460 .nr(8)
461 .kr(1)
462 .sr(1)
463 .m(4)
464 .n(8)
465 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700466 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700467 }
468 }
469
Marat Dukhande06f492020-04-09 00:19:31 -0700470 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700471 TEST_REQUIRES_ARM_NEON_FMA;
472 for (size_t k = 2; k < 10; k++) {
473 for (uint32_t m = 1; m <= 4; m++) {
474 for (uint32_t n = 1; n <= 8; n++) {
475 GemmMicrokernelTester()
476 .mr(4)
477 .nr(8)
478 .kr(1)
479 .sr(1)
480 .m(m)
481 .n(n)
482 .k(k)
483 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700484 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700485 }
486 }
487 }
488 }
489
Marat Dukhande06f492020-04-09 00:19:31 -0700490 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700491 TEST_REQUIRES_ARM_NEON_FMA;
492 for (uint32_t n = 9; n < 16; n++) {
493 for (size_t k = 1; k <= 5; k += 2) {
494 GemmMicrokernelTester()
495 .mr(4)
496 .nr(8)
497 .kr(1)
498 .sr(1)
499 .m(4)
500 .n(8)
501 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700502 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700503 }
504 }
505 }
506
Marat Dukhande06f492020-04-09 00:19:31 -0700507 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700508 TEST_REQUIRES_ARM_NEON_FMA;
509 for (uint32_t n = 9; n < 16; n++) {
510 for (size_t k = 1; k <= 5; k += 2) {
511 GemmMicrokernelTester()
512 .mr(4)
513 .nr(8)
514 .kr(1)
515 .sr(1)
516 .m(4)
517 .n(8)
518 .k(k)
519 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700520 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700521 }
522 }
523 }
524
Marat Dukhande06f492020-04-09 00:19:31 -0700525 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700526 TEST_REQUIRES_ARM_NEON_FMA;
527 for (uint32_t n = 9; n < 16; n++) {
528 for (size_t k = 1; k <= 5; k += 2) {
529 GemmMicrokernelTester()
530 .mr(4)
531 .nr(8)
532 .kr(1)
533 .sr(1)
534 .m(4)
535 .n(n)
536 .k(k)
537 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700538 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700539 }
540 }
541 }
542
Marat Dukhande06f492020-04-09 00:19:31 -0700543 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700544 TEST_REQUIRES_ARM_NEON_FMA;
545 for (uint32_t n = 9; n < 16; n++) {
546 for (size_t k = 1; k <= 5; k += 2) {
547 for (uint32_t m = 1; m <= 4; m++) {
548 GemmMicrokernelTester()
549 .mr(4)
550 .nr(8)
551 .kr(1)
552 .sr(1)
553 .m(m)
554 .n(n)
555 .k(k)
556 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700557 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700558 }
559 }
560 }
561 }
562
Marat Dukhande06f492020-04-09 00:19:31 -0700563 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700564 TEST_REQUIRES_ARM_NEON_FMA;
565 for (uint32_t n = 16; n <= 24; n += 8) {
566 for (size_t k = 1; k <= 5; k += 2) {
567 GemmMicrokernelTester()
568 .mr(4)
569 .nr(8)
570 .kr(1)
571 .sr(1)
572 .m(4)
573 .n(8)
574 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700575 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700576 }
577 }
578 }
579
Marat Dukhande06f492020-04-09 00:19:31 -0700580 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700581 TEST_REQUIRES_ARM_NEON_FMA;
582 for (uint32_t n = 16; n <= 24; n += 8) {
583 for (size_t k = 1; k <= 5; k += 2) {
584 GemmMicrokernelTester()
585 .mr(4)
586 .nr(8)
587 .kr(1)
588 .sr(1)
589 .m(4)
590 .n(n)
591 .k(k)
592 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700593 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700594 }
595 }
596 }
597
Marat Dukhande06f492020-04-09 00:19:31 -0700598 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700599 TEST_REQUIRES_ARM_NEON_FMA;
600 for (uint32_t n = 16; n <= 24; n += 8) {
601 for (size_t k = 1; k <= 5; k += 2) {
602 GemmMicrokernelTester()
603 .mr(4)
604 .nr(8)
605 .kr(1)
606 .sr(1)
607 .m(4)
608 .n(n)
609 .k(k)
610 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700611 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700612 }
613 }
614 }
615
Marat Dukhande06f492020-04-09 00:19:31 -0700616 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700617 TEST_REQUIRES_ARM_NEON_FMA;
618 for (uint32_t n = 16; n <= 24; n += 8) {
619 for (size_t k = 1; k <= 5; k += 2) {
620 for (uint32_t m = 1; m <= 4; m++) {
621 GemmMicrokernelTester()
622 .mr(4)
623 .nr(8)
624 .kr(1)
625 .sr(1)
626 .m(m)
627 .n(n)
628 .k(k)
629 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700630 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700631 }
632 }
633 }
634 }
635
Marat Dukhande06f492020-04-09 00:19:31 -0700636 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700637 TEST_REQUIRES_ARM_NEON_FMA;
638 for (size_t k = 1; k <= 5; k += 2) {
639 for (uint32_t m = 1; m <= 4; m++) {
640 for (uint32_t n = 1; n <= 8; n++) {
641 GemmMicrokernelTester()
642 .mr(4)
643 .nr(8)
644 .kr(1)
645 .sr(1)
646 .m(m)
647 .n(n)
648 .k(k)
649 .cm_stride(11)
650 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700651 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700652 }
653 }
654 }
655 }
656
Marat Dukhande06f492020-04-09 00:19:31 -0700657 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700658 TEST_REQUIRES_ARM_NEON_FMA;
659 GemmMicrokernelTester()
660 .mr(4)
661 .nr(8)
662 .kr(1)
663 .sr(1)
664 .m(4)
665 .n(8)
666 .k(1)
667 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -0700668 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700669 }
670
Marat Dukhande06f492020-04-09 00:19:31 -0700671 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700672 TEST_REQUIRES_ARM_NEON_FMA;
673 GemmMicrokernelTester()
674 .mr(4)
675 .nr(8)
676 .kr(1)
677 .sr(1)
678 .m(4)
679 .n(8)
680 .k(1)
681 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -0700682 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700683 }
684
Marat Dukhande06f492020-04-09 00:19:31 -0700685 TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700686 TEST_REQUIRES_ARM_NEON_FMA;
687 GemmMicrokernelTester()
688 .mr(4)
689 .nr(8)
690 .kr(1)
691 .sr(1)
692 .m(4)
693 .n(8)
694 .k(1)
695 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700696 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -0700697 }
698#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
699
700
701#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -0700702 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700703 TEST_REQUIRES_ARM_NEON;
704 GemmMicrokernelTester()
705 .mr(8)
706 .nr(8)
707 .kr(1)
708 .sr(1)
709 .m(8)
710 .n(8)
711 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700712 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700713 }
714
Marat Dukhande06f492020-04-09 00:19:31 -0700715 TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700716 TEST_REQUIRES_ARM_NEON;
717 GemmMicrokernelTester()
718 .mr(8)
719 .nr(8)
720 .kr(1)
721 .sr(1)
722 .m(8)
723 .n(8)
724 .k(1)
725 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700726 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700727 }
728
Marat Dukhande06f492020-04-09 00:19:31 -0700729 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700730 TEST_REQUIRES_ARM_NEON;
731 GemmMicrokernelTester()
732 .mr(8)
733 .nr(8)
734 .kr(1)
735 .sr(1)
736 .m(8)
737 .n(8)
738 .k(1)
739 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -0700740 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700741 }
742
Marat Dukhande06f492020-04-09 00:19:31 -0700743 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700744 TEST_REQUIRES_ARM_NEON;
745 for (uint32_t m = 1; m <= 8; m++) {
746 for (uint32_t n = 1; n <= 8; n++) {
747 GemmMicrokernelTester()
748 .mr(8)
749 .nr(8)
750 .kr(1)
751 .sr(1)
752 .m(m)
753 .n(n)
754 .k(1)
755 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700756 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700757 }
758 }
759 }
760
Marat Dukhande06f492020-04-09 00:19:31 -0700761 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700762 TEST_REQUIRES_ARM_NEON;
763 for (uint32_t m = 1; m <= 8; m++) {
764 GemmMicrokernelTester()
765 .mr(8)
766 .nr(8)
767 .kr(1)
768 .sr(1)
769 .m(m)
770 .n(8)
771 .k(1)
772 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700773 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700774 }
775 }
776
Marat Dukhande06f492020-04-09 00:19:31 -0700777 TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700778 TEST_REQUIRES_ARM_NEON;
779 for (uint32_t n = 1; n <= 8; n++) {
780 GemmMicrokernelTester()
781 .mr(8)
782 .nr(8)
783 .kr(1)
784 .sr(1)
785 .m(8)
786 .n(n)
787 .k(1)
788 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700789 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700790 }
791 }
792
Marat Dukhande06f492020-04-09 00:19:31 -0700793 TEST(F32_PPMM_MINMAX_8X8__NEON, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700794 TEST_REQUIRES_ARM_NEON;
795 for (size_t k = 2; k < 10; k++) {
796 GemmMicrokernelTester()
797 .mr(8)
798 .nr(8)
799 .kr(1)
800 .sr(1)
801 .m(8)
802 .n(8)
803 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700804 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700805 }
806 }
807
Marat Dukhande06f492020-04-09 00:19:31 -0700808 TEST(F32_PPMM_MINMAX_8X8__NEON, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700809 TEST_REQUIRES_ARM_NEON;
810 for (size_t k = 2; k < 10; k++) {
811 for (uint32_t m = 1; m <= 8; m++) {
812 for (uint32_t n = 1; n <= 8; n++) {
813 GemmMicrokernelTester()
814 .mr(8)
815 .nr(8)
816 .kr(1)
817 .sr(1)
818 .m(m)
819 .n(n)
820 .k(k)
821 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700822 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700823 }
824 }
825 }
826 }
827
Marat Dukhande06f492020-04-09 00:19:31 -0700828 TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700829 TEST_REQUIRES_ARM_NEON;
830 for (uint32_t n = 9; n < 16; n++) {
831 for (size_t k = 1; k <= 5; k += 2) {
832 GemmMicrokernelTester()
833 .mr(8)
834 .nr(8)
835 .kr(1)
836 .sr(1)
837 .m(8)
838 .n(8)
839 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700840 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700841 }
842 }
843 }
844
Marat Dukhande06f492020-04-09 00:19:31 -0700845 TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700846 TEST_REQUIRES_ARM_NEON;
847 for (uint32_t n = 9; n < 16; n++) {
848 for (size_t k = 1; k <= 5; k += 2) {
849 GemmMicrokernelTester()
850 .mr(8)
851 .nr(8)
852 .kr(1)
853 .sr(1)
854 .m(8)
855 .n(8)
856 .k(k)
857 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700858 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700859 }
860 }
861 }
862
Marat Dukhande06f492020-04-09 00:19:31 -0700863 TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700864 TEST_REQUIRES_ARM_NEON;
865 for (uint32_t n = 9; n < 16; n++) {
866 for (size_t k = 1; k <= 5; k += 2) {
867 GemmMicrokernelTester()
868 .mr(8)
869 .nr(8)
870 .kr(1)
871 .sr(1)
872 .m(8)
873 .n(n)
874 .k(k)
875 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700876 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700877 }
878 }
879 }
880
Marat Dukhande06f492020-04-09 00:19:31 -0700881 TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700882 TEST_REQUIRES_ARM_NEON;
883 for (uint32_t n = 9; n < 16; n++) {
884 for (size_t k = 1; k <= 5; k += 2) {
885 for (uint32_t m = 1; m <= 8; m++) {
886 GemmMicrokernelTester()
887 .mr(8)
888 .nr(8)
889 .kr(1)
890 .sr(1)
891 .m(m)
892 .n(n)
893 .k(k)
894 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700895 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700896 }
897 }
898 }
899 }
900
Marat Dukhande06f492020-04-09 00:19:31 -0700901 TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700902 TEST_REQUIRES_ARM_NEON;
903 for (uint32_t n = 16; n <= 24; n += 8) {
904 for (size_t k = 1; k <= 5; k += 2) {
905 GemmMicrokernelTester()
906 .mr(8)
907 .nr(8)
908 .kr(1)
909 .sr(1)
910 .m(8)
911 .n(8)
912 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700913 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700914 }
915 }
916 }
917
Marat Dukhande06f492020-04-09 00:19:31 -0700918 TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700919 TEST_REQUIRES_ARM_NEON;
920 for (uint32_t n = 16; n <= 24; n += 8) {
921 for (size_t k = 1; k <= 5; k += 2) {
922 GemmMicrokernelTester()
923 .mr(8)
924 .nr(8)
925 .kr(1)
926 .sr(1)
927 .m(8)
928 .n(n)
929 .k(k)
930 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700931 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700932 }
933 }
934 }
935
Marat Dukhande06f492020-04-09 00:19:31 -0700936 TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700937 TEST_REQUIRES_ARM_NEON;
938 for (uint32_t n = 16; n <= 24; n += 8) {
939 for (size_t k = 1; k <= 5; k += 2) {
940 GemmMicrokernelTester()
941 .mr(8)
942 .nr(8)
943 .kr(1)
944 .sr(1)
945 .m(8)
946 .n(n)
947 .k(k)
948 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700949 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700950 }
951 }
952 }
953
Marat Dukhande06f492020-04-09 00:19:31 -0700954 TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700955 TEST_REQUIRES_ARM_NEON;
956 for (uint32_t n = 16; n <= 24; n += 8) {
957 for (size_t k = 1; k <= 5; k += 2) {
958 for (uint32_t m = 1; m <= 8; m++) {
959 GemmMicrokernelTester()
960 .mr(8)
961 .nr(8)
962 .kr(1)
963 .sr(1)
964 .m(m)
965 .n(n)
966 .k(k)
967 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700968 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700969 }
970 }
971 }
972 }
973
Marat Dukhande06f492020-04-09 00:19:31 -0700974 TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700975 TEST_REQUIRES_ARM_NEON;
976 for (size_t k = 1; k <= 5; k += 2) {
977 for (uint32_t m = 1; m <= 8; m++) {
978 for (uint32_t n = 1; n <= 8; n++) {
979 GemmMicrokernelTester()
980 .mr(8)
981 .nr(8)
982 .kr(1)
983 .sr(1)
984 .m(m)
985 .n(n)
986 .k(k)
987 .cm_stride(11)
988 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700989 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -0700990 }
991 }
992 }
993 }
994
Marat Dukhande06f492020-04-09 00:19:31 -0700995 TEST(F32_PPMM_MINMAX_8X8__NEON, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700996 TEST_REQUIRES_ARM_NEON;
997 GemmMicrokernelTester()
998 .mr(8)
999 .nr(8)
1000 .kr(1)
1001 .sr(1)
1002 .m(8)
1003 .n(8)
1004 .k(1)
1005 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001006 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -07001007 }
1008
Marat Dukhande06f492020-04-09 00:19:31 -07001009 TEST(F32_PPMM_MINMAX_8X8__NEON, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001010 TEST_REQUIRES_ARM_NEON;
1011 GemmMicrokernelTester()
1012 .mr(8)
1013 .nr(8)
1014 .kr(1)
1015 .sr(1)
1016 .m(8)
1017 .n(8)
1018 .k(1)
1019 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001020 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -07001021 }
1022
Marat Dukhande06f492020-04-09 00:19:31 -07001023 TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001024 TEST_REQUIRES_ARM_NEON;
1025 GemmMicrokernelTester()
1026 .mr(8)
1027 .nr(8)
1028 .kr(1)
1029 .sr(1)
1030 .m(8)
1031 .n(8)
1032 .k(1)
1033 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001034 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
Marat Dukhan1c587112020-04-08 20:04:28 -07001035 }
1036#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1037
1038
1039#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07001040 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001041 TEST_REQUIRES_ARM_NEON_FMA;
1042 GemmMicrokernelTester()
1043 .mr(8)
1044 .nr(8)
1045 .kr(1)
1046 .sr(1)
1047 .m(8)
1048 .n(8)
1049 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001050 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001051 }
1052
Marat Dukhande06f492020-04-09 00:19:31 -07001053 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001054 TEST_REQUIRES_ARM_NEON_FMA;
1055 GemmMicrokernelTester()
1056 .mr(8)
1057 .nr(8)
1058 .kr(1)
1059 .sr(1)
1060 .m(8)
1061 .n(8)
1062 .k(1)
1063 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001064 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001065 }
1066
Marat Dukhande06f492020-04-09 00:19:31 -07001067 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001068 TEST_REQUIRES_ARM_NEON_FMA;
1069 GemmMicrokernelTester()
1070 .mr(8)
1071 .nr(8)
1072 .kr(1)
1073 .sr(1)
1074 .m(8)
1075 .n(8)
1076 .k(1)
1077 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -07001078 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001079 }
1080
Marat Dukhande06f492020-04-09 00:19:31 -07001081 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001082 TEST_REQUIRES_ARM_NEON_FMA;
1083 for (uint32_t m = 1; m <= 8; m++) {
1084 for (uint32_t n = 1; n <= 8; n++) {
1085 GemmMicrokernelTester()
1086 .mr(8)
1087 .nr(8)
1088 .kr(1)
1089 .sr(1)
1090 .m(m)
1091 .n(n)
1092 .k(1)
1093 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001094 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001095 }
1096 }
1097 }
1098
Marat Dukhande06f492020-04-09 00:19:31 -07001099 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001100 TEST_REQUIRES_ARM_NEON_FMA;
1101 for (uint32_t m = 1; m <= 8; m++) {
1102 GemmMicrokernelTester()
1103 .mr(8)
1104 .nr(8)
1105 .kr(1)
1106 .sr(1)
1107 .m(m)
1108 .n(8)
1109 .k(1)
1110 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001111 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001112 }
1113 }
1114
Marat Dukhande06f492020-04-09 00:19:31 -07001115 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001116 TEST_REQUIRES_ARM_NEON_FMA;
1117 for (uint32_t n = 1; n <= 8; n++) {
1118 GemmMicrokernelTester()
1119 .mr(8)
1120 .nr(8)
1121 .kr(1)
1122 .sr(1)
1123 .m(8)
1124 .n(n)
1125 .k(1)
1126 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001127 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001128 }
1129 }
1130
Marat Dukhande06f492020-04-09 00:19:31 -07001131 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001132 TEST_REQUIRES_ARM_NEON_FMA;
1133 for (size_t k = 2; k < 10; k++) {
1134 GemmMicrokernelTester()
1135 .mr(8)
1136 .nr(8)
1137 .kr(1)
1138 .sr(1)
1139 .m(8)
1140 .n(8)
1141 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001142 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001143 }
1144 }
1145
Marat Dukhande06f492020-04-09 00:19:31 -07001146 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001147 TEST_REQUIRES_ARM_NEON_FMA;
1148 for (size_t k = 2; k < 10; k++) {
1149 for (uint32_t m = 1; m <= 8; m++) {
1150 for (uint32_t n = 1; n <= 8; n++) {
1151 GemmMicrokernelTester()
1152 .mr(8)
1153 .nr(8)
1154 .kr(1)
1155 .sr(1)
1156 .m(m)
1157 .n(n)
1158 .k(k)
1159 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001160 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001161 }
1162 }
1163 }
1164 }
1165
Marat Dukhande06f492020-04-09 00:19:31 -07001166 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001167 TEST_REQUIRES_ARM_NEON_FMA;
1168 for (uint32_t n = 9; n < 16; n++) {
1169 for (size_t k = 1; k <= 5; k += 2) {
1170 GemmMicrokernelTester()
1171 .mr(8)
1172 .nr(8)
1173 .kr(1)
1174 .sr(1)
1175 .m(8)
1176 .n(8)
1177 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001178 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001179 }
1180 }
1181 }
1182
Marat Dukhande06f492020-04-09 00:19:31 -07001183 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001184 TEST_REQUIRES_ARM_NEON_FMA;
1185 for (uint32_t n = 9; n < 16; n++) {
1186 for (size_t k = 1; k <= 5; k += 2) {
1187 GemmMicrokernelTester()
1188 .mr(8)
1189 .nr(8)
1190 .kr(1)
1191 .sr(1)
1192 .m(8)
1193 .n(8)
1194 .k(k)
1195 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001196 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001197 }
1198 }
1199 }
1200
Marat Dukhande06f492020-04-09 00:19:31 -07001201 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001202 TEST_REQUIRES_ARM_NEON_FMA;
1203 for (uint32_t n = 9; n < 16; n++) {
1204 for (size_t k = 1; k <= 5; k += 2) {
1205 GemmMicrokernelTester()
1206 .mr(8)
1207 .nr(8)
1208 .kr(1)
1209 .sr(1)
1210 .m(8)
1211 .n(n)
1212 .k(k)
1213 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07001214 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001215 }
1216 }
1217 }
1218
Marat Dukhande06f492020-04-09 00:19:31 -07001219 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001220 TEST_REQUIRES_ARM_NEON_FMA;
1221 for (uint32_t n = 9; n < 16; n++) {
1222 for (size_t k = 1; k <= 5; k += 2) {
1223 for (uint32_t m = 1; m <= 8; m++) {
1224 GemmMicrokernelTester()
1225 .mr(8)
1226 .nr(8)
1227 .kr(1)
1228 .sr(1)
1229 .m(m)
1230 .n(n)
1231 .k(k)
1232 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001233 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001234 }
1235 }
1236 }
1237 }
1238
Marat Dukhande06f492020-04-09 00:19:31 -07001239 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001240 TEST_REQUIRES_ARM_NEON_FMA;
1241 for (uint32_t n = 16; n <= 24; n += 8) {
1242 for (size_t k = 1; k <= 5; k += 2) {
1243 GemmMicrokernelTester()
1244 .mr(8)
1245 .nr(8)
1246 .kr(1)
1247 .sr(1)
1248 .m(8)
1249 .n(8)
1250 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001251 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001252 }
1253 }
1254 }
1255
Marat Dukhande06f492020-04-09 00:19:31 -07001256 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001257 TEST_REQUIRES_ARM_NEON_FMA;
1258 for (uint32_t n = 16; n <= 24; n += 8) {
1259 for (size_t k = 1; k <= 5; k += 2) {
1260 GemmMicrokernelTester()
1261 .mr(8)
1262 .nr(8)
1263 .kr(1)
1264 .sr(1)
1265 .m(8)
1266 .n(n)
1267 .k(k)
1268 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001269 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001270 }
1271 }
1272 }
1273
Marat Dukhande06f492020-04-09 00:19:31 -07001274 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001275 TEST_REQUIRES_ARM_NEON_FMA;
1276 for (uint32_t n = 16; n <= 24; n += 8) {
1277 for (size_t k = 1; k <= 5; k += 2) {
1278 GemmMicrokernelTester()
1279 .mr(8)
1280 .nr(8)
1281 .kr(1)
1282 .sr(1)
1283 .m(8)
1284 .n(n)
1285 .k(k)
1286 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07001287 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001288 }
1289 }
1290 }
1291
Marat Dukhande06f492020-04-09 00:19:31 -07001292 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001293 TEST_REQUIRES_ARM_NEON_FMA;
1294 for (uint32_t n = 16; n <= 24; n += 8) {
1295 for (size_t k = 1; k <= 5; k += 2) {
1296 for (uint32_t m = 1; m <= 8; m++) {
1297 GemmMicrokernelTester()
1298 .mr(8)
1299 .nr(8)
1300 .kr(1)
1301 .sr(1)
1302 .m(m)
1303 .n(n)
1304 .k(k)
1305 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001306 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001307 }
1308 }
1309 }
1310 }
1311
Marat Dukhande06f492020-04-09 00:19:31 -07001312 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001313 TEST_REQUIRES_ARM_NEON_FMA;
1314 for (size_t k = 1; k <= 5; k += 2) {
1315 for (uint32_t m = 1; m <= 8; m++) {
1316 for (uint32_t n = 1; n <= 8; n++) {
1317 GemmMicrokernelTester()
1318 .mr(8)
1319 .nr(8)
1320 .kr(1)
1321 .sr(1)
1322 .m(m)
1323 .n(n)
1324 .k(k)
1325 .cm_stride(11)
1326 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001327 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001328 }
1329 }
1330 }
1331 }
1332
Marat Dukhande06f492020-04-09 00:19:31 -07001333 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001334 TEST_REQUIRES_ARM_NEON_FMA;
1335 GemmMicrokernelTester()
1336 .mr(8)
1337 .nr(8)
1338 .kr(1)
1339 .sr(1)
1340 .m(8)
1341 .n(8)
1342 .k(1)
1343 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001344 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001345 }
1346
Marat Dukhande06f492020-04-09 00:19:31 -07001347 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001348 TEST_REQUIRES_ARM_NEON_FMA;
1349 GemmMicrokernelTester()
1350 .mr(8)
1351 .nr(8)
1352 .kr(1)
1353 .sr(1)
1354 .m(8)
1355 .n(8)
1356 .k(1)
1357 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001358 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001359 }
1360
Marat Dukhande06f492020-04-09 00:19:31 -07001361 TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001362 TEST_REQUIRES_ARM_NEON_FMA;
1363 GemmMicrokernelTester()
1364 .mr(8)
1365 .nr(8)
1366 .kr(1)
1367 .sr(1)
1368 .m(8)
1369 .n(8)
1370 .k(1)
1371 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001372 .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
Marat Dukhan1c587112020-04-08 20:04:28 -07001373 }
1374#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1375
1376
1377#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -07001378 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001379 TEST_REQUIRES_X86_SSE;
1380 GemmMicrokernelTester()
1381 .mr(4)
1382 .nr(8)
1383 .kr(1)
1384 .sr(1)
1385 .m(4)
1386 .n(8)
1387 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001388 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001389 }
1390
Marat Dukhande06f492020-04-09 00:19:31 -07001391 TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001392 TEST_REQUIRES_X86_SSE;
1393 GemmMicrokernelTester()
1394 .mr(4)
1395 .nr(8)
1396 .kr(1)
1397 .sr(1)
1398 .m(4)
1399 .n(8)
1400 .k(1)
1401 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001402 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001403 }
1404
Marat Dukhande06f492020-04-09 00:19:31 -07001405 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001406 TEST_REQUIRES_X86_SSE;
1407 GemmMicrokernelTester()
1408 .mr(4)
1409 .nr(8)
1410 .kr(1)
1411 .sr(1)
1412 .m(4)
1413 .n(8)
1414 .k(1)
1415 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -07001416 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001417 }
1418
Marat Dukhande06f492020-04-09 00:19:31 -07001419 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001420 TEST_REQUIRES_X86_SSE;
1421 for (uint32_t m = 1; m <= 4; m++) {
1422 for (uint32_t n = 1; n <= 8; n++) {
1423 GemmMicrokernelTester()
1424 .mr(4)
1425 .nr(8)
1426 .kr(1)
1427 .sr(1)
1428 .m(m)
1429 .n(n)
1430 .k(1)
1431 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001432 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001433 }
1434 }
1435 }
1436
Marat Dukhande06f492020-04-09 00:19:31 -07001437 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001438 TEST_REQUIRES_X86_SSE;
1439 for (uint32_t m = 1; m <= 4; m++) {
1440 GemmMicrokernelTester()
1441 .mr(4)
1442 .nr(8)
1443 .kr(1)
1444 .sr(1)
1445 .m(m)
1446 .n(8)
1447 .k(1)
1448 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001449 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001450 }
1451 }
1452
Marat Dukhande06f492020-04-09 00:19:31 -07001453 TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001454 TEST_REQUIRES_X86_SSE;
1455 for (uint32_t n = 1; n <= 8; n++) {
1456 GemmMicrokernelTester()
1457 .mr(4)
1458 .nr(8)
1459 .kr(1)
1460 .sr(1)
1461 .m(4)
1462 .n(n)
1463 .k(1)
1464 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001465 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001466 }
1467 }
1468
Marat Dukhande06f492020-04-09 00:19:31 -07001469 TEST(F32_PPMM_MINMAX_4X8__SSE, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001470 TEST_REQUIRES_X86_SSE;
1471 for (size_t k = 2; k < 10; k++) {
1472 GemmMicrokernelTester()
1473 .mr(4)
1474 .nr(8)
1475 .kr(1)
1476 .sr(1)
1477 .m(4)
1478 .n(8)
1479 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001480 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001481 }
1482 }
1483
Marat Dukhande06f492020-04-09 00:19:31 -07001484 TEST(F32_PPMM_MINMAX_4X8__SSE, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001485 TEST_REQUIRES_X86_SSE;
1486 for (size_t k = 2; k < 10; k++) {
1487 for (uint32_t m = 1; m <= 4; m++) {
1488 for (uint32_t n = 1; n <= 8; n++) {
1489 GemmMicrokernelTester()
1490 .mr(4)
1491 .nr(8)
1492 .kr(1)
1493 .sr(1)
1494 .m(m)
1495 .n(n)
1496 .k(k)
1497 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001498 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001499 }
1500 }
1501 }
1502 }
1503
Marat Dukhande06f492020-04-09 00:19:31 -07001504 TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001505 TEST_REQUIRES_X86_SSE;
1506 for (uint32_t n = 9; n < 16; n++) {
1507 for (size_t k = 1; k <= 5; k += 2) {
1508 GemmMicrokernelTester()
1509 .mr(4)
1510 .nr(8)
1511 .kr(1)
1512 .sr(1)
1513 .m(4)
1514 .n(8)
1515 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001516 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001517 }
1518 }
1519 }
1520
Marat Dukhande06f492020-04-09 00:19:31 -07001521 TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001522 TEST_REQUIRES_X86_SSE;
1523 for (uint32_t n = 9; n < 16; n++) {
1524 for (size_t k = 1; k <= 5; k += 2) {
1525 GemmMicrokernelTester()
1526 .mr(4)
1527 .nr(8)
1528 .kr(1)
1529 .sr(1)
1530 .m(4)
1531 .n(8)
1532 .k(k)
1533 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001534 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001535 }
1536 }
1537 }
1538
Marat Dukhande06f492020-04-09 00:19:31 -07001539 TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001540 TEST_REQUIRES_X86_SSE;
1541 for (uint32_t n = 9; n < 16; n++) {
1542 for (size_t k = 1; k <= 5; k += 2) {
1543 GemmMicrokernelTester()
1544 .mr(4)
1545 .nr(8)
1546 .kr(1)
1547 .sr(1)
1548 .m(4)
1549 .n(n)
1550 .k(k)
1551 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07001552 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001553 }
1554 }
1555 }
1556
Marat Dukhande06f492020-04-09 00:19:31 -07001557 TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001558 TEST_REQUIRES_X86_SSE;
1559 for (uint32_t n = 9; n < 16; n++) {
1560 for (size_t k = 1; k <= 5; k += 2) {
1561 for (uint32_t m = 1; m <= 4; m++) {
1562 GemmMicrokernelTester()
1563 .mr(4)
1564 .nr(8)
1565 .kr(1)
1566 .sr(1)
1567 .m(m)
1568 .n(n)
1569 .k(k)
1570 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001571 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001572 }
1573 }
1574 }
1575 }
1576
Marat Dukhande06f492020-04-09 00:19:31 -07001577 TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001578 TEST_REQUIRES_X86_SSE;
1579 for (uint32_t n = 16; n <= 24; n += 8) {
1580 for (size_t k = 1; k <= 5; k += 2) {
1581 GemmMicrokernelTester()
1582 .mr(4)
1583 .nr(8)
1584 .kr(1)
1585 .sr(1)
1586 .m(4)
1587 .n(8)
1588 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001589 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001590 }
1591 }
1592 }
1593
Marat Dukhande06f492020-04-09 00:19:31 -07001594 TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001595 TEST_REQUIRES_X86_SSE;
1596 for (uint32_t n = 16; n <= 24; n += 8) {
1597 for (size_t k = 1; k <= 5; k += 2) {
1598 GemmMicrokernelTester()
1599 .mr(4)
1600 .nr(8)
1601 .kr(1)
1602 .sr(1)
1603 .m(4)
1604 .n(n)
1605 .k(k)
1606 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001607 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001608 }
1609 }
1610 }
1611
Marat Dukhande06f492020-04-09 00:19:31 -07001612 TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001613 TEST_REQUIRES_X86_SSE;
1614 for (uint32_t n = 16; n <= 24; n += 8) {
1615 for (size_t k = 1; k <= 5; k += 2) {
1616 GemmMicrokernelTester()
1617 .mr(4)
1618 .nr(8)
1619 .kr(1)
1620 .sr(1)
1621 .m(4)
1622 .n(n)
1623 .k(k)
1624 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07001625 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001626 }
1627 }
1628 }
1629
Marat Dukhande06f492020-04-09 00:19:31 -07001630 TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001631 TEST_REQUIRES_X86_SSE;
1632 for (uint32_t n = 16; n <= 24; n += 8) {
1633 for (size_t k = 1; k <= 5; k += 2) {
1634 for (uint32_t m = 1; m <= 4; m++) {
1635 GemmMicrokernelTester()
1636 .mr(4)
1637 .nr(8)
1638 .kr(1)
1639 .sr(1)
1640 .m(m)
1641 .n(n)
1642 .k(k)
1643 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001644 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001645 }
1646 }
1647 }
1648 }
1649
Marat Dukhande06f492020-04-09 00:19:31 -07001650 TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001651 TEST_REQUIRES_X86_SSE;
1652 for (size_t k = 1; k <= 5; k += 2) {
1653 for (uint32_t m = 1; m <= 4; m++) {
1654 for (uint32_t n = 1; n <= 8; n++) {
1655 GemmMicrokernelTester()
1656 .mr(4)
1657 .nr(8)
1658 .kr(1)
1659 .sr(1)
1660 .m(m)
1661 .n(n)
1662 .k(k)
1663 .cm_stride(11)
1664 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001665 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001666 }
1667 }
1668 }
1669 }
1670
Marat Dukhande06f492020-04-09 00:19:31 -07001671 TEST(F32_PPMM_MINMAX_4X8__SSE, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001672 TEST_REQUIRES_X86_SSE;
1673 GemmMicrokernelTester()
1674 .mr(4)
1675 .nr(8)
1676 .kr(1)
1677 .sr(1)
1678 .m(4)
1679 .n(8)
1680 .k(1)
1681 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001682 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001683 }
1684
Marat Dukhande06f492020-04-09 00:19:31 -07001685 TEST(F32_PPMM_MINMAX_4X8__SSE, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001686 TEST_REQUIRES_X86_SSE;
1687 GemmMicrokernelTester()
1688 .mr(4)
1689 .nr(8)
1690 .kr(1)
1691 .sr(1)
1692 .m(4)
1693 .n(8)
1694 .k(1)
1695 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001696 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001697 }
1698
Marat Dukhande06f492020-04-09 00:19:31 -07001699 TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001700 TEST_REQUIRES_X86_SSE;
1701 GemmMicrokernelTester()
1702 .mr(4)
1703 .nr(8)
1704 .kr(1)
1705 .sr(1)
1706 .m(4)
1707 .n(8)
1708 .k(1)
1709 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001710 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
Marat Dukhan1c587112020-04-08 20:04:28 -07001711 }
1712#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1713
1714
Marat Dukhan29c6b262020-04-14 18:07:56 -07001715#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Marat Dukhande06f492020-04-09 00:19:31 -07001716 TEST(F32_PPMM_MINMAX_4X8__PSIMD, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001717 TEST_REQUIRES_PSIMD;
1718 GemmMicrokernelTester()
1719 .mr(4)
1720 .nr(8)
1721 .kr(1)
1722 .sr(1)
1723 .m(4)
1724 .n(8)
1725 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001726 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001727 }
1728
Marat Dukhande06f492020-04-09 00:19:31 -07001729 TEST(F32_PPMM_MINMAX_4X8__PSIMD, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001730 TEST_REQUIRES_PSIMD;
1731 GemmMicrokernelTester()
1732 .mr(4)
1733 .nr(8)
1734 .kr(1)
1735 .sr(1)
1736 .m(4)
1737 .n(8)
1738 .k(1)
1739 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001740 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001741 }
1742
Marat Dukhande06f492020-04-09 00:19:31 -07001743 TEST(F32_PPMM_MINMAX_4X8__PSIMD, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001744 TEST_REQUIRES_PSIMD;
1745 GemmMicrokernelTester()
1746 .mr(4)
1747 .nr(8)
1748 .kr(1)
1749 .sr(1)
1750 .m(4)
1751 .n(8)
1752 .k(1)
1753 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -07001754 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001755 }
1756
Marat Dukhande06f492020-04-09 00:19:31 -07001757 TEST(F32_PPMM_MINMAX_4X8__PSIMD, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001758 TEST_REQUIRES_PSIMD;
1759 for (uint32_t m = 1; m <= 4; m++) {
1760 for (uint32_t n = 1; n <= 8; n++) {
1761 GemmMicrokernelTester()
1762 .mr(4)
1763 .nr(8)
1764 .kr(1)
1765 .sr(1)
1766 .m(m)
1767 .n(n)
1768 .k(1)
1769 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001770 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001771 }
1772 }
1773 }
1774
Marat Dukhande06f492020-04-09 00:19:31 -07001775 TEST(F32_PPMM_MINMAX_4X8__PSIMD, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001776 TEST_REQUIRES_PSIMD;
1777 for (uint32_t m = 1; m <= 4; m++) {
1778 GemmMicrokernelTester()
1779 .mr(4)
1780 .nr(8)
1781 .kr(1)
1782 .sr(1)
1783 .m(m)
1784 .n(8)
1785 .k(1)
1786 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001787 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001788 }
1789 }
1790
Marat Dukhande06f492020-04-09 00:19:31 -07001791 TEST(F32_PPMM_MINMAX_4X8__PSIMD, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001792 TEST_REQUIRES_PSIMD;
1793 for (uint32_t n = 1; n <= 8; n++) {
1794 GemmMicrokernelTester()
1795 .mr(4)
1796 .nr(8)
1797 .kr(1)
1798 .sr(1)
1799 .m(4)
1800 .n(n)
1801 .k(1)
1802 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001803 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001804 }
1805 }
1806
Marat Dukhande06f492020-04-09 00:19:31 -07001807 TEST(F32_PPMM_MINMAX_4X8__PSIMD, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001808 TEST_REQUIRES_PSIMD;
1809 for (size_t k = 2; k < 10; k++) {
1810 GemmMicrokernelTester()
1811 .mr(4)
1812 .nr(8)
1813 .kr(1)
1814 .sr(1)
1815 .m(4)
1816 .n(8)
1817 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001818 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001819 }
1820 }
1821
Marat Dukhande06f492020-04-09 00:19:31 -07001822 TEST(F32_PPMM_MINMAX_4X8__PSIMD, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001823 TEST_REQUIRES_PSIMD;
1824 for (size_t k = 2; k < 10; k++) {
1825 for (uint32_t m = 1; m <= 4; m++) {
1826 for (uint32_t n = 1; n <= 8; n++) {
1827 GemmMicrokernelTester()
1828 .mr(4)
1829 .nr(8)
1830 .kr(1)
1831 .sr(1)
1832 .m(m)
1833 .n(n)
1834 .k(k)
1835 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001836 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001837 }
1838 }
1839 }
1840 }
1841
Marat Dukhande06f492020-04-09 00:19:31 -07001842 TEST(F32_PPMM_MINMAX_4X8__PSIMD, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001843 TEST_REQUIRES_PSIMD;
1844 for (uint32_t n = 9; n < 16; n++) {
1845 for (size_t k = 1; k <= 5; k += 2) {
1846 GemmMicrokernelTester()
1847 .mr(4)
1848 .nr(8)
1849 .kr(1)
1850 .sr(1)
1851 .m(4)
1852 .n(8)
1853 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001854 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001855 }
1856 }
1857 }
1858
Marat Dukhande06f492020-04-09 00:19:31 -07001859 TEST(F32_PPMM_MINMAX_4X8__PSIMD, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001860 TEST_REQUIRES_PSIMD;
1861 for (uint32_t n = 9; n < 16; n++) {
1862 for (size_t k = 1; k <= 5; k += 2) {
1863 GemmMicrokernelTester()
1864 .mr(4)
1865 .nr(8)
1866 .kr(1)
1867 .sr(1)
1868 .m(4)
1869 .n(8)
1870 .k(k)
1871 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001872 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001873 }
1874 }
1875 }
1876
Marat Dukhande06f492020-04-09 00:19:31 -07001877 TEST(F32_PPMM_MINMAX_4X8__PSIMD, n_gt_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001878 TEST_REQUIRES_PSIMD;
1879 for (uint32_t n = 9; n < 16; n++) {
1880 for (size_t k = 1; k <= 5; k += 2) {
1881 GemmMicrokernelTester()
1882 .mr(4)
1883 .nr(8)
1884 .kr(1)
1885 .sr(1)
1886 .m(4)
1887 .n(n)
1888 .k(k)
1889 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07001890 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001891 }
1892 }
1893 }
1894
Marat Dukhande06f492020-04-09 00:19:31 -07001895 TEST(F32_PPMM_MINMAX_4X8__PSIMD, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001896 TEST_REQUIRES_PSIMD;
1897 for (uint32_t n = 9; n < 16; n++) {
1898 for (size_t k = 1; k <= 5; k += 2) {
1899 for (uint32_t m = 1; m <= 4; m++) {
1900 GemmMicrokernelTester()
1901 .mr(4)
1902 .nr(8)
1903 .kr(1)
1904 .sr(1)
1905 .m(m)
1906 .n(n)
1907 .k(k)
1908 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001909 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001910 }
1911 }
1912 }
1913 }
1914
Marat Dukhande06f492020-04-09 00:19:31 -07001915 TEST(F32_PPMM_MINMAX_4X8__PSIMD, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001916 TEST_REQUIRES_PSIMD;
1917 for (uint32_t n = 16; n <= 24; n += 8) {
1918 for (size_t k = 1; k <= 5; k += 2) {
1919 GemmMicrokernelTester()
1920 .mr(4)
1921 .nr(8)
1922 .kr(1)
1923 .sr(1)
1924 .m(4)
1925 .n(8)
1926 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001927 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001928 }
1929 }
1930 }
1931
Marat Dukhande06f492020-04-09 00:19:31 -07001932 TEST(F32_PPMM_MINMAX_4X8__PSIMD, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001933 TEST_REQUIRES_PSIMD;
1934 for (uint32_t n = 16; n <= 24; n += 8) {
1935 for (size_t k = 1; k <= 5; k += 2) {
1936 GemmMicrokernelTester()
1937 .mr(4)
1938 .nr(8)
1939 .kr(1)
1940 .sr(1)
1941 .m(4)
1942 .n(n)
1943 .k(k)
1944 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001945 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001946 }
1947 }
1948 }
1949
Marat Dukhande06f492020-04-09 00:19:31 -07001950 TEST(F32_PPMM_MINMAX_4X8__PSIMD, n_div_8_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001951 TEST_REQUIRES_PSIMD;
1952 for (uint32_t n = 16; n <= 24; n += 8) {
1953 for (size_t k = 1; k <= 5; k += 2) {
1954 GemmMicrokernelTester()
1955 .mr(4)
1956 .nr(8)
1957 .kr(1)
1958 .sr(1)
1959 .m(4)
1960 .n(n)
1961 .k(k)
1962 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07001963 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001964 }
1965 }
1966 }
1967
Marat Dukhande06f492020-04-09 00:19:31 -07001968 TEST(F32_PPMM_MINMAX_4X8__PSIMD, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001969 TEST_REQUIRES_PSIMD;
1970 for (uint32_t n = 16; n <= 24; n += 8) {
1971 for (size_t k = 1; k <= 5; k += 2) {
1972 for (uint32_t m = 1; m <= 4; m++) {
1973 GemmMicrokernelTester()
1974 .mr(4)
1975 .nr(8)
1976 .kr(1)
1977 .sr(1)
1978 .m(m)
1979 .n(n)
1980 .k(k)
1981 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001982 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07001983 }
1984 }
1985 }
1986 }
1987
Marat Dukhande06f492020-04-09 00:19:31 -07001988 TEST(F32_PPMM_MINMAX_4X8__PSIMD, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001989 TEST_REQUIRES_PSIMD;
1990 for (size_t k = 1; k <= 5; k += 2) {
1991 for (uint32_t m = 1; m <= 4; m++) {
1992 for (uint32_t n = 1; n <= 8; n++) {
1993 GemmMicrokernelTester()
1994 .mr(4)
1995 .nr(8)
1996 .kr(1)
1997 .sr(1)
1998 .m(m)
1999 .n(n)
2000 .k(k)
2001 .cm_stride(11)
2002 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002003 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002004 }
2005 }
2006 }
2007 }
2008
Marat Dukhande06f492020-04-09 00:19:31 -07002009 TEST(F32_PPMM_MINMAX_4X8__PSIMD, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002010 TEST_REQUIRES_PSIMD;
2011 GemmMicrokernelTester()
2012 .mr(4)
2013 .nr(8)
2014 .kr(1)
2015 .sr(1)
2016 .m(4)
2017 .n(8)
2018 .k(1)
2019 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07002020 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002021 }
2022
Marat Dukhande06f492020-04-09 00:19:31 -07002023 TEST(F32_PPMM_MINMAX_4X8__PSIMD, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002024 TEST_REQUIRES_PSIMD;
2025 GemmMicrokernelTester()
2026 .mr(4)
2027 .nr(8)
2028 .kr(1)
2029 .sr(1)
2030 .m(4)
2031 .n(8)
2032 .k(1)
2033 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07002034 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002035 }
2036
Marat Dukhande06f492020-04-09 00:19:31 -07002037 TEST(F32_PPMM_MINMAX_4X8__PSIMD, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002038 TEST_REQUIRES_PSIMD;
2039 GemmMicrokernelTester()
2040 .mr(4)
2041 .nr(8)
2042 .kr(1)
2043 .sr(1)
2044 .m(4)
2045 .n(8)
2046 .k(1)
2047 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07002048 .Test(xnn_f32_ppmm_minmax_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002049 }
Marat Dukhan29c6b262020-04-14 18:07:56 -07002050#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Marat Dukhan1c587112020-04-08 20:04:28 -07002051
2052
Marat Dukhande06f492020-04-09 00:19:31 -07002053TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002054 GemmMicrokernelTester()
2055 .mr(4)
2056 .nr(2)
2057 .kr(1)
2058 .sr(1)
2059 .m(4)
2060 .n(2)
2061 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002062 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002063}
2064
Marat Dukhande06f492020-04-09 00:19:31 -07002065TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002066 GemmMicrokernelTester()
2067 .mr(4)
2068 .nr(2)
2069 .kr(1)
2070 .sr(1)
2071 .m(4)
2072 .n(2)
2073 .k(1)
2074 .cn_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07002075 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002076}
2077
Marat Dukhande06f492020-04-09 00:19:31 -07002078TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002079 GemmMicrokernelTester()
2080 .mr(4)
2081 .nr(2)
2082 .kr(1)
2083 .sr(1)
2084 .m(4)
2085 .n(2)
2086 .k(1)
2087 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -07002088 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002089}
2090
Marat Dukhande06f492020-04-09 00:19:31 -07002091TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002092 for (uint32_t m = 1; m <= 4; m++) {
2093 for (uint32_t n = 1; n <= 2; n++) {
2094 GemmMicrokernelTester()
2095 .mr(4)
2096 .nr(2)
2097 .kr(1)
2098 .sr(1)
2099 .m(m)
2100 .n(n)
2101 .k(1)
2102 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002103 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002104 }
2105 }
2106}
2107
Marat Dukhande06f492020-04-09 00:19:31 -07002108TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002109 for (uint32_t m = 1; m <= 4; m++) {
2110 GemmMicrokernelTester()
2111 .mr(4)
2112 .nr(2)
2113 .kr(1)
2114 .sr(1)
2115 .m(m)
2116 .n(2)
2117 .k(1)
2118 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002119 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002120 }
2121}
2122
Marat Dukhande06f492020-04-09 00:19:31 -07002123TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002124 for (uint32_t n = 1; n <= 2; n++) {
2125 GemmMicrokernelTester()
2126 .mr(4)
2127 .nr(2)
2128 .kr(1)
2129 .sr(1)
2130 .m(4)
2131 .n(n)
2132 .k(1)
2133 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002134 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002135 }
2136}
2137
Marat Dukhande06f492020-04-09 00:19:31 -07002138TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002139 for (size_t k = 2; k < 10; k++) {
2140 GemmMicrokernelTester()
2141 .mr(4)
2142 .nr(2)
2143 .kr(1)
2144 .sr(1)
2145 .m(4)
2146 .n(2)
2147 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002148 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002149 }
2150}
2151
Marat Dukhande06f492020-04-09 00:19:31 -07002152TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002153 for (size_t k = 2; k < 10; k++) {
2154 for (uint32_t m = 1; m <= 4; m++) {
2155 for (uint32_t n = 1; n <= 2; n++) {
2156 GemmMicrokernelTester()
2157 .mr(4)
2158 .nr(2)
2159 .kr(1)
2160 .sr(1)
2161 .m(m)
2162 .n(n)
2163 .k(k)
2164 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002165 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002166 }
2167 }
2168 }
2169}
2170
Marat Dukhande06f492020-04-09 00:19:31 -07002171TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002172 for (uint32_t n = 3; n < 4; n++) {
2173 for (size_t k = 1; k <= 5; k += 2) {
2174 GemmMicrokernelTester()
2175 .mr(4)
2176 .nr(2)
2177 .kr(1)
2178 .sr(1)
2179 .m(4)
2180 .n(2)
2181 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002182 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002183 }
2184 }
2185}
2186
Marat Dukhande06f492020-04-09 00:19:31 -07002187TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002188 for (uint32_t n = 3; n < 4; n++) {
2189 for (size_t k = 1; k <= 5; k += 2) {
2190 GemmMicrokernelTester()
2191 .mr(4)
2192 .nr(2)
2193 .kr(1)
2194 .sr(1)
2195 .m(4)
2196 .n(2)
2197 .k(k)
2198 .cn_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07002199 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002200 }
2201 }
2202}
2203
Marat Dukhande06f492020-04-09 00:19:31 -07002204TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002205 for (uint32_t n = 3; n < 4; n++) {
2206 for (size_t k = 1; k <= 5; k += 2) {
2207 GemmMicrokernelTester()
2208 .mr(4)
2209 .nr(2)
2210 .kr(1)
2211 .sr(1)
2212 .m(4)
2213 .n(n)
2214 .k(k)
2215 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002216 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002217 }
2218 }
2219}
2220
Marat Dukhande06f492020-04-09 00:19:31 -07002221TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002222 for (uint32_t n = 3; n < 4; n++) {
2223 for (size_t k = 1; k <= 5; k += 2) {
2224 for (uint32_t m = 1; m <= 4; m++) {
2225 GemmMicrokernelTester()
2226 .mr(4)
2227 .nr(2)
2228 .kr(1)
2229 .sr(1)
2230 .m(m)
2231 .n(n)
2232 .k(k)
2233 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002234 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002235 }
2236 }
2237 }
2238}
2239
Marat Dukhande06f492020-04-09 00:19:31 -07002240TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002241 for (uint32_t n = 4; n <= 6; n += 2) {
2242 for (size_t k = 1; k <= 5; k += 2) {
2243 GemmMicrokernelTester()
2244 .mr(4)
2245 .nr(2)
2246 .kr(1)
2247 .sr(1)
2248 .m(4)
2249 .n(2)
2250 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002251 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002252 }
2253 }
2254}
2255
Marat Dukhande06f492020-04-09 00:19:31 -07002256TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002257 for (uint32_t n = 4; n <= 6; n += 2) {
2258 for (size_t k = 1; k <= 5; k += 2) {
2259 GemmMicrokernelTester()
2260 .mr(4)
2261 .nr(2)
2262 .kr(1)
2263 .sr(1)
2264 .m(4)
2265 .n(n)
2266 .k(k)
2267 .cn_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07002268 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002269 }
2270 }
2271}
2272
Marat Dukhande06f492020-04-09 00:19:31 -07002273TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002274 for (uint32_t n = 4; n <= 6; n += 2) {
2275 for (size_t k = 1; k <= 5; k += 2) {
2276 GemmMicrokernelTester()
2277 .mr(4)
2278 .nr(2)
2279 .kr(1)
2280 .sr(1)
2281 .m(4)
2282 .n(n)
2283 .k(k)
2284 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002285 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002286 }
2287 }
2288}
2289
Marat Dukhande06f492020-04-09 00:19:31 -07002290TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002291 for (uint32_t n = 4; n <= 6; n += 2) {
2292 for (size_t k = 1; k <= 5; k += 2) {
2293 for (uint32_t m = 1; m <= 4; m++) {
2294 GemmMicrokernelTester()
2295 .mr(4)
2296 .nr(2)
2297 .kr(1)
2298 .sr(1)
2299 .m(m)
2300 .n(n)
2301 .k(k)
2302 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002303 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002304 }
2305 }
2306 }
2307}
2308
Marat Dukhande06f492020-04-09 00:19:31 -07002309TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002310 for (size_t k = 1; k <= 5; k += 2) {
2311 for (uint32_t m = 1; m <= 4; m++) {
2312 for (uint32_t n = 1; n <= 2; n++) {
2313 GemmMicrokernelTester()
2314 .mr(4)
2315 .nr(2)
2316 .kr(1)
2317 .sr(1)
2318 .m(m)
2319 .n(n)
2320 .k(k)
2321 .cm_stride(5)
2322 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002323 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002324 }
2325 }
2326 }
2327}
2328
Marat Dukhande06f492020-04-09 00:19:31 -07002329TEST(F32_PPMM_MINMAX_4X2__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002330 GemmMicrokernelTester()
2331 .mr(4)
2332 .nr(2)
2333 .kr(1)
2334 .sr(1)
2335 .m(4)
2336 .n(2)
2337 .k(1)
2338 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07002339 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002340}
2341
Marat Dukhande06f492020-04-09 00:19:31 -07002342TEST(F32_PPMM_MINMAX_4X2__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002343 GemmMicrokernelTester()
2344 .mr(4)
2345 .nr(2)
2346 .kr(1)
2347 .sr(1)
2348 .m(4)
2349 .n(2)
2350 .k(1)
2351 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07002352 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002353}
2354
Marat Dukhande06f492020-04-09 00:19:31 -07002355TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002356 GemmMicrokernelTester()
2357 .mr(4)
2358 .nr(2)
2359 .kr(1)
2360 .sr(1)
2361 .m(4)
2362 .n(2)
2363 .k(1)
2364 .cm_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07002365 .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002366}
2367
2368
Marat Dukhande06f492020-04-09 00:19:31 -07002369TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002370 GemmMicrokernelTester()
2371 .mr(2)
2372 .nr(4)
2373 .kr(1)
2374 .sr(1)
2375 .m(2)
2376 .n(4)
2377 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002378 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002379}
2380
Marat Dukhande06f492020-04-09 00:19:31 -07002381TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002382 GemmMicrokernelTester()
2383 .mr(2)
2384 .nr(4)
2385 .kr(1)
2386 .sr(1)
2387 .m(2)
2388 .n(4)
2389 .k(1)
2390 .cn_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002391 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002392}
2393
Marat Dukhande06f492020-04-09 00:19:31 -07002394TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002395 GemmMicrokernelTester()
2396 .mr(2)
2397 .nr(4)
2398 .kr(1)
2399 .sr(1)
2400 .m(2)
2401 .n(4)
2402 .k(1)
2403 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -07002404 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002405}
2406
Marat Dukhande06f492020-04-09 00:19:31 -07002407TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002408 for (uint32_t m = 1; m <= 2; m++) {
2409 for (uint32_t n = 1; n <= 4; n++) {
2410 GemmMicrokernelTester()
2411 .mr(2)
2412 .nr(4)
2413 .kr(1)
2414 .sr(1)
2415 .m(m)
2416 .n(n)
2417 .k(1)
2418 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002419 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002420 }
2421 }
2422}
2423
Marat Dukhande06f492020-04-09 00:19:31 -07002424TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002425 for (uint32_t m = 1; m <= 2; m++) {
2426 GemmMicrokernelTester()
2427 .mr(2)
2428 .nr(4)
2429 .kr(1)
2430 .sr(1)
2431 .m(m)
2432 .n(4)
2433 .k(1)
2434 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002435 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002436 }
2437}
2438
Marat Dukhande06f492020-04-09 00:19:31 -07002439TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002440 for (uint32_t n = 1; n <= 4; n++) {
2441 GemmMicrokernelTester()
2442 .mr(2)
2443 .nr(4)
2444 .kr(1)
2445 .sr(1)
2446 .m(2)
2447 .n(n)
2448 .k(1)
2449 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002450 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002451 }
2452}
2453
Marat Dukhande06f492020-04-09 00:19:31 -07002454TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002455 for (size_t k = 2; k < 10; k++) {
2456 GemmMicrokernelTester()
2457 .mr(2)
2458 .nr(4)
2459 .kr(1)
2460 .sr(1)
2461 .m(2)
2462 .n(4)
2463 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002464 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002465 }
2466}
2467
Marat Dukhande06f492020-04-09 00:19:31 -07002468TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002469 for (size_t k = 2; k < 10; k++) {
2470 for (uint32_t m = 1; m <= 2; m++) {
2471 for (uint32_t n = 1; n <= 4; n++) {
2472 GemmMicrokernelTester()
2473 .mr(2)
2474 .nr(4)
2475 .kr(1)
2476 .sr(1)
2477 .m(m)
2478 .n(n)
2479 .k(k)
2480 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002481 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002482 }
2483 }
2484 }
2485}
2486
Marat Dukhande06f492020-04-09 00:19:31 -07002487TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002488 for (uint32_t n = 5; n < 8; n++) {
2489 for (size_t k = 1; k <= 5; k += 2) {
2490 GemmMicrokernelTester()
2491 .mr(2)
2492 .nr(4)
2493 .kr(1)
2494 .sr(1)
2495 .m(2)
2496 .n(4)
2497 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002498 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002499 }
2500 }
2501}
2502
Marat Dukhande06f492020-04-09 00:19:31 -07002503TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002504 for (uint32_t n = 5; n < 8; n++) {
2505 for (size_t k = 1; k <= 5; k += 2) {
2506 GemmMicrokernelTester()
2507 .mr(2)
2508 .nr(4)
2509 .kr(1)
2510 .sr(1)
2511 .m(2)
2512 .n(4)
2513 .k(k)
2514 .cn_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002515 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002516 }
2517 }
2518}
2519
Marat Dukhande06f492020-04-09 00:19:31 -07002520TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002521 for (uint32_t n = 5; n < 8; n++) {
2522 for (size_t k = 1; k <= 5; k += 2) {
2523 GemmMicrokernelTester()
2524 .mr(2)
2525 .nr(4)
2526 .kr(1)
2527 .sr(1)
2528 .m(2)
2529 .n(n)
2530 .k(k)
2531 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002532 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002533 }
2534 }
2535}
2536
Marat Dukhande06f492020-04-09 00:19:31 -07002537TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002538 for (uint32_t n = 5; n < 8; n++) {
2539 for (size_t k = 1; k <= 5; k += 2) {
2540 for (uint32_t m = 1; m <= 2; m++) {
2541 GemmMicrokernelTester()
2542 .mr(2)
2543 .nr(4)
2544 .kr(1)
2545 .sr(1)
2546 .m(m)
2547 .n(n)
2548 .k(k)
2549 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002550 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002551 }
2552 }
2553 }
2554}
2555
Marat Dukhande06f492020-04-09 00:19:31 -07002556TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002557 for (uint32_t n = 8; n <= 12; n += 4) {
2558 for (size_t k = 1; k <= 5; k += 2) {
2559 GemmMicrokernelTester()
2560 .mr(2)
2561 .nr(4)
2562 .kr(1)
2563 .sr(1)
2564 .m(2)
2565 .n(4)
2566 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002567 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002568 }
2569 }
2570}
2571
Marat Dukhande06f492020-04-09 00:19:31 -07002572TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002573 for (uint32_t n = 8; n <= 12; n += 4) {
2574 for (size_t k = 1; k <= 5; k += 2) {
2575 GemmMicrokernelTester()
2576 .mr(2)
2577 .nr(4)
2578 .kr(1)
2579 .sr(1)
2580 .m(2)
2581 .n(n)
2582 .k(k)
2583 .cn_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002584 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002585 }
2586 }
2587}
2588
Marat Dukhande06f492020-04-09 00:19:31 -07002589TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002590 for (uint32_t n = 8; n <= 12; n += 4) {
2591 for (size_t k = 1; k <= 5; k += 2) {
2592 GemmMicrokernelTester()
2593 .mr(2)
2594 .nr(4)
2595 .kr(1)
2596 .sr(1)
2597 .m(2)
2598 .n(n)
2599 .k(k)
2600 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002601 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002602 }
2603 }
2604}
2605
Marat Dukhande06f492020-04-09 00:19:31 -07002606TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002607 for (uint32_t n = 8; n <= 12; n += 4) {
2608 for (size_t k = 1; k <= 5; k += 2) {
2609 for (uint32_t m = 1; m <= 2; m++) {
2610 GemmMicrokernelTester()
2611 .mr(2)
2612 .nr(4)
2613 .kr(1)
2614 .sr(1)
2615 .m(m)
2616 .n(n)
2617 .k(k)
2618 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002619 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002620 }
2621 }
2622 }
2623}
2624
Marat Dukhande06f492020-04-09 00:19:31 -07002625TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002626 for (size_t k = 1; k <= 5; k += 2) {
2627 for (uint32_t m = 1; m <= 2; m++) {
2628 for (uint32_t n = 1; n <= 4; n++) {
2629 GemmMicrokernelTester()
2630 .mr(2)
2631 .nr(4)
2632 .kr(1)
2633 .sr(1)
2634 .m(m)
2635 .n(n)
2636 .k(k)
2637 .cm_stride(7)
2638 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002639 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002640 }
2641 }
2642 }
2643}
2644
Marat Dukhande06f492020-04-09 00:19:31 -07002645TEST(F32_PPMM_MINMAX_2X4__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002646 GemmMicrokernelTester()
2647 .mr(2)
2648 .nr(4)
2649 .kr(1)
2650 .sr(1)
2651 .m(2)
2652 .n(4)
2653 .k(1)
2654 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07002655 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002656}
2657
Marat Dukhande06f492020-04-09 00:19:31 -07002658TEST(F32_PPMM_MINMAX_2X4__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002659 GemmMicrokernelTester()
2660 .mr(2)
2661 .nr(4)
2662 .kr(1)
2663 .sr(1)
2664 .m(2)
2665 .n(4)
2666 .k(1)
2667 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07002668 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002669}
2670
Marat Dukhande06f492020-04-09 00:19:31 -07002671TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002672 GemmMicrokernelTester()
2673 .mr(2)
2674 .nr(4)
2675 .kr(1)
2676 .sr(1)
2677 .m(2)
2678 .n(4)
2679 .k(1)
2680 .cm_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002681 .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002682}
2683
2684
Marat Dukhande06f492020-04-09 00:19:31 -07002685TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002686 GemmMicrokernelTester()
2687 .mr(4)
2688 .nr(4)
2689 .kr(1)
2690 .sr(1)
2691 .m(4)
2692 .n(4)
2693 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002694 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002695}
2696
Marat Dukhande06f492020-04-09 00:19:31 -07002697TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002698 GemmMicrokernelTester()
2699 .mr(4)
2700 .nr(4)
2701 .kr(1)
2702 .sr(1)
2703 .m(4)
2704 .n(4)
2705 .k(1)
2706 .cn_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002707 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002708}
2709
Marat Dukhande06f492020-04-09 00:19:31 -07002710TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002711 GemmMicrokernelTester()
2712 .mr(4)
2713 .nr(4)
2714 .kr(1)
2715 .sr(1)
2716 .m(4)
2717 .n(4)
2718 .k(1)
2719 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -07002720 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002721}
2722
Marat Dukhande06f492020-04-09 00:19:31 -07002723TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002724 for (uint32_t m = 1; m <= 4; m++) {
2725 for (uint32_t n = 1; n <= 4; n++) {
2726 GemmMicrokernelTester()
2727 .mr(4)
2728 .nr(4)
2729 .kr(1)
2730 .sr(1)
2731 .m(m)
2732 .n(n)
2733 .k(1)
2734 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002735 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002736 }
2737 }
2738}
2739
Marat Dukhande06f492020-04-09 00:19:31 -07002740TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002741 for (uint32_t m = 1; m <= 4; m++) {
2742 GemmMicrokernelTester()
2743 .mr(4)
2744 .nr(4)
2745 .kr(1)
2746 .sr(1)
2747 .m(m)
2748 .n(4)
2749 .k(1)
2750 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002751 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002752 }
2753}
2754
Marat Dukhande06f492020-04-09 00:19:31 -07002755TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002756 for (uint32_t n = 1; n <= 4; n++) {
2757 GemmMicrokernelTester()
2758 .mr(4)
2759 .nr(4)
2760 .kr(1)
2761 .sr(1)
2762 .m(4)
2763 .n(n)
2764 .k(1)
2765 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002766 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002767 }
2768}
2769
Marat Dukhande06f492020-04-09 00:19:31 -07002770TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002771 for (size_t k = 2; k < 10; k++) {
2772 GemmMicrokernelTester()
2773 .mr(4)
2774 .nr(4)
2775 .kr(1)
2776 .sr(1)
2777 .m(4)
2778 .n(4)
2779 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002780 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002781 }
2782}
2783
Marat Dukhande06f492020-04-09 00:19:31 -07002784TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002785 for (size_t k = 2; k < 10; k++) {
2786 for (uint32_t m = 1; m <= 4; m++) {
2787 for (uint32_t n = 1; n <= 4; n++) {
2788 GemmMicrokernelTester()
2789 .mr(4)
2790 .nr(4)
2791 .kr(1)
2792 .sr(1)
2793 .m(m)
2794 .n(n)
2795 .k(k)
2796 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002797 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002798 }
2799 }
2800 }
2801}
2802
Marat Dukhande06f492020-04-09 00:19:31 -07002803TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002804 for (uint32_t n = 5; n < 8; n++) {
2805 for (size_t k = 1; k <= 5; k += 2) {
2806 GemmMicrokernelTester()
2807 .mr(4)
2808 .nr(4)
2809 .kr(1)
2810 .sr(1)
2811 .m(4)
2812 .n(4)
2813 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002814 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002815 }
2816 }
2817}
2818
Marat Dukhande06f492020-04-09 00:19:31 -07002819TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002820 for (uint32_t n = 5; n < 8; n++) {
2821 for (size_t k = 1; k <= 5; k += 2) {
2822 GemmMicrokernelTester()
2823 .mr(4)
2824 .nr(4)
2825 .kr(1)
2826 .sr(1)
2827 .m(4)
2828 .n(4)
2829 .k(k)
2830 .cn_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002831 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002832 }
2833 }
2834}
2835
Marat Dukhande06f492020-04-09 00:19:31 -07002836TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002837 for (uint32_t n = 5; n < 8; n++) {
2838 for (size_t k = 1; k <= 5; k += 2) {
2839 GemmMicrokernelTester()
2840 .mr(4)
2841 .nr(4)
2842 .kr(1)
2843 .sr(1)
2844 .m(4)
2845 .n(n)
2846 .k(k)
2847 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002848 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002849 }
2850 }
2851}
2852
Marat Dukhande06f492020-04-09 00:19:31 -07002853TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002854 for (uint32_t n = 5; n < 8; n++) {
2855 for (size_t k = 1; k <= 5; k += 2) {
2856 for (uint32_t m = 1; m <= 4; m++) {
2857 GemmMicrokernelTester()
2858 .mr(4)
2859 .nr(4)
2860 .kr(1)
2861 .sr(1)
2862 .m(m)
2863 .n(n)
2864 .k(k)
2865 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002866 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002867 }
2868 }
2869 }
2870}
2871
Marat Dukhande06f492020-04-09 00:19:31 -07002872TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002873 for (uint32_t n = 8; n <= 12; n += 4) {
2874 for (size_t k = 1; k <= 5; k += 2) {
2875 GemmMicrokernelTester()
2876 .mr(4)
2877 .nr(4)
2878 .kr(1)
2879 .sr(1)
2880 .m(4)
2881 .n(4)
2882 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07002883 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002884 }
2885 }
2886}
2887
Marat Dukhande06f492020-04-09 00:19:31 -07002888TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002889 for (uint32_t n = 8; n <= 12; n += 4) {
2890 for (size_t k = 1; k <= 5; k += 2) {
2891 GemmMicrokernelTester()
2892 .mr(4)
2893 .nr(4)
2894 .kr(1)
2895 .sr(1)
2896 .m(4)
2897 .n(n)
2898 .k(k)
2899 .cn_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002900 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002901 }
2902 }
2903}
2904
Marat Dukhande06f492020-04-09 00:19:31 -07002905TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002906 for (uint32_t n = 8; n <= 12; n += 4) {
2907 for (size_t k = 1; k <= 5; k += 2) {
2908 GemmMicrokernelTester()
2909 .mr(4)
2910 .nr(4)
2911 .kr(1)
2912 .sr(1)
2913 .m(4)
2914 .n(n)
2915 .k(k)
2916 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002917 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002918 }
2919 }
2920}
2921
Marat Dukhande06f492020-04-09 00:19:31 -07002922TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002923 for (uint32_t n = 8; n <= 12; n += 4) {
2924 for (size_t k = 1; k <= 5; k += 2) {
2925 for (uint32_t m = 1; m <= 4; m++) {
2926 GemmMicrokernelTester()
2927 .mr(4)
2928 .nr(4)
2929 .kr(1)
2930 .sr(1)
2931 .m(m)
2932 .n(n)
2933 .k(k)
2934 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002935 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002936 }
2937 }
2938 }
2939}
2940
Marat Dukhande06f492020-04-09 00:19:31 -07002941TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002942 for (size_t k = 1; k <= 5; k += 2) {
2943 for (uint32_t m = 1; m <= 4; m++) {
2944 for (uint32_t n = 1; n <= 4; n++) {
2945 GemmMicrokernelTester()
2946 .mr(4)
2947 .nr(4)
2948 .kr(1)
2949 .sr(1)
2950 .m(m)
2951 .n(n)
2952 .k(k)
2953 .cm_stride(7)
2954 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07002955 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002956 }
2957 }
2958 }
2959}
2960
Marat Dukhande06f492020-04-09 00:19:31 -07002961TEST(F32_PPMM_MINMAX_4X4__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002962 GemmMicrokernelTester()
2963 .mr(4)
2964 .nr(4)
2965 .kr(1)
2966 .sr(1)
2967 .m(4)
2968 .n(4)
2969 .k(1)
2970 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07002971 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002972}
2973
Marat Dukhande06f492020-04-09 00:19:31 -07002974TEST(F32_PPMM_MINMAX_4X4__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002975 GemmMicrokernelTester()
2976 .mr(4)
2977 .nr(4)
2978 .kr(1)
2979 .sr(1)
2980 .m(4)
2981 .n(4)
2982 .k(1)
2983 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07002984 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002985}
2986
Marat Dukhande06f492020-04-09 00:19:31 -07002987TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002988 GemmMicrokernelTester()
2989 .mr(4)
2990 .nr(4)
2991 .kr(1)
2992 .sr(1)
2993 .m(4)
2994 .n(4)
2995 .k(1)
2996 .cm_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07002997 .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07002998}
2999
3000
Marat Dukhande06f492020-04-09 00:19:31 -07003001TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003002 GemmMicrokernelTester()
3003 .mr(3)
3004 .nr(3)
3005 .kr(1)
3006 .sr(1)
3007 .m(3)
3008 .n(3)
3009 .k(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003010 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003011}
3012
Marat Dukhande06f492020-04-09 00:19:31 -07003013TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003014 GemmMicrokernelTester()
3015 .mr(3)
3016 .nr(3)
3017 .kr(1)
3018 .sr(1)
3019 .m(3)
3020 .n(3)
3021 .k(1)
3022 .cn_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07003023 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003024}
3025
Marat Dukhande06f492020-04-09 00:19:31 -07003026TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003027 GemmMicrokernelTester()
3028 .mr(3)
3029 .nr(3)
3030 .kr(1)
3031 .sr(1)
3032 .m(3)
3033 .n(3)
3034 .k(1)
3035 .a_stride(3)
Marat Dukhande06f492020-04-09 00:19:31 -07003036 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003037}
3038
Marat Dukhande06f492020-04-09 00:19:31 -07003039TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003040 for (uint32_t m = 1; m <= 3; m++) {
3041 for (uint32_t n = 1; n <= 3; n++) {
3042 GemmMicrokernelTester()
3043 .mr(3)
3044 .nr(3)
3045 .kr(1)
3046 .sr(1)
3047 .m(m)
3048 .n(n)
3049 .k(1)
3050 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003051 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003052 }
3053 }
3054}
3055
Marat Dukhande06f492020-04-09 00:19:31 -07003056TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003057 for (uint32_t m = 1; m <= 3; m++) {
3058 GemmMicrokernelTester()
3059 .mr(3)
3060 .nr(3)
3061 .kr(1)
3062 .sr(1)
3063 .m(m)
3064 .n(3)
3065 .k(1)
3066 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003067 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003068 }
3069}
3070
Marat Dukhande06f492020-04-09 00:19:31 -07003071TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003072 for (uint32_t n = 1; n <= 3; n++) {
3073 GemmMicrokernelTester()
3074 .mr(3)
3075 .nr(3)
3076 .kr(1)
3077 .sr(1)
3078 .m(3)
3079 .n(n)
3080 .k(1)
3081 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003082 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003083 }
3084}
3085
Marat Dukhande06f492020-04-09 00:19:31 -07003086TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003087 for (size_t k = 2; k < 10; k++) {
3088 GemmMicrokernelTester()
3089 .mr(3)
3090 .nr(3)
3091 .kr(1)
3092 .sr(1)
3093 .m(3)
3094 .n(3)
3095 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07003096 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003097 }
3098}
3099
Marat Dukhande06f492020-04-09 00:19:31 -07003100TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003101 for (size_t k = 2; k < 10; k++) {
3102 for (uint32_t m = 1; m <= 3; m++) {
3103 for (uint32_t n = 1; n <= 3; n++) {
3104 GemmMicrokernelTester()
3105 .mr(3)
3106 .nr(3)
3107 .kr(1)
3108 .sr(1)
3109 .m(m)
3110 .n(n)
3111 .k(k)
3112 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003113 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003114 }
3115 }
3116 }
3117}
3118
Marat Dukhande06f492020-04-09 00:19:31 -07003119TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003120 for (uint32_t n = 4; n < 6; n++) {
3121 for (size_t k = 1; k <= 5; k += 2) {
3122 GemmMicrokernelTester()
3123 .mr(3)
3124 .nr(3)
3125 .kr(1)
3126 .sr(1)
3127 .m(3)
3128 .n(3)
3129 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07003130 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003131 }
3132 }
3133}
3134
Marat Dukhande06f492020-04-09 00:19:31 -07003135TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003136 for (uint32_t n = 4; n < 6; n++) {
3137 for (size_t k = 1; k <= 5; k += 2) {
3138 GemmMicrokernelTester()
3139 .mr(3)
3140 .nr(3)
3141 .kr(1)
3142 .sr(1)
3143 .m(3)
3144 .n(3)
3145 .k(k)
3146 .cn_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07003147 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003148 }
3149 }
3150}
3151
Marat Dukhande06f492020-04-09 00:19:31 -07003152TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003153 for (uint32_t n = 4; n < 6; n++) {
3154 for (size_t k = 1; k <= 5; k += 2) {
3155 GemmMicrokernelTester()
3156 .mr(3)
3157 .nr(3)
3158 .kr(1)
3159 .sr(1)
3160 .m(3)
3161 .n(n)
3162 .k(k)
3163 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07003164 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003165 }
3166 }
3167}
3168
Marat Dukhande06f492020-04-09 00:19:31 -07003169TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003170 for (uint32_t n = 4; n < 6; n++) {
3171 for (size_t k = 1; k <= 5; k += 2) {
3172 for (uint32_t m = 1; m <= 3; m++) {
3173 GemmMicrokernelTester()
3174 .mr(3)
3175 .nr(3)
3176 .kr(1)
3177 .sr(1)
3178 .m(m)
3179 .n(n)
3180 .k(k)
3181 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003182 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003183 }
3184 }
3185 }
3186}
3187
Marat Dukhande06f492020-04-09 00:19:31 -07003188TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003189 for (uint32_t n = 6; n <= 9; n += 3) {
3190 for (size_t k = 1; k <= 5; k += 2) {
3191 GemmMicrokernelTester()
3192 .mr(3)
3193 .nr(3)
3194 .kr(1)
3195 .sr(1)
3196 .m(3)
3197 .n(3)
3198 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07003199 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003200 }
3201 }
3202}
3203
Marat Dukhande06f492020-04-09 00:19:31 -07003204TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003205 for (uint32_t n = 6; n <= 9; n += 3) {
3206 for (size_t k = 1; k <= 5; k += 2) {
3207 GemmMicrokernelTester()
3208 .mr(3)
3209 .nr(3)
3210 .kr(1)
3211 .sr(1)
3212 .m(3)
3213 .n(n)
3214 .k(k)
3215 .cn_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07003216 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003217 }
3218 }
3219}
3220
Marat Dukhande06f492020-04-09 00:19:31 -07003221TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_strided_a) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003222 for (uint32_t n = 6; n <= 9; n += 3) {
3223 for (size_t k = 1; k <= 5; k += 2) {
3224 GemmMicrokernelTester()
3225 .mr(3)
3226 .nr(3)
3227 .kr(1)
3228 .sr(1)
3229 .m(3)
3230 .n(n)
3231 .k(k)
3232 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07003233 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003234 }
3235 }
3236}
3237
Marat Dukhande06f492020-04-09 00:19:31 -07003238TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003239 for (uint32_t n = 6; n <= 9; n += 3) {
3240 for (size_t k = 1; k <= 5; k += 2) {
3241 for (uint32_t m = 1; m <= 3; m++) {
3242 GemmMicrokernelTester()
3243 .mr(3)
3244 .nr(3)
3245 .kr(1)
3246 .sr(1)
3247 .m(m)
3248 .n(n)
3249 .k(k)
3250 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003251 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003252 }
3253 }
3254 }
3255}
3256
Marat Dukhande06f492020-04-09 00:19:31 -07003257TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003258 for (size_t k = 1; k <= 5; k += 2) {
3259 for (uint32_t m = 1; m <= 3; m++) {
3260 for (uint32_t n = 1; n <= 3; n++) {
3261 GemmMicrokernelTester()
3262 .mr(3)
3263 .nr(3)
3264 .kr(1)
3265 .sr(1)
3266 .m(m)
3267 .n(n)
3268 .k(k)
3269 .cm_stride(5)
3270 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07003271 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003272 }
3273 }
3274 }
3275}
3276
Marat Dukhande06f492020-04-09 00:19:31 -07003277TEST(F32_PPMM_MINMAX_3X3__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003278 GemmMicrokernelTester()
3279 .mr(3)
3280 .nr(3)
3281 .kr(1)
3282 .sr(1)
3283 .m(3)
3284 .n(3)
3285 .k(1)
3286 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07003287 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003288}
3289
Marat Dukhande06f492020-04-09 00:19:31 -07003290TEST(F32_PPMM_MINMAX_3X3__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003291 GemmMicrokernelTester()
3292 .mr(3)
3293 .nr(3)
3294 .kr(1)
3295 .sr(1)
3296 .m(3)
3297 .n(3)
3298 .k(1)
3299 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07003300 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003301}
3302
Marat Dukhande06f492020-04-09 00:19:31 -07003303TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003304 GemmMicrokernelTester()
3305 .mr(3)
3306 .nr(3)
3307 .kr(1)
3308 .sr(1)
3309 .m(3)
3310 .n(3)
3311 .k(1)
3312 .cm_stride(5)
Marat Dukhande06f492020-04-09 00:19:31 -07003313 .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
Marat Dukhan1c587112020-04-08 20:04:28 -07003314}