blob: 09a344d2f5f44e8b61a0ccf08cd3b154c04441c8 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f16-gemm.yaml
//   Generator: tools/generate-gemm-test.py
12
13
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <gtest/gtest.h>
15
Marat Dukhan1dadbf72019-10-01 10:46:20 -070016#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
XNNPACK Teamb455b122019-09-27 18:10:33 -070019#include <xnnpack/gemm.h>
20#include <xnnpack/igemm.h>
21#include <xnnpack/ppmm.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070022#include "gemm-microkernel-tester.h"
23
24
Marat Dukhan1dadbf72019-10-01 10:46:20 -070025#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -070026 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_eq_4) {
27 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
28 GemmMicrokernelTester()
29 .mr(4)
30 .nr(8)
31 .kr(1)
32 .sr(1)
33 .m(4)
34 .n(8)
35 .k(4)
36 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
37 }
38
39 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, strided_cn) {
40 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
41 GemmMicrokernelTester()
42 .mr(4)
43 .nr(8)
44 .kr(1)
45 .sr(1)
46 .m(4)
47 .n(8)
48 .k(4)
49 .cn_stride(11)
50 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
51 }
52
53 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
54 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
55 GemmMicrokernelTester()
56 .mr(4)
57 .nr(8)
58 .kr(1)
59 .sr(1)
60 .m(4)
61 .n(8)
62 .k(4)
63 .a_stride(7)
64 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
65 }
66
67 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
68 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
69 for (uint32_t m = 1; m <= 4; m++) {
70 for (uint32_t n = 1; n <= 8; n++) {
71 GemmMicrokernelTester()
72 .mr(4)
73 .nr(8)
74 .kr(1)
75 .sr(1)
76 .m(m)
77 .n(n)
78 .k(4)
79 .iterations(1)
80 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
81 }
82 }
83 }
84
85 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
86 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
87 for (uint32_t m = 1; m <= 4; m++) {
88 GemmMicrokernelTester()
89 .mr(4)
90 .nr(8)
91 .kr(1)
92 .sr(1)
93 .m(m)
94 .n(8)
95 .k(4)
96 .iterations(1)
97 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
98 }
99 }
100
101 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
102 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
103 for (uint32_t n = 1; n <= 8; n++) {
104 GemmMicrokernelTester()
105 .mr(4)
106 .nr(8)
107 .kr(1)
108 .sr(1)
109 .m(4)
110 .n(n)
111 .k(4)
112 .iterations(1)
113 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
114 }
115 }
116
117 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_lt_4) {
118 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
119 for (size_t k = 1; k < 4; k++) {
120 GemmMicrokernelTester()
121 .mr(4)
122 .nr(8)
123 .kr(1)
124 .sr(1)
125 .m(4)
126 .n(8)
127 .k(k)
128 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
129 }
130 }
131
132 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
133 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
134 for (size_t k = 1; k < 4; k++) {
135 GemmMicrokernelTester()
136 .mr(4)
137 .nr(8)
138 .kr(1)
139 .sr(1)
140 .m(4)
141 .n(8)
142 .k(k)
143 .a_stride(7)
144 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
145 }
146 }
147
148 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
149 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
150 for (size_t k = 1; k < 4; k++) {
151 for (uint32_t m = 1; m <= 4; m++) {
152 for (uint32_t n = 1; n <= 8; n++) {
153 GemmMicrokernelTester()
154 .mr(4)
155 .nr(8)
156 .kr(1)
157 .sr(1)
158 .m(m)
159 .n(n)
160 .k(k)
161 .iterations(1)
162 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
163 }
164 }
165 }
166 }
167
168 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_gt_4) {
169 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
170 for (size_t k = 5; k < 8; k++) {
171 GemmMicrokernelTester()
172 .mr(4)
173 .nr(8)
174 .kr(1)
175 .sr(1)
176 .m(4)
177 .n(8)
178 .k(k)
179 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
180 }
181 }
182
183 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
184 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
185 for (size_t k = 5; k < 8; k++) {
186 GemmMicrokernelTester()
187 .mr(4)
188 .nr(8)
189 .kr(1)
190 .sr(1)
191 .m(4)
192 .n(8)
193 .k(k)
194 .a_stride(11)
195 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
196 }
197 }
198
199 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
200 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
201 for (size_t k = 5; k < 8; k++) {
202 for (uint32_t m = 1; m <= 4; m++) {
203 for (uint32_t n = 1; n <= 8; n++) {
204 GemmMicrokernelTester()
205 .mr(4)
206 .nr(8)
207 .kr(1)
208 .sr(1)
209 .m(m)
210 .n(n)
211 .k(k)
212 .iterations(1)
213 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
214 }
215 }
216 }
217 }
218
219 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_div_4) {
220 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
221 for (size_t k = 8; k <= 40; k += 4) {
222 GemmMicrokernelTester()
223 .mr(4)
224 .nr(8)
225 .kr(1)
226 .sr(1)
227 .m(4)
228 .n(8)
229 .k(k)
230 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
231 }
232 }
233
234 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_div_4_strided_a) {
235 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
236 for (size_t k = 8; k <= 40; k += 4) {
237 GemmMicrokernelTester()
238 .mr(4)
239 .nr(8)
240 .kr(1)
241 .sr(1)
242 .m(4)
243 .n(8)
244 .k(k)
245 .a_stride(43)
246 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
247 }
248 }
249
250 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
251 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
252 for (size_t k = 8; k <= 40; k += 4) {
253 for (uint32_t m = 1; m <= 4; m++) {
254 for (uint32_t n = 1; n <= 8; n++) {
255 GemmMicrokernelTester()
256 .mr(4)
257 .nr(8)
258 .kr(1)
259 .sr(1)
260 .m(m)
261 .n(n)
262 .k(k)
263 .iterations(1)
264 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
265 }
266 }
267 }
268 }
269
270 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_gt_8) {
271 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
272 for (uint32_t n = 9; n < 16; n++) {
273 for (size_t k = 1; k <= 20; k += 5) {
274 GemmMicrokernelTester()
275 .mr(4)
276 .nr(8)
277 .kr(1)
278 .sr(1)
279 .m(4)
280 .n(8)
281 .k(k)
282 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
283 }
284 }
285 }
286
287 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
288 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
289 for (uint32_t n = 9; n < 16; n++) {
290 for (size_t k = 1; k <= 20; k += 5) {
291 GemmMicrokernelTester()
292 .mr(4)
293 .nr(8)
294 .kr(1)
295 .sr(1)
296 .m(4)
297 .n(8)
298 .k(k)
299 .cn_stride(11)
300 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
301 }
302 }
303 }
304
305 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_gt_8_strided_a) {
306 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
307 for (uint32_t n = 9; n < 16; n++) {
308 for (size_t k = 1; k <= 20; k += 5) {
309 GemmMicrokernelTester()
310 .mr(4)
311 .nr(8)
312 .kr(1)
313 .sr(1)
314 .m(4)
315 .n(n)
316 .k(k)
317 .a_stride(23)
318 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
319 }
320 }
321 }
322
323 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
324 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
325 for (uint32_t n = 9; n < 16; n++) {
326 for (size_t k = 1; k <= 20; k += 5) {
327 for (uint32_t m = 1; m <= 4; m++) {
328 GemmMicrokernelTester()
329 .mr(4)
330 .nr(8)
331 .kr(1)
332 .sr(1)
333 .m(m)
334 .n(n)
335 .k(k)
336 .iterations(1)
337 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
338 }
339 }
340 }
341 }
342
343 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_div_8) {
344 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
345 for (uint32_t n = 16; n <= 24; n += 8) {
346 for (size_t k = 1; k <= 20; k += 5) {
347 GemmMicrokernelTester()
348 .mr(4)
349 .nr(8)
350 .kr(1)
351 .sr(1)
352 .m(4)
353 .n(8)
354 .k(k)
355 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
356 }
357 }
358 }
359
360 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
361 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
362 for (uint32_t n = 16; n <= 24; n += 8) {
363 for (size_t k = 1; k <= 20; k += 5) {
364 GemmMicrokernelTester()
365 .mr(4)
366 .nr(8)
367 .kr(1)
368 .sr(1)
369 .m(4)
370 .n(n)
371 .k(k)
372 .cn_stride(11)
373 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
374 }
375 }
376 }
377
378 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_div_8_strided_a) {
379 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
380 for (uint32_t n = 16; n <= 24; n += 8) {
381 for (size_t k = 1; k <= 20; k += 5) {
382 GemmMicrokernelTester()
383 .mr(4)
384 .nr(8)
385 .kr(1)
386 .sr(1)
387 .m(4)
388 .n(n)
389 .k(k)
390 .a_stride(23)
391 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
392 }
393 }
394 }
395
396 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
397 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
398 for (uint32_t n = 16; n <= 24; n += 8) {
399 for (size_t k = 1; k <= 20; k += 5) {
400 for (uint32_t m = 1; m <= 4; m++) {
401 GemmMicrokernelTester()
402 .mr(4)
403 .nr(8)
404 .kr(1)
405 .sr(1)
406 .m(m)
407 .n(n)
408 .k(k)
409 .iterations(1)
410 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
411 }
412 }
413 }
414 }
415
416 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
417 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
418 for (size_t k = 1; k <= 20; k += 5) {
419 for (uint32_t m = 1; m <= 4; m++) {
420 for (uint32_t n = 1; n <= 8; n++) {
421 GemmMicrokernelTester()
422 .mr(4)
423 .nr(8)
424 .kr(1)
425 .sr(1)
426 .m(m)
427 .n(n)
428 .k(k)
429 .cm_stride(11)
430 .iterations(1)
431 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
432 }
433 }
434 }
435 }
436
437 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, qmin) {
438 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
439 GemmMicrokernelTester()
440 .mr(4)
441 .nr(8)
442 .kr(1)
443 .sr(1)
444 .m(4)
445 .n(8)
446 .k(4)
447 .qmin(128)
448 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
449 }
450
451 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, qmax) {
452 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
453 GemmMicrokernelTester()
454 .mr(4)
455 .nr(8)
456 .kr(1)
457 .sr(1)
458 .m(4)
459 .n(8)
460 .k(4)
461 .qmax(128)
462 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
463 }
464
465 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, strided_cm) {
466 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
467 GemmMicrokernelTester()
468 .mr(4)
469 .nr(8)
470 .kr(1)
471 .sr(1)
472 .m(4)
473 .n(8)
474 .k(4)
475 .cm_stride(11)
476 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
477 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700478#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700479
480
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700481#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700482 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_eq_4) {
483 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
484 GemmMicrokernelTester()
485 .mr(6)
486 .nr(8)
487 .kr(1)
488 .sr(1)
489 .m(6)
490 .n(8)
491 .k(4)
492 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
493 }
494
495 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, strided_cn) {
496 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
497 GemmMicrokernelTester()
498 .mr(6)
499 .nr(8)
500 .kr(1)
501 .sr(1)
502 .m(6)
503 .n(8)
504 .k(4)
505 .cn_stride(11)
506 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
507 }
508
509 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
510 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
511 GemmMicrokernelTester()
512 .mr(6)
513 .nr(8)
514 .kr(1)
515 .sr(1)
516 .m(6)
517 .n(8)
518 .k(4)
519 .a_stride(7)
520 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
521 }
522
523 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
524 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
525 for (uint32_t m = 1; m <= 6; m++) {
526 for (uint32_t n = 1; n <= 8; n++) {
527 GemmMicrokernelTester()
528 .mr(6)
529 .nr(8)
530 .kr(1)
531 .sr(1)
532 .m(m)
533 .n(n)
534 .k(4)
535 .iterations(1)
536 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
537 }
538 }
539 }
540
541 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
542 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
543 for (uint32_t m = 1; m <= 6; m++) {
544 GemmMicrokernelTester()
545 .mr(6)
546 .nr(8)
547 .kr(1)
548 .sr(1)
549 .m(m)
550 .n(8)
551 .k(4)
552 .iterations(1)
553 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
554 }
555 }
556
557 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
558 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
559 for (uint32_t n = 1; n <= 8; n++) {
560 GemmMicrokernelTester()
561 .mr(6)
562 .nr(8)
563 .kr(1)
564 .sr(1)
565 .m(6)
566 .n(n)
567 .k(4)
568 .iterations(1)
569 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
570 }
571 }
572
573 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_lt_4) {
574 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
575 for (size_t k = 1; k < 4; k++) {
576 GemmMicrokernelTester()
577 .mr(6)
578 .nr(8)
579 .kr(1)
580 .sr(1)
581 .m(6)
582 .n(8)
583 .k(k)
584 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
585 }
586 }
587
588 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
589 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
590 for (size_t k = 1; k < 4; k++) {
591 GemmMicrokernelTester()
592 .mr(6)
593 .nr(8)
594 .kr(1)
595 .sr(1)
596 .m(6)
597 .n(8)
598 .k(k)
599 .a_stride(7)
600 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
601 }
602 }
603
604 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
605 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
606 for (size_t k = 1; k < 4; k++) {
607 for (uint32_t m = 1; m <= 6; m++) {
608 for (uint32_t n = 1; n <= 8; n++) {
609 GemmMicrokernelTester()
610 .mr(6)
611 .nr(8)
612 .kr(1)
613 .sr(1)
614 .m(m)
615 .n(n)
616 .k(k)
617 .iterations(1)
618 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
619 }
620 }
621 }
622 }
623
624 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_gt_4) {
625 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
626 for (size_t k = 5; k < 8; k++) {
627 GemmMicrokernelTester()
628 .mr(6)
629 .nr(8)
630 .kr(1)
631 .sr(1)
632 .m(6)
633 .n(8)
634 .k(k)
635 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
636 }
637 }
638
639 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
640 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
641 for (size_t k = 5; k < 8; k++) {
642 GemmMicrokernelTester()
643 .mr(6)
644 .nr(8)
645 .kr(1)
646 .sr(1)
647 .m(6)
648 .n(8)
649 .k(k)
650 .a_stride(11)
651 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
652 }
653 }
654
655 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
656 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
657 for (size_t k = 5; k < 8; k++) {
658 for (uint32_t m = 1; m <= 6; m++) {
659 for (uint32_t n = 1; n <= 8; n++) {
660 GemmMicrokernelTester()
661 .mr(6)
662 .nr(8)
663 .kr(1)
664 .sr(1)
665 .m(m)
666 .n(n)
667 .k(k)
668 .iterations(1)
669 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
670 }
671 }
672 }
673 }
674
675 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_div_4) {
676 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
677 for (size_t k = 8; k <= 40; k += 4) {
678 GemmMicrokernelTester()
679 .mr(6)
680 .nr(8)
681 .kr(1)
682 .sr(1)
683 .m(6)
684 .n(8)
685 .k(k)
686 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
687 }
688 }
689
690 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_div_4_strided_a) {
691 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
692 for (size_t k = 8; k <= 40; k += 4) {
693 GemmMicrokernelTester()
694 .mr(6)
695 .nr(8)
696 .kr(1)
697 .sr(1)
698 .m(6)
699 .n(8)
700 .k(k)
701 .a_stride(43)
702 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
703 }
704 }
705
706 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
707 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
708 for (size_t k = 8; k <= 40; k += 4) {
709 for (uint32_t m = 1; m <= 6; m++) {
710 for (uint32_t n = 1; n <= 8; n++) {
711 GemmMicrokernelTester()
712 .mr(6)
713 .nr(8)
714 .kr(1)
715 .sr(1)
716 .m(m)
717 .n(n)
718 .k(k)
719 .iterations(1)
720 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
721 }
722 }
723 }
724 }
725
726 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_gt_8) {
727 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
728 for (uint32_t n = 9; n < 16; n++) {
729 for (size_t k = 1; k <= 20; k += 5) {
730 GemmMicrokernelTester()
731 .mr(6)
732 .nr(8)
733 .kr(1)
734 .sr(1)
735 .m(6)
736 .n(8)
737 .k(k)
738 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
739 }
740 }
741 }
742
743 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
744 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
745 for (uint32_t n = 9; n < 16; n++) {
746 for (size_t k = 1; k <= 20; k += 5) {
747 GemmMicrokernelTester()
748 .mr(6)
749 .nr(8)
750 .kr(1)
751 .sr(1)
752 .m(6)
753 .n(8)
754 .k(k)
755 .cn_stride(11)
756 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
757 }
758 }
759 }
760
761 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_gt_8_strided_a) {
762 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
763 for (uint32_t n = 9; n < 16; n++) {
764 for (size_t k = 1; k <= 20; k += 5) {
765 GemmMicrokernelTester()
766 .mr(6)
767 .nr(8)
768 .kr(1)
769 .sr(1)
770 .m(6)
771 .n(n)
772 .k(k)
773 .a_stride(23)
774 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
775 }
776 }
777 }
778
779 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
780 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
781 for (uint32_t n = 9; n < 16; n++) {
782 for (size_t k = 1; k <= 20; k += 5) {
783 for (uint32_t m = 1; m <= 6; m++) {
784 GemmMicrokernelTester()
785 .mr(6)
786 .nr(8)
787 .kr(1)
788 .sr(1)
789 .m(m)
790 .n(n)
791 .k(k)
792 .iterations(1)
793 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
794 }
795 }
796 }
797 }
798
799 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_div_8) {
800 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
801 for (uint32_t n = 16; n <= 24; n += 8) {
802 for (size_t k = 1; k <= 20; k += 5) {
803 GemmMicrokernelTester()
804 .mr(6)
805 .nr(8)
806 .kr(1)
807 .sr(1)
808 .m(6)
809 .n(8)
810 .k(k)
811 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
812 }
813 }
814 }
815
816 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
817 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
818 for (uint32_t n = 16; n <= 24; n += 8) {
819 for (size_t k = 1; k <= 20; k += 5) {
820 GemmMicrokernelTester()
821 .mr(6)
822 .nr(8)
823 .kr(1)
824 .sr(1)
825 .m(6)
826 .n(n)
827 .k(k)
828 .cn_stride(11)
829 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
830 }
831 }
832 }
833
834 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_div_8_strided_a) {
835 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
836 for (uint32_t n = 16; n <= 24; n += 8) {
837 for (size_t k = 1; k <= 20; k += 5) {
838 GemmMicrokernelTester()
839 .mr(6)
840 .nr(8)
841 .kr(1)
842 .sr(1)
843 .m(6)
844 .n(n)
845 .k(k)
846 .a_stride(23)
847 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
848 }
849 }
850 }
851
852 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
853 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
854 for (uint32_t n = 16; n <= 24; n += 8) {
855 for (size_t k = 1; k <= 20; k += 5) {
856 for (uint32_t m = 1; m <= 6; m++) {
857 GemmMicrokernelTester()
858 .mr(6)
859 .nr(8)
860 .kr(1)
861 .sr(1)
862 .m(m)
863 .n(n)
864 .k(k)
865 .iterations(1)
866 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
867 }
868 }
869 }
870 }
871
872 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
873 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
874 for (size_t k = 1; k <= 20; k += 5) {
875 for (uint32_t m = 1; m <= 6; m++) {
876 for (uint32_t n = 1; n <= 8; n++) {
877 GemmMicrokernelTester()
878 .mr(6)
879 .nr(8)
880 .kr(1)
881 .sr(1)
882 .m(m)
883 .n(n)
884 .k(k)
885 .cm_stride(11)
886 .iterations(1)
887 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
888 }
889 }
890 }
891 }
892
893 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, qmin) {
894 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
895 GemmMicrokernelTester()
896 .mr(6)
897 .nr(8)
898 .kr(1)
899 .sr(1)
900 .m(6)
901 .n(8)
902 .k(4)
903 .qmin(128)
904 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
905 }
906
907 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, qmax) {
908 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
909 GemmMicrokernelTester()
910 .mr(6)
911 .nr(8)
912 .kr(1)
913 .sr(1)
914 .m(6)
915 .n(8)
916 .k(4)
917 .qmax(128)
918 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
919 }
920
921 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, strided_cm) {
922 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
923 GemmMicrokernelTester()
924 .mr(6)
925 .nr(8)
926 .kr(1)
927 .sr(1)
928 .m(6)
929 .n(8)
930 .k(4)
931 .cm_stride(11)
932 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
933 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700934#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700935
936
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700937#if XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700938 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_eq_4) {
939 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
940 GemmMicrokernelTester()
941 .mr(8)
942 .nr(8)
943 .kr(1)
944 .sr(1)
945 .m(8)
946 .n(8)
947 .k(4)
948 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
949 }
950
951 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, strided_cn) {
952 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
953 GemmMicrokernelTester()
954 .mr(8)
955 .nr(8)
956 .kr(1)
957 .sr(1)
958 .m(8)
959 .n(8)
960 .k(4)
961 .cn_stride(11)
962 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
963 }
964
965 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
966 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
967 GemmMicrokernelTester()
968 .mr(8)
969 .nr(8)
970 .kr(1)
971 .sr(1)
972 .m(8)
973 .n(8)
974 .k(4)
975 .a_stride(7)
976 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
977 }
978
979 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
980 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
981 for (uint32_t m = 1; m <= 8; m++) {
982 for (uint32_t n = 1; n <= 8; n++) {
983 GemmMicrokernelTester()
984 .mr(8)
985 .nr(8)
986 .kr(1)
987 .sr(1)
988 .m(m)
989 .n(n)
990 .k(4)
991 .iterations(1)
992 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
993 }
994 }
995 }
996
997 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
998 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
999 for (uint32_t m = 1; m <= 8; m++) {
1000 GemmMicrokernelTester()
1001 .mr(8)
1002 .nr(8)
1003 .kr(1)
1004 .sr(1)
1005 .m(m)
1006 .n(8)
1007 .k(4)
1008 .iterations(1)
1009 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1010 }
1011 }
1012
1013 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
1014 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1015 for (uint32_t n = 1; n <= 8; n++) {
1016 GemmMicrokernelTester()
1017 .mr(8)
1018 .nr(8)
1019 .kr(1)
1020 .sr(1)
1021 .m(8)
1022 .n(n)
1023 .k(4)
1024 .iterations(1)
1025 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1026 }
1027 }
1028
1029 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_lt_4) {
1030 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1031 for (size_t k = 1; k < 4; k++) {
1032 GemmMicrokernelTester()
1033 .mr(8)
1034 .nr(8)
1035 .kr(1)
1036 .sr(1)
1037 .m(8)
1038 .n(8)
1039 .k(k)
1040 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1041 }
1042 }
1043
1044 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
1045 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1046 for (size_t k = 1; k < 4; k++) {
1047 GemmMicrokernelTester()
1048 .mr(8)
1049 .nr(8)
1050 .kr(1)
1051 .sr(1)
1052 .m(8)
1053 .n(8)
1054 .k(k)
1055 .a_stride(7)
1056 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1057 }
1058 }
1059
1060 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
1061 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1062 for (size_t k = 1; k < 4; k++) {
1063 for (uint32_t m = 1; m <= 8; m++) {
1064 for (uint32_t n = 1; n <= 8; n++) {
1065 GemmMicrokernelTester()
1066 .mr(8)
1067 .nr(8)
1068 .kr(1)
1069 .sr(1)
1070 .m(m)
1071 .n(n)
1072 .k(k)
1073 .iterations(1)
1074 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1075 }
1076 }
1077 }
1078 }
1079
1080 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_gt_4) {
1081 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1082 for (size_t k = 5; k < 8; k++) {
1083 GemmMicrokernelTester()
1084 .mr(8)
1085 .nr(8)
1086 .kr(1)
1087 .sr(1)
1088 .m(8)
1089 .n(8)
1090 .k(k)
1091 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1092 }
1093 }
1094
1095 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
1096 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1097 for (size_t k = 5; k < 8; k++) {
1098 GemmMicrokernelTester()
1099 .mr(8)
1100 .nr(8)
1101 .kr(1)
1102 .sr(1)
1103 .m(8)
1104 .n(8)
1105 .k(k)
1106 .a_stride(11)
1107 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1108 }
1109 }
1110
1111 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
1112 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1113 for (size_t k = 5; k < 8; k++) {
1114 for (uint32_t m = 1; m <= 8; m++) {
1115 for (uint32_t n = 1; n <= 8; n++) {
1116 GemmMicrokernelTester()
1117 .mr(8)
1118 .nr(8)
1119 .kr(1)
1120 .sr(1)
1121 .m(m)
1122 .n(n)
1123 .k(k)
1124 .iterations(1)
1125 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1126 }
1127 }
1128 }
1129 }
1130
1131 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_div_4) {
1132 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1133 for (size_t k = 8; k <= 40; k += 4) {
1134 GemmMicrokernelTester()
1135 .mr(8)
1136 .nr(8)
1137 .kr(1)
1138 .sr(1)
1139 .m(8)
1140 .n(8)
1141 .k(k)
1142 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1143 }
1144 }
1145
1146 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_div_4_strided_a) {
1147 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1148 for (size_t k = 8; k <= 40; k += 4) {
1149 GemmMicrokernelTester()
1150 .mr(8)
1151 .nr(8)
1152 .kr(1)
1153 .sr(1)
1154 .m(8)
1155 .n(8)
1156 .k(k)
1157 .a_stride(43)
1158 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1159 }
1160 }
1161
1162 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
1163 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1164 for (size_t k = 8; k <= 40; k += 4) {
1165 for (uint32_t m = 1; m <= 8; m++) {
1166 for (uint32_t n = 1; n <= 8; n++) {
1167 GemmMicrokernelTester()
1168 .mr(8)
1169 .nr(8)
1170 .kr(1)
1171 .sr(1)
1172 .m(m)
1173 .n(n)
1174 .k(k)
1175 .iterations(1)
1176 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1177 }
1178 }
1179 }
1180 }
1181
1182 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_gt_8) {
1183 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1184 for (uint32_t n = 9; n < 16; n++) {
1185 for (size_t k = 1; k <= 20; k += 5) {
1186 GemmMicrokernelTester()
1187 .mr(8)
1188 .nr(8)
1189 .kr(1)
1190 .sr(1)
1191 .m(8)
1192 .n(8)
1193 .k(k)
1194 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1195 }
1196 }
1197 }
1198
1199 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
1200 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1201 for (uint32_t n = 9; n < 16; n++) {
1202 for (size_t k = 1; k <= 20; k += 5) {
1203 GemmMicrokernelTester()
1204 .mr(8)
1205 .nr(8)
1206 .kr(1)
1207 .sr(1)
1208 .m(8)
1209 .n(8)
1210 .k(k)
1211 .cn_stride(11)
1212 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1213 }
1214 }
1215 }
1216
1217 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_gt_8_strided_a) {
1218 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1219 for (uint32_t n = 9; n < 16; n++) {
1220 for (size_t k = 1; k <= 20; k += 5) {
1221 GemmMicrokernelTester()
1222 .mr(8)
1223 .nr(8)
1224 .kr(1)
1225 .sr(1)
1226 .m(8)
1227 .n(n)
1228 .k(k)
1229 .a_stride(23)
1230 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1231 }
1232 }
1233 }
1234
1235 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
1236 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1237 for (uint32_t n = 9; n < 16; n++) {
1238 for (size_t k = 1; k <= 20; k += 5) {
1239 for (uint32_t m = 1; m <= 8; m++) {
1240 GemmMicrokernelTester()
1241 .mr(8)
1242 .nr(8)
1243 .kr(1)
1244 .sr(1)
1245 .m(m)
1246 .n(n)
1247 .k(k)
1248 .iterations(1)
1249 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1250 }
1251 }
1252 }
1253 }
1254
1255 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_div_8) {
1256 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1257 for (uint32_t n = 16; n <= 24; n += 8) {
1258 for (size_t k = 1; k <= 20; k += 5) {
1259 GemmMicrokernelTester()
1260 .mr(8)
1261 .nr(8)
1262 .kr(1)
1263 .sr(1)
1264 .m(8)
1265 .n(8)
1266 .k(k)
1267 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1268 }
1269 }
1270 }
1271
1272 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
1273 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1274 for (uint32_t n = 16; n <= 24; n += 8) {
1275 for (size_t k = 1; k <= 20; k += 5) {
1276 GemmMicrokernelTester()
1277 .mr(8)
1278 .nr(8)
1279 .kr(1)
1280 .sr(1)
1281 .m(8)
1282 .n(n)
1283 .k(k)
1284 .cn_stride(11)
1285 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1286 }
1287 }
1288 }
1289
1290 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_div_8_strided_a) {
1291 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1292 for (uint32_t n = 16; n <= 24; n += 8) {
1293 for (size_t k = 1; k <= 20; k += 5) {
1294 GemmMicrokernelTester()
1295 .mr(8)
1296 .nr(8)
1297 .kr(1)
1298 .sr(1)
1299 .m(8)
1300 .n(n)
1301 .k(k)
1302 .a_stride(23)
1303 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1304 }
1305 }
1306 }
1307
1308 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
1309 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1310 for (uint32_t n = 16; n <= 24; n += 8) {
1311 for (size_t k = 1; k <= 20; k += 5) {
1312 for (uint32_t m = 1; m <= 8; m++) {
1313 GemmMicrokernelTester()
1314 .mr(8)
1315 .nr(8)
1316 .kr(1)
1317 .sr(1)
1318 .m(m)
1319 .n(n)
1320 .k(k)
1321 .iterations(1)
1322 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1323 }
1324 }
1325 }
1326 }
1327
1328 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
1329 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1330 for (size_t k = 1; k <= 20; k += 5) {
1331 for (uint32_t m = 1; m <= 8; m++) {
1332 for (uint32_t n = 1; n <= 8; n++) {
1333 GemmMicrokernelTester()
1334 .mr(8)
1335 .nr(8)
1336 .kr(1)
1337 .sr(1)
1338 .m(m)
1339 .n(n)
1340 .k(k)
1341 .cm_stride(11)
1342 .iterations(1)
1343 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1344 }
1345 }
1346 }
1347 }
1348
1349 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, qmin) {
1350 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1351 GemmMicrokernelTester()
1352 .mr(8)
1353 .nr(8)
1354 .kr(1)
1355 .sr(1)
1356 .m(8)
1357 .n(8)
1358 .k(4)
1359 .qmin(128)
1360 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1361 }
1362
1363 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, qmax) {
1364 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1365 GemmMicrokernelTester()
1366 .mr(8)
1367 .nr(8)
1368 .kr(1)
1369 .sr(1)
1370 .m(8)
1371 .n(8)
1372 .k(4)
1373 .qmax(128)
1374 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1375 }
1376
1377 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, strided_cm) {
1378 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1379 GemmMicrokernelTester()
1380 .mr(8)
1381 .nr(8)
1382 .kr(1)
1383 .sr(1)
1384 .m(8)
1385 .n(8)
1386 .k(4)
1387 .cm_stride(11)
1388 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
1389 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001390#endif // XNN_ARCH_ARM64