XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 1 | #!/bin/sh |
| 2 | # Copyright 2019 Google LLC |
| 3 | # |
| 4 | # This source code is licensed under the BSD-style license found in the |
| 5 | # LICENSE file in the root directory of this source tree. |
| 6 | |
| 7 | #################################### Scalar ################################### |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 8 | tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D INC=0 -o src/f32-gemm/gen/1x4-scalar.c |
| 9 | tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D INC=1 -o src/f32-gemm/gen-inc/1x4-scalar.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 10 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 11 | tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D INC=0 -o src/f32-gemm/gen/2x4-scalar.c |
| 12 | tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D INC=1 -o src/f32-gemm/gen-inc/2x4-scalar.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 13 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 14 | tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=2 -D INC=0 -o src/f32-gemm/gen/4x2-scalar.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 15 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 16 | tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D INC=0 -o src/f32-gemm/gen/4x4-scalar.c |
| 17 | tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D INC=1 -o src/f32-gemm/gen-inc/4x4-scalar.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 18 | |
| 19 | ############################### AArch64 assembly ############################## |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 20 | tools/xngen src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/1x12-aarch64-neonfma-cortex-a53.S |
| 21 | tools/xngen src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/1x12-aarch64-neonfma-cortex-a53.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 22 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 23 | tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a53.S |
| 24 | tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a53.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 25 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 26 | tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a57.S.in -D INC=0 -o src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a57.S |
| 27 | tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a57.S.in -D INC=1 -o src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a57.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 28 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 29 | tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -o src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a75.S |
| 30 | tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -o src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a75.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 31 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 32 | tools/xngen src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/4x12-aarch64-neonfma-cortex-a53.S |
| 33 | tools/xngen src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x12-aarch64-neonfma-cortex-a53.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 34 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 35 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a53.S |
| 36 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a53.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 37 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 38 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a57.S.in -D INC=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a57.S |
| 39 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a57.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a57.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 40 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 41 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a75.S |
| 42 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a75.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 43 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 44 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld128.S.in -D INC=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-ld128.S |
| 45 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld128.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-ld128.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 46 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 47 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld64.S.in -D INC=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-ld64.S |
| 48 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld64.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-ld64.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 49 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 50 | tools/xngen src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -o src/f32-gemm/gen/5x8-aarch64-neonfma-cortex-a75.S |
| 51 | tools/xngen src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -o src/f32-gemm/gen-inc/5x8-aarch64-neonfma-cortex-a75.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 52 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 53 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a53.S |
| 54 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a53.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 55 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 56 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a57.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a57.S |
| 57 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a57.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a57.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 58 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 59 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a73.S |
| 60 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a73.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 61 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 62 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a75.S |
| 63 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a75.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 64 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 65 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld64.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-ld64.S |
| 66 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld64.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-ld64.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 67 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 68 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld128.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-ld128.S |
| 69 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld128.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-ld128.S |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 70 | |
| 71 | ################################### ARM NEON ################################## |
| 72 | ### LD64 micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 73 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/1x8-neon-lane-ld64.c |
| 74 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/1x8-neon-lane-ld64.c |
| 75 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/1x8-neonfma-lane-ld64.c |
| 76 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/1x8-neonfma-lane-ld64.c |
| 77 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-neon-lane-ld64.c |
| 78 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8-neon-lane-ld64.c |
| 79 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-neonfma-lane-ld64.c |
| 80 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8-neonfma-lane-ld64.c |
| 81 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/5x8-neon-lane-ld64.c |
| 82 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/5x8-neon-lane-ld64.c |
| 83 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/5x8-neonfma-lane-ld64.c |
| 84 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/5x8-neonfma-lane-ld64.c |
| 85 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-neon-lane-ld64.c |
| 86 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8-neon-lane-ld64.c |
| 87 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-neonfma-lane-ld64.c |
| 88 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8-neonfma-lane-ld64.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 89 | ### LD128 micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 90 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-neon-lane-ld128.c |
| 91 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8-neon-lane-ld128.c |
| 92 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-neonfma-lane-ld128.c |
| 93 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8-neonfma-lane-ld128.c |
Frank Barchard | 69172d9 | 2019-11-26 16:22:39 -0800 | [diff] [blame^] | 94 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-neon-lane-ld128.c |
| 95 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8-neon-lane-ld128.c |
| 96 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-neonfma-lane-ld128.c |
| 97 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8-neonfma-lane-ld128.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 98 | ### MRx2 micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 99 | tools/xngen src/f32-gemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x2-neon-lane-ld64.c |
| 100 | tools/xngen src/f32-gemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x2-neonfma-lane-ld64.c |
Frank Barchard | 5243bb0 | 2019-11-22 16:37:50 -0800 | [diff] [blame] | 101 | ### DUP LD64 micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 102 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/1x8-neon-dup-ld64.c |
| 103 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/1x8-neon-dup-ld64.c |
| 104 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/1x8-neonfma-dup-ld64.c |
| 105 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/1x8-neonfma-dup-ld64.c |
| 106 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-neon-dup-ld64.c |
| 107 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8-neon-dup-ld64.c |
| 108 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-neonfma-dup-ld64.c |
| 109 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8-neonfma-dup-ld64.c |
| 110 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-neon-dup-ld64.c |
| 111 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8-neon-dup-ld64.c |
| 112 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-neonfma-dup-ld64.c |
| 113 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8-neonfma-dup-ld64.c |
Frank Barchard | 5243bb0 | 2019-11-22 16:37:50 -0800 | [diff] [blame] | 114 | ### DUP LD128 micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 115 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-neon-dup-ld128.c |
| 116 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8-neon-dup-ld128.c |
| 117 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-neonfma-dup-ld128.c |
| 118 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8-neonfma-dup-ld128.c |
Frank Barchard | 69172d9 | 2019-11-26 16:22:39 -0800 | [diff] [blame^] | 119 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-neon-dup-ld128.c |
| 120 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8-neon-dup-ld128.c |
| 121 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-neonfma-dup-ld128.c |
| 122 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8-neonfma-dup-ld128.c |
Frank Barchard | df06d80 | 2019-11-20 15:53:46 -0800 | [diff] [blame] | 123 | ### LOAD4+PERMUTE micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 124 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/1x8s4-neon.c |
| 125 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4-neon.c |
| 126 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/1x8s4-neonfma.c |
| 127 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4-neonfma.c |
| 128 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/4x8s4-neon.c |
| 129 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4-neon.c |
| 130 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/4x8s4-neonfma.c |
| 131 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4-neonfma.c |
| 132 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/6x8s4-neon.c |
| 133 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/6x8s4-neon.c |
| 134 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/6x8s4-neonfma.c |
| 135 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/6x8s4-neonfma.c |
| 136 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/8x8s4-neon.c |
| 137 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/8x8s4-neon.c |
| 138 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/8x8s4-neonfma.c |
| 139 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/8x8s4-neonfma.c |
Frank Barchard | df06d80 | 2019-11-20 15:53:46 -0800 | [diff] [blame] | 140 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 141 | #################################### PSIMD #################################### |
| 142 | ### LOAD1+BROADCAST micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 143 | tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8-psimd-loadsplat.c |
| 144 | tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8-psimd-loadsplat.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 145 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 146 | tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8-psimd-loadsplat.c |
| 147 | tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8-psimd-loadsplat.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 148 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 149 | tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=6 -D NR=8 -D INC=0 -o src/f32-gemm/gen/6x8-psimd-loadsplat.c |
| 150 | tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=6 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/6x8-psimd-loadsplat.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 151 | ### LOAD4+DUPLICATE micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 152 | tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8-psimd-splat.c |
| 153 | tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8-psimd-splat.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 154 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 155 | tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8-psimd-splat.c |
| 156 | tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8-psimd-splat.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 157 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 158 | tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=6 -D NR=8 -D INC=0 -o src/f32-gemm/gen/6x8-psimd-splat.c |
| 159 | tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=6 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/6x8-psimd-splat.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 160 | ### LOAD4+PERMUTE micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 161 | tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8s4-psimd.c |
| 162 | tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4-psimd.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 163 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 164 | tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8s4-psimd.c |
| 165 | tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4-psimd.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 166 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 167 | tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=6 -D NR=8 -D INC=0 -o src/f32-gemm/gen/6x8s4-psimd.c |
| 168 | tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=6 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/6x8s4-psimd.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 169 | |
| 170 | ################################### x86 SSE ################################### |
| 171 | ### LOAD1+BROADCAST micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 172 | tools/xngen src/f32-gemm/sse-load1.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8-sse-load1.c |
| 173 | tools/xngen src/f32-gemm/sse-load1.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8-sse-load1.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 174 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 175 | tools/xngen src/f32-gemm/sse-load1.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8-sse-load1.c |
| 176 | tools/xngen src/f32-gemm/sse-load1.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8-sse-load1.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 177 | ### LOAD4+DUPLICATE micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 178 | tools/xngen src/f32-gemm/sse-dup.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8-sse-dup.c |
| 179 | tools/xngen src/f32-gemm/sse-dup.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8-sse-dup.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 180 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 181 | tools/xngen src/f32-gemm/sse-dup.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8-sse-dup.c |
| 182 | tools/xngen src/f32-gemm/sse-dup.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8-sse-dup.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 183 | ### LOAD4+PERMUTE micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 184 | tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8s4-sse.c |
| 185 | tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4-sse.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 186 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 187 | tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8s4-sse.c |
| 188 | tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4-sse.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 189 | |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 190 | ################################### x86 AVX ################################### |
| 191 | ### AVX+BROADCAST micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 192 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/1x8-avx-broadcast.c |
| 193 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/1x8-avx-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 194 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 195 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/4x8-avx-broadcast.c |
| 196 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/4x8-avx-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 197 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 198 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/5x8-avx-broadcast.c |
| 199 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/5x8-avx-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 200 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 201 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/6x8-avx-broadcast.c |
| 202 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/6x8-avx-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 203 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 204 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/7x8-avx-broadcast.c |
| 205 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/7x8-avx-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 206 | ### FMA3+BROADCAST micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 207 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/1x8-fma3-broadcast.c |
| 208 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/1x8-fma3-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 209 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 210 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/4x8-fma3-broadcast.c |
| 211 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/4x8-fma3-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 212 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 213 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/5x8-fma3-broadcast.c |
| 214 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/5x8-fma3-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 215 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 216 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/6x8-fma3-broadcast.c |
| 217 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/6x8-fma3-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 218 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 219 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/7x8-fma3-broadcast.c |
| 220 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/7x8-fma3-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 221 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 222 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/8x8-fma3-broadcast.c |
| 223 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/8x8-fma3-broadcast.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 224 | |
| 225 | ################################## Unit tests ################################# |
| 226 | tools/generate-gemm-test.py --spec test/f32-gemm.yaml --output test/f32-gemm.cc |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 227 | tools/generate-gemm-test.py --spec test/f32-gemminc.yaml --output test/f32-gemminc.cc |