#!/bin/sh
# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Regenerates the f32-igemm micro-kernel sources in src/f32-igemm/gen/ by
# running tools/xngen on the *.c.in templates in src/f32-igemm/, then
# regenerates test/f32-igemm.cc from test/f32-igemm.yaml.
#
# All paths below are relative, so the script must be run from the root of
# the source tree.
#
# Each xngen invocation passes its parameters as "-D NAME=VALUE" and names the
# output file with "-o". MR and NR appear in the output name as "<MR>x<NR>".

# Abort on the first failing command so that a broken template or a missing
# tool is reported instead of being masked by later steps succeeding.
set -e

#################################### Scalar ###################################
tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -o src/f32-igemm/gen/1x4-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -o src/f32-igemm/gen/2x4-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -o src/f32-igemm/gen/4x2-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -o src/f32-igemm/gen/4x4-scalar.c

################################### ARM NEON ##################################
# Output naming used throughout this section:
#   FMA=0 -> "neon",  FMA=1 -> "neonfma"
#   DUP=0 -> "lane",  DUP=1 -> "dup"
### LD64 micro-kernels
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/1x8-neon-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/1x8-neonfma-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x4-neon-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x4-neonfma-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-neon-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-neonfma-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-neon-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-neonfma-lane-ld64.c
### LD128 micro-kernels
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-neon-lane-ld128.c
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-neonfma-lane-ld128.c
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-neon-lane-ld128.c
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-neonfma-lane-ld128.c
### MRx2 micro-kernels
tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x2-neon-lane-ld64.c
tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x2-neonfma-lane-ld64.c
### DUP LD64 micro-kernels
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/1x8-neon-dup-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/1x8-neonfma-dup-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-neon-dup-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-neonfma-dup-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-neon-dup-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-neonfma-dup-ld64.c
### DUP LD128 micro-kernels
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-neon-dup-ld128.c
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-neonfma-dup-ld128.c
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-neon-dup-ld128.c
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-neonfma-dup-ld128.c
### LOAD4+PERMUTE micro-kernels
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8s4-neon.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/1x8s4-neonfma.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8s4-neon.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/4x8s4-neonfma.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8s4-neon.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/6x8s4-neonfma.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/8x8s4-neon.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/8x8s4-neonfma.c

#################################### PSIMD ####################################
### LOAD1+BROADCAST micro-kernels
tools/xngen src/f32-igemm/psimd-loadsplat.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-psimd-loadsplat.c
tools/xngen src/f32-igemm/psimd-loadsplat.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-psimd-loadsplat.c
tools/xngen src/f32-igemm/psimd-loadsplat.c.in -D MR=6 -D NR=8 -o src/f32-igemm/gen/6x8-psimd-loadsplat.c
### LOAD4+DUPLICATE micro-kernels
tools/xngen src/f32-igemm/psimd-splat.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-psimd-splat.c
tools/xngen src/f32-igemm/psimd-splat.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-psimd-splat.c
tools/xngen src/f32-igemm/psimd-splat.c.in -D MR=6 -D NR=8 -o src/f32-igemm/gen/6x8-psimd-splat.c
### LOAD4+PERMUTE micro-kernels
tools/xngen src/f32-igemm/psimd-s4.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8s4-psimd.c
tools/xngen src/f32-igemm/psimd-s4.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8s4-psimd.c
tools/xngen src/f32-igemm/psimd-s4.c.in -D MR=6 -D NR=8 -o src/f32-igemm/gen/6x8s4-psimd.c
### MRx2 micro-kernels
tools/xngen src/f32-igemm/MRx2c4-psimd.c.in -D MR=4 -D NR=2 -o src/f32-igemm/gen/4x2c4-psimd.c

################################### x86 SSE ###################################
### LOAD1+BROADCAST micro-kernels
tools/xngen src/f32-igemm/sse-load1.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-sse-load1.c
tools/xngen src/f32-igemm/sse-load1.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-sse-load1.c
### LOAD4+DUPLICATE micro-kernels
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-sse-dup.c
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-sse-dup.c
### LOAD4+PERMUTE micro-kernels
tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8s4-sse.c
tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8s4-sse.c
### MRx2 micro-kernels
tools/xngen src/f32-igemm/MRx2c4-sse.c.in -D MR=4 -D NR=2 -o src/f32-igemm/gen/4x2c4-sse.c

################################### x86 AVX ###################################
# Both groups below use the same avx-broadcast.c.in template; FMA=0 outputs are
# named "avx" and FMA=3 outputs are named "fma3".
### AVX+BROADCAST micro-kernels
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/5x8-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/7x8-avx-broadcast.c
### FMA3+BROADCAST micro-kernels
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/1x8-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/4x8-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/5x8-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/6x8-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/7x8-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/8x8-fma3-broadcast.c

################################## Unit tests #################################
# Rebuild the test source from its YAML spec after the kernels are generated.
tools/generate-gemm-test.py --spec test/f32-igemm.yaml --output test/f32-igemm.cc