XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 1 | #!/bin/sh |
| 2 | # Copyright 2019 Google LLC |
| 3 | # |
| 4 | # This source code is licensed under the BSD-style license found in the |
| 5 | # LICENSE file in the root directory of this source tree. |
| 6 | |
| 7 | #################################### Scalar ################################### |
Marat Dukhan | 436ebe6 | 2019-12-04 15:10:12 -0800 | [diff] [blame] | 8 | ### Generic C micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 9 | tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/1x4-scalar.c & |
| 10 | tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/2x4-scalar.c & |
| 11 | tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x2-scalar.c & |
| 12 | tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x4-scalar.c & |
Marat Dukhan | 163a7e6 | 2020-04-09 04:19:26 -0700 | [diff] [blame] | 13 | |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 14 | tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/1x4-relu-scalar.c & |
| 15 | tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/2x4-relu-scalar.c & |
| 16 | tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x2-relu-scalar.c & |
| 17 | tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x4-relu-scalar.c & |
Marat Dukhan | 467f636 | 2020-05-22 23:21:55 -0700 | [diff] [blame] | 18 | |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 19 | tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x4-minmax-scalar.c & |
| 20 | tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/2x4-minmax-scalar.c & |
| 21 | tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2-minmax-scalar.c & |
| 22 | tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x4-minmax-scalar.c & |
Marat Dukhan | 436ebe6 | 2019-12-04 15:10:12 -0800 | [diff] [blame] | 23 | |
| 24 | ### WAsm-specific micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 25 | tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/1x4-wasm.c & |
| 26 | tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/2x4-wasm.c & |
| 27 | tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x2-wasm.c & |
| 28 | tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x4-wasm.c & |
Marat Dukhan | 163a7e6 | 2020-04-09 04:19:26 -0700 | [diff] [blame] | 29 | |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 30 | tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/1x4-relu-wasm.c & |
| 31 | tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/2x4-relu-wasm.c & |
| 32 | tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x2-relu-wasm.c & |
| 33 | tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x4-relu-wasm.c & |
Marat Dukhan | 467f636 | 2020-05-22 23:21:55 -0700 | [diff] [blame] | 34 | |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 35 | tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x4-minmax-wasm.c & |
| 36 | tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/2x4-minmax-wasm.c & |
| 37 | tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2-minmax-wasm.c & |
| 38 | tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x4-minmax-wasm.c & |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 39 | |
Marat Dukhan | 1bbf96b | 2020-06-15 23:01:20 -0700 | [diff] [blame] | 40 | ################################## WAsm SIMD ################################## |
| 41 | ### LOAD1+BROADCAST micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 42 | tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-arm-loadsplat.c & |
| 43 | tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=3 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-arm-loadsplat.c & |
| 44 | tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-arm-loadsplat.c & |
| 45 | tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-arm-loadsplat.c & |
| 46 | tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=6 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-arm-loadsplat.c & |
Marat Dukhan | 1bbf96b | 2020-06-15 23:01:20 -0700 | [diff] [blame] | 47 | |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 48 | tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=1 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-x86-loadsplat.c & |
| 49 | tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=3 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-x86-loadsplat.c & |
| 50 | tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=4 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-x86-loadsplat.c & |
| 51 | tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=5 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-x86-loadsplat.c & |
| 52 | tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=6 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-x86-loadsplat.c & |
Marat Dukhan | 1bbf96b | 2020-06-15 23:01:20 -0700 | [diff] [blame] | 53 | |
| 54 | ### LOAD4+DUPLICATE micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 55 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-arm-splat.c & |
| 56 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=3 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-arm-splat.c & |
| 57 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-arm-splat.c & |
| 58 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-arm-splat.c & |
| 59 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=6 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-arm-splat.c & |
Marat Dukhan | 1bbf96b | 2020-06-15 23:01:20 -0700 | [diff] [blame] | 60 | |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 61 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/1x8-relu-wasmsimd-splat.c & |
| 62 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x8-relu-wasmsimd-splat.c & |
| 63 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/5x8-relu-wasmsimd-splat.c & |
Marat Dukhan | 802808c | 2020-06-16 11:01:17 -0700 | [diff] [blame] | 64 | |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 65 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/1x8-wasmsimd-splat.c & |
| 66 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x8-wasmsimd-splat.c & |
| 67 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/5x8-wasmsimd-splat.c & |
Marat Dukhan | 802808c | 2020-06-16 11:01:17 -0700 | [diff] [blame] | 68 | |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 69 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-x86-splat.c & |
| 70 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=3 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-x86-splat.c & |
| 71 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-x86-splat.c & |
| 72 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-x86-splat.c & |
| 73 | tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=6 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-x86-splat.c & |
Marat Dukhan | 802808c | 2020-06-16 11:01:17 -0700 | [diff] [blame] | 74 | |
Marat Dukhan | 1bbf96b | 2020-06-15 23:01:20 -0700 | [diff] [blame] | 75 | ### LOAD4+PERMUTE micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 76 | tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8s4-minmax-wasmsimd-arm.c & |
| 77 | tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=3 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8s4-minmax-wasmsimd-arm.c & |
| 78 | tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8s4-minmax-wasmsimd-arm.c & |
| 79 | tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8s4-minmax-wasmsimd-arm.c & |
| 80 | tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=6 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8s4-minmax-wasmsimd-arm.c & |
Marat Dukhan | 1bbf96b | 2020-06-15 23:01:20 -0700 | [diff] [blame] | 81 | |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 82 | tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8s4-minmax-wasmsimd-x86.c & |
| 83 | tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=3 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8s4-minmax-wasmsimd-x86.c & |
| 84 | tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=4 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8s4-minmax-wasmsimd-x86.c & |
| 85 | tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=5 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8s4-minmax-wasmsimd-x86.c & |
| 86 | tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=6 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8s4-minmax-wasmsimd-x86.c & |
Marat Dukhan | 1bbf96b | 2020-06-15 23:01:20 -0700 | [diff] [blame] | 87 | |
Marat Dukhan | e39e646 | 2020-07-09 01:33:36 -0700 | [diff] [blame] | 88 | ### MRx2 micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 89 | tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2c4-minmax-wasmsimd-arm.c & |
| 90 | tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2c4-minmax-wasmsimd-x86.c & |
| 91 | tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x2c4-relu-wasmsimd.c & |
| 92 | tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x2c4-wasmsimd.c & |
Marat Dukhan | e39e646 | 2020-07-09 01:33:36 -0700 | [diff] [blame] | 93 | |
Frank Barchard | 387c2d1 | 2019-12-16 19:14:07 -0800 | [diff] [blame] | 94 | ############################### AArch64 assembly ############################## |
Frank Barchard | e349124 | 2021-06-11 14:04:57 -0700 | [diff] [blame] | 95 | ### LD64 micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 96 | tools/xngen src/f32-igemm/4x8-aarch64-neonfma-ld64.S.in -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-ld64.S & |
| 97 | tools/xngen src/f32-igemm/6x8-aarch64-neonfma-ld64.S.in -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-ld64.S & |
Frank Barchard | 79cd5f9 | 2021-06-21 17:34:59 -0700 | [diff] [blame] | 98 | |
| 99 | ### LD128 micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 100 | tools/xngen src/f32-igemm/4x8-aarch64-neonfma-ld128.S.in -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-ld128.S & |
| 101 | tools/xngen src/f32-igemm/6x8-aarch64-neonfma-ld128.S.in -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-ld128.S & |
Frank Barchard | e349124 | 2021-06-11 14:04:57 -0700 | [diff] [blame] | 102 | |
Frank Barchard | d2c4d0d | 2021-06-28 16:46:57 -0700 | [diff] [blame] | 103 | ### Cortex A75 micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 104 | tools/xngen src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/1x8-minmax-aarch64-neonfma-cortex-a75.S & |
| 105 | tools/xngen src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/1x8-minmax-aarch64-neonfma-prfm-cortex-a75.S & |
| 106 | tools/xngen src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-cortex-a75.S & |
| 107 | tools/xngen src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-prfm-cortex-a75.S & |
| 108 | tools/xngen src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/5x8-minmax-aarch64-neonfma-cortex-a75.S & |
| 109 | tools/xngen src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/5x8-minmax-aarch64-neonfma-prfm-cortex-a75.S & |
| 110 | tools/xngen src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-cortex-a75.S & |
| 111 | tools/xngen src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-prfm-cortex-a75.S & |
Frank Barchard | 387c2d1 | 2019-12-16 19:14:07 -0800 | [diff] [blame] | 112 | |
Frank Barchard | 90ce789 | 2020-02-10 23:35:45 -0800 | [diff] [blame] | 113 | ############################### AArch32 assembly ############################## |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 114 | tools/xngen src/f32-igemm/4x8-aarch32-neon-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-cortex-a75.S & |
Frank Barchard | 7873586 | 2022-01-04 16:47:44 -0800 | [diff] [blame] | 115 | tools/xngen src/f32-igemm/4x8-aarch32-neon-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-prfm-cortex-a75.S & |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 116 | tools/xngen src/f32-igemm/4x8-minmax-aarch32-neon-cortex-a7.S.in -D PREFETCH=1 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-cortex-a7.S & |
| 117 | tools/xngen src/f32-igemm/4x8-minmax-aarch32-neon-ld64.S.in -D PREFETCH=0 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-ld64.S & |
Frank Barchard | 569561d | 2020-06-17 13:11:12 -0700 | [diff] [blame] | 118 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 119 | ################################### ARM NEON ################################## |
| 120 | ### LD64 micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 121 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/1x8-minmax-neon-lane-ld64.c & |
| 122 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/1x8-minmax-neonfma-lane-ld64.c & |
| 123 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x4-minmax-neon-lane-ld64.c & |
| 124 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x4-minmax-neonfma-lane-ld64.c & |
| 125 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neon-lane-ld64.c & |
| 126 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neonfma-lane-ld64.c & |
| 127 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neon-lane-ld64.c & |
| 128 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neonfma-lane-ld64.c & |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 129 | ### LD128 micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 130 | tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neon-lane-ld128.c & |
| 131 | tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neonfma-lane-ld128.c & |
| 132 | tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neon-lane-ld128.c & |
| 133 | tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neonfma-lane-ld128.c & |
Frank Barchard | 91317c5 | 2019-11-22 10:54:35 -0800 | [diff] [blame] | 134 | ### MRx2 micro-kernels- |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 135 | tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x2-minmax-neon-lane-ld64.c & |
| 136 | tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x2-minmax-neonfma-lane-ld64.c & |
Frank Barchard | 5243bb0 | 2019-11-22 16:37:50 -0800 | [diff] [blame] | 137 | ### DUP LD64 micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 138 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/1x8-minmax-neon-dup-ld64.c & |
| 139 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/1x8-minmax-neonfma-dup-ld64.c & |
| 140 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neon-dup-ld64.c & |
| 141 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neonfma-dup-ld64.c & |
| 142 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neon-dup-ld64.c & |
| 143 | tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neonfma-dup-ld64.c & |
Frank Barchard | 5243bb0 | 2019-11-22 16:37:50 -0800 | [diff] [blame] | 144 | ### DUP LD128 micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 145 | tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neon-dup-ld128.c & |
| 146 | tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neonfma-dup-ld128.c & |
| 147 | tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neon-dup-ld128.c & |
| 148 | tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neonfma-dup-ld128.c & |
Frank Barchard | df06d80 | 2019-11-20 15:53:46 -0800 | [diff] [blame] | 149 | ### LOAD4+PERMUTE micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 150 | tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8s4-minmax-neon.c & |
| 151 | tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/1x8s4-minmax-neonfma.c & |
| 152 | tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8s4-minmax-neon.c & |
| 153 | tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/4x8s4-minmax-neonfma.c & |
| 154 | tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8s4-minmax-neon.c & |
| 155 | tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/6x8s4-minmax-neonfma.c & |
| 156 | tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/8x8s4-minmax-neon.c & |
| 157 | tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/8x8s4-minmax-neonfma.c & |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 158 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 159 | ################################### x86 SSE ################################### |
| 160 | ### LOAD1+BROADCAST micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 161 | tools/xngen src/f32-igemm/sse-load1.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-minmax-sse-load1.c & |
| 162 | tools/xngen src/f32-igemm/sse-load1.c.in -D MR=3 -D NR=8 -o src/f32-igemm/gen/3x8-minmax-sse-load1.c & |
| 163 | tools/xngen src/f32-igemm/sse-load1.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-minmax-sse-load1.c & |
| 164 | tools/xngen src/f32-igemm/sse-load1.c.in -D MR=5 -D NR=8 -o src/f32-igemm/gen/5x8-minmax-sse-load1.c & |
Marat Dukhan | 802fcae | 2020-12-11 14:37:25 -0800 | [diff] [blame] | 165 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 166 | ### LOAD4+DUPLICATE micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 167 | tools/xngen src/f32-igemm/sse-dup.c.in -D MR=1 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/1x8-minmax-sse-dup.c & |
| 168 | tools/xngen src/f32-igemm/sse-dup.c.in -D MR=3 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/3x8-minmax-sse-dup.c & |
| 169 | tools/xngen src/f32-igemm/sse-dup.c.in -D MR=4 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/4x8-minmax-sse-dup.c & |
| 170 | tools/xngen src/f32-igemm/sse-dup.c.in -D MR=5 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/5x8-minmax-sse-dup.c & |
Marat Dukhan | 802fcae | 2020-12-11 14:37:25 -0800 | [diff] [blame] | 171 | |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 172 | tools/xngen src/f32-igemm/sse-dup.c.in -D MR=1 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/1x8-minmax-sse2-dup.c & |
| 173 | tools/xngen src/f32-igemm/sse-dup.c.in -D MR=3 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/3x8-minmax-sse2-dup.c & |
| 174 | tools/xngen src/f32-igemm/sse-dup.c.in -D MR=4 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/4x8-minmax-sse2-dup.c & |
| 175 | tools/xngen src/f32-igemm/sse-dup.c.in -D MR=5 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/5x8-minmax-sse2-dup.c & |
Marat Dukhan | 802fcae | 2020-12-11 14:37:25 -0800 | [diff] [blame] | 176 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 177 | ### LOAD4+PERMUTE micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 178 | tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8s4-minmax-sse.c & |
| 179 | tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=3 -D NR=8 -o src/f32-igemm/gen/3x8s4-minmax-sse.c & |
| 180 | tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8s4-minmax-sse.c & |
| 181 | tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=5 -D NR=8 -o src/f32-igemm/gen/5x8s4-minmax-sse.c & |
Marat Dukhan | 802fcae | 2020-12-11 14:37:25 -0800 | [diff] [blame] | 182 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 183 | ### MRx2 micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 184 | tools/xngen src/f32-igemm/MRx2c4-sse.c.in -D MR=4 -D NR=2 -o src/f32-igemm/gen/4x2c4-minmax-sse.c & |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 185 | |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 186 | ################################### x86 AVX ################################### |
| 187 | ### AVX+BROADCAST micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 188 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8-minmax-avx-broadcast.c & |
| 189 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8-minmax-avx-broadcast.c & |
| 190 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/5x8-minmax-avx-broadcast.c & |
| 191 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8-minmax-avx-broadcast.c & |
| 192 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/7x8-minmax-avx-broadcast.c & |
| 193 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/1x16-minmax-avx-broadcast.c & |
| 194 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/3x16-minmax-avx-broadcast.c & |
| 195 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/4x16-minmax-avx-broadcast.c & |
| 196 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/5x16-minmax-avx-broadcast.c & |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 197 | ### FMA3+BROADCAST micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 198 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/1x8-minmax-fma3-broadcast.c & |
| 199 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/4x8-minmax-fma3-broadcast.c & |
| 200 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/5x8-minmax-fma3-broadcast.c & |
| 201 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/6x8-minmax-fma3-broadcast.c & |
| 202 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/7x8-minmax-fma3-broadcast.c & |
| 203 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/8x8-minmax-fma3-broadcast.c & |
| 204 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/1x16-minmax-fma3-broadcast.c & |
| 205 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/3x16-minmax-fma3-broadcast.c & |
| 206 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16-minmax-fma3-broadcast.c & |
| 207 | tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16-minmax-fma3-broadcast.c & |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 208 | |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 209 | tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=1 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/1x16s4-minmax-fma3-broadcast.c & |
| 210 | tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=3 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/3x16s4-minmax-fma3-broadcast.c & |
| 211 | tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16s4-minmax-fma3-broadcast.c & |
| 212 | tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16s4-minmax-fma3-broadcast.c & |
Marat Dukhan | 2712132 | 2019-12-09 14:57:40 -0800 | [diff] [blame] | 213 | |
Marat Dukhan | 0f349c4 | 2019-11-27 11:58:54 -0800 | [diff] [blame] | 214 | ################################# x86 AVX-512 ################################# |
| 215 | ### AVX512F+BROADCAST micro-kernels |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 216 | tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -o src/f32-igemm/gen/1x16-minmax-avx512f-broadcast.c & |
| 217 | tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=4 -D NR=16 -o src/f32-igemm/gen/4x16-minmax-avx512f-broadcast.c & |
| 218 | tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=5 -D NR=16 -o src/f32-igemm/gen/5x16-minmax-avx512f-broadcast.c & |
| 219 | tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=6 -D NR=16 -o src/f32-igemm/gen/6x16-minmax-avx512f-broadcast.c & |
| 220 | tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=7 -D NR=16 -o src/f32-igemm/gen/7x16-minmax-avx512f-broadcast.c & |
| 221 | tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=8 -D NR=16 -o src/f32-igemm/gen/8x16-minmax-avx512f-broadcast.c & |
Marat Dukhan | 0f349c4 | 2019-11-27 11:58:54 -0800 | [diff] [blame] | 222 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 223 | ################################## Unit tests ################################# |
Zhi An Ng | 4c1fd6f | 2022-01-10 19:35:06 -0800 | [diff] [blame] | 224 | tools/generate-gemm-test.py --spec test/f32-igemm.yaml --output test/f32-igemm.cc --output test/f32-igemm-2.cc & |
| 225 | tools/generate-gemm-test.py --spec test/f32-igemm-relu.yaml --output test/f32-igemm-relu.cc --output test/f32-igemm-relu-2.cc & |
| 226 | tools/generate-gemm-test.py --spec test/f32-igemm-minmax.yaml --output test/f32-igemm-minmax.cc --output test/f32-igemm-minmax-2.cc & |
Frank Barchard | aae722a | 2021-08-30 12:37:30 -0700 | [diff] [blame] | 227 | |
| 228 | wait |