#!/bin/sh
# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#################################### Scalar ###################################
# Every command below expands the src/f32-igemm/scalar.c.in template with
# tools/xngen. The -D options set the template variables MR, NR, WASM and
# ACTIVATION; -o names the generated file, whose name starts with <MR>x<NR>.
### Generic C micro-kernels
# WASM=0; output names end in "-scalar.c".
tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/1x4-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/2x4-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x2-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x4-scalar.c

tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/1x4-relu-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/2x4-relu-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x2-relu-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x4-relu-scalar.c

tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x4-minmax-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/2x4-minmax-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2-minmax-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x4-minmax-scalar.c

### WAsm-specific micro-kernels
# Same template and MR/NR/ACTIVATION combinations as above, but with WASM=1;
# output names end in "-wasm.c".
tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/1x4-wasm.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/2x4-wasm.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x2-wasm.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x4-wasm.c

tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/1x4-relu-wasm.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/2x4-relu-wasm.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x2-relu-wasm.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x4-relu-wasm.c

tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x4-minmax-wasm.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/2x4-minmax-wasm.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2-minmax-wasm.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x4-minmax-wasm.c

################################## WAsm SIMD ##################################
# The X86 template variable selects the output flavour for MINMAX kernels:
# X86=0 writes "-wasmsimd-arm" files and X86=1 writes "-wasmsimd-x86" files.
# RELU and LINEAR kernels are generated once, with X86=0, into file names that
# carry no arm/x86 suffix.
### LOAD1+BROADCAST micro-kernels
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-arm-loadsplat.c
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=3 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-arm-loadsplat.c
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-arm-loadsplat.c
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-arm-loadsplat.c
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=6 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-arm-loadsplat.c

tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=1 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-x86-loadsplat.c
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=3 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-x86-loadsplat.c
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=4 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-x86-loadsplat.c
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=5 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-x86-loadsplat.c
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=6 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-x86-loadsplat.c

### LOAD4+DUPLICATE micro-kernels
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-arm-splat.c
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=3 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-arm-splat.c
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-arm-splat.c
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-arm-splat.c
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=6 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-arm-splat.c

tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/1x8-relu-wasmsimd-splat.c
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x8-relu-wasmsimd-splat.c
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/5x8-relu-wasmsimd-splat.c

tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/1x8-wasmsimd-splat.c
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x8-wasmsimd-splat.c
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/5x8-wasmsimd-splat.c

tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-x86-splat.c
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=3 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-x86-splat.c
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-x86-splat.c
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-x86-splat.c
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=6 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-x86-splat.c

### LOAD4+PERMUTE micro-kernels
# Outputs of the wasmsimd-s4 template carry an "s4" suffix after <MR>x<NR>.
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8s4-minmax-wasmsimd-arm.c
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=3 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8s4-minmax-wasmsimd-arm.c
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8s4-minmax-wasmsimd-arm.c
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8s4-minmax-wasmsimd-arm.c
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=6 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8s4-minmax-wasmsimd-arm.c

tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8s4-minmax-wasmsimd-x86.c
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=3 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8s4-minmax-wasmsimd-x86.c
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=4 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8s4-minmax-wasmsimd-x86.c
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=5 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8s4-minmax-wasmsimd-x86.c
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=6 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8s4-minmax-wasmsimd-x86.c

### MRx2 micro-kernels
# Outputs of the MRx2c4 template carry a "c4" suffix after <MR>x<NR>.
tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2c4-minmax-wasmsimd-arm.c
tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2c4-minmax-wasmsimd-x86.c
tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x2c4-relu-wasmsimd.c
tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x2c4-wasmsimd.c

############################### AArch64 assembly ##############################
# Cortex A75 / A57 micro-kernels
# Each *-cortex-a75.S.in template is expanded twice with INC=0: PREFETCH=0
# writes the "-cortex-a57.S" output and PREFETCH=1 writes the "-cortex-a75.S"
# output.
tools/xngen src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/1x8-minmax-aarch64-neonfma-cortex-a57.S
tools/xngen src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/1x8-minmax-aarch64-neonfma-cortex-a75.S
tools/xngen src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-cortex-a57.S
tools/xngen src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-cortex-a75.S
tools/xngen src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/5x8-minmax-aarch64-neonfma-cortex-a57.S
tools/xngen src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/5x8-minmax-aarch64-neonfma-cortex-a75.S
tools/xngen src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-cortex-a57.S
tools/xngen src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-cortex-a75.S

############################### AArch32 assembly ##############################
# The cortex-a75 template is expanded twice with INC=0: PREFETCH=0 writes the
# plain "-cortex-a75.S" output and PREFETCH=1 writes the "-pld-cortex-a75.S"
# output. The cortex-a7 and ld64 templates are each expanded once.
tools/xngen src/f32-igemm/4x8-aarch32-neon-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-cortex-a75.S
tools/xngen src/f32-igemm/4x8-aarch32-neon-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-pld-cortex-a75.S
tools/xngen src/f32-igemm/4x8-minmax-aarch32-neon-cortex-a7.S.in -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-cortex-a7.S
tools/xngen src/f32-igemm/4x8-minmax-aarch32-neon-ld64.S.in -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-ld64.S

################################### ARM NEON ##################################
# Output naming in this section follows the -D options: FMA=0 writes "neon"
# files and FMA=1 writes "neonfma" files; for the ld64/ld128 templates DUP=0
# writes "-lane-" files and DUP=1 writes "-dup-" files.
### LD64 micro-kernels
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/1x8-minmax-neon-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/1x8-minmax-neonfma-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x4-minmax-neon-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x4-minmax-neonfma-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neon-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neonfma-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neon-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neonfma-lane-ld64.c
### LD128 micro-kernels
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neon-lane-ld128.c
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neonfma-lane-ld128.c
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neon-lane-ld128.c
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neonfma-lane-ld128.c
### MRx2 micro-kernels
tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x2-minmax-neon-lane-ld64.c
tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x2-minmax-neonfma-lane-ld64.c
### DUP LD64 micro-kernels
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/1x8-minmax-neon-dup-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/1x8-minmax-neonfma-dup-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neon-dup-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neonfma-dup-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neon-dup-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neonfma-dup-ld64.c
### DUP LD128 micro-kernels
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neon-dup-ld128.c
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neonfma-dup-ld128.c
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neon-dup-ld128.c
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neonfma-dup-ld128.c
### LOAD4+PERMUTE micro-kernels
# The neon-shuffle template takes no DUP option; outputs carry an "s4" suffix.
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8s4-minmax-neon.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/1x8s4-minmax-neonfma.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8s4-minmax-neon.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/4x8s4-minmax-neonfma.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8s4-minmax-neon.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/6x8s4-minmax-neonfma.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/8x8s4-minmax-neon.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/8x8s4-minmax-neonfma.c

################################### x86 SSE ###################################
### LOAD1+BROADCAST micro-kernels
tools/xngen src/f32-igemm/sse-load1.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-minmax-sse-load1.c
tools/xngen src/f32-igemm/sse-load1.c.in -D MR=3 -D NR=8 -o src/f32-igemm/gen/3x8-minmax-sse-load1.c
tools/xngen src/f32-igemm/sse-load1.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-minmax-sse-load1.c
tools/xngen src/f32-igemm/sse-load1.c.in -D MR=5 -D NR=8 -o src/f32-igemm/gen/5x8-minmax-sse-load1.c

### LOAD4+DUPLICATE micro-kernels
# The sse-dup template is expanded with SSE=1 into "-sse-dup.c" files and with
# SSE=2 into "-sse2-dup.c" files.
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=1 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/1x8-minmax-sse-dup.c
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=3 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/3x8-minmax-sse-dup.c
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=4 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/4x8-minmax-sse-dup.c
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=5 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/5x8-minmax-sse-dup.c

tools/xngen src/f32-igemm/sse-dup.c.in -D MR=1 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/1x8-minmax-sse2-dup.c
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=3 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/3x8-minmax-sse2-dup.c
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=4 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/4x8-minmax-sse2-dup.c
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=5 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/5x8-minmax-sse2-dup.c

### LOAD4+PERMUTE micro-kernels
tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8s4-minmax-sse.c
tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=3 -D NR=8 -o src/f32-igemm/gen/3x8s4-minmax-sse.c
tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8s4-minmax-sse.c
tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=5 -D NR=8 -o src/f32-igemm/gen/5x8s4-minmax-sse.c

### MRx2 micro-kernels
tools/xngen src/f32-igemm/MRx2c4-sse.c.in -D MR=4 -D NR=2 -o src/f32-igemm/gen/4x2c4-minmax-sse.c

################################### x86 AVX ###################################
# The avx-broadcast template is expanded with FMA=0 into "-avx-broadcast.c"
# files and with FMA=3 into "-fma3-broadcast.c" files.
### AVX+BROADCAST micro-kernels
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8-minmax-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8-minmax-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/5x8-minmax-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8-minmax-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/7x8-minmax-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/1x16-minmax-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/3x16-minmax-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/4x16-minmax-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/5x16-minmax-avx-broadcast.c
### FMA3+BROADCAST micro-kernels
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/1x8-minmax-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/4x8-minmax-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/5x8-minmax-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/6x8-minmax-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/7x8-minmax-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/8x8-minmax-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/1x16-minmax-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/3x16-minmax-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16-minmax-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16-minmax-fma3-broadcast.c

### FMA3 micro-kernels from the avx-shuffle4 template
# Outputs carry an "s4" suffix after <MR>x<NR> but still end in
# "-fma3-broadcast.c".
tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=1 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/1x16s4-minmax-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=3 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/3x16s4-minmax-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16s4-minmax-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16s4-minmax-fma3-broadcast.c

################################# x86 AVX-512 #################################
### AVX512F+BROADCAST micro-kernels
# Only MR and NR are passed to the avx512-broadcast template; NR is 16 for
# every kernel here.
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -o src/f32-igemm/gen/1x16-minmax-avx512f-broadcast.c
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=4 -D NR=16 -o src/f32-igemm/gen/4x16-minmax-avx512f-broadcast.c
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=5 -D NR=16 -o src/f32-igemm/gen/5x16-minmax-avx512f-broadcast.c
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=6 -D NR=16 -o src/f32-igemm/gen/6x16-minmax-avx512f-broadcast.c
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=7 -D NR=16 -o src/f32-igemm/gen/7x16-minmax-avx512f-broadcast.c
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=8 -D NR=16 -o src/f32-igemm/gen/8x16-minmax-avx512f-broadcast.c

################################## Unit tests #################################
# Run tools/generate-gemm-test.py once per activation flavour: each call reads
# a YAML spec from test/ and writes the matching .cc file next to it.
tools/generate-gemm-test.py --spec test/f32-igemm.yaml --output test/f32-igemm.cc
tools/generate-gemm-test.py --spec test/f32-igemm-relu.yaml --output test/f32-igemm-relu.cc
tools/generate-gemm-test.py --spec test/f32-igemm-minmax.yaml --output test/f32-igemm-minmax.cc