XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 1 | #!/bin/sh |
| 2 | # Copyright 2019 Google LLC |
| 3 | # |
| 4 | # This source code is licensed under the BSD-style license found in the |
| 5 | # LICENSE file in the root directory of this source tree. |
| 6 | |
| 7 | #################################### Scalar ################################### |
Marat Dukhan | 436ebe6 | 2019-12-04 15:10:12 -0800 | [diff] [blame] | 8 | ### Generic C micro-kernels |
| 9 | tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D INC=0 -o src/f32-gemm/gen/1x4-scalar.c |
| 10 | tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D INC=1 -o src/f32-gemm/gen-inc/1x4-scalar.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 11 | |
Marat Dukhan | 436ebe6 | 2019-12-04 15:10:12 -0800 | [diff] [blame] | 12 | tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D INC=0 -o src/f32-gemm/gen/2x4-scalar.c |
| 13 | tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D INC=1 -o src/f32-gemm/gen-inc/2x4-scalar.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 14 | |
Marat Dukhan | 436ebe6 | 2019-12-04 15:10:12 -0800 | [diff] [blame] | 15 | tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D INC=0 -o src/f32-gemm/gen/4x2-scalar.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 16 | |
Marat Dukhan | 436ebe6 | 2019-12-04 15:10:12 -0800 | [diff] [blame] | 17 | tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D INC=0 -o src/f32-gemm/gen/4x4-scalar.c |
| 18 | tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D INC=1 -o src/f32-gemm/gen-inc/4x4-scalar.c |
| 19 | |
| 20 | ### WAsm-specific micro-kernels |
| 21 | tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D INC=0 -o src/f32-gemm/gen/1x4-wasm.c |
| 22 | tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D INC=1 -o src/f32-gemm/gen-inc/1x4-wasm.c |
| 23 | |
| 24 | tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D INC=0 -o src/f32-gemm/gen/2x4-wasm.c |
| 25 | tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D INC=1 -o src/f32-gemm/gen-inc/2x4-wasm.c |
| 26 | |
| 27 | tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D INC=0 -o src/f32-gemm/gen/4x2-wasm.c |
| 28 | |
| 29 | tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D INC=0 -o src/f32-gemm/gen/4x4-wasm.c |
| 30 | tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D INC=1 -o src/f32-gemm/gen-inc/4x4-wasm.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 31 | |
| 32 | ############################### AArch64 assembly ############################## |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 33 | tools/xngen src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/1x12-aarch64-neonfma-cortex-a53.S |
| 34 | tools/xngen src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/1x12-aarch64-neonfma-cortex-a53.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 35 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 36 | tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a53.S |
| 37 | tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a53.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 38 | |
Frank Barchard | 387c2d1 | 2019-12-16 19:14:07 -0800 | [diff] [blame] | 39 | tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a57.S |
| 40 | tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=0 -o src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a57.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 41 | |
Frank Barchard | 387c2d1 | 2019-12-16 19:14:07 -0800 | [diff] [blame] | 42 | tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a75.S |
| 43 | tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=1 -o src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a75.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 44 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 45 | tools/xngen src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/4x12-aarch64-neonfma-cortex-a53.S |
| 46 | tools/xngen src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x12-aarch64-neonfma-cortex-a53.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 47 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 48 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a53.S |
| 49 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a53.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 50 | |
Frank Barchard | 387c2d1 | 2019-12-16 19:14:07 -0800 | [diff] [blame] | 51 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a57.S |
| 52 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=0 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a57.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 53 | |
Frank Barchard | 387c2d1 | 2019-12-16 19:14:07 -0800 | [diff] [blame] | 54 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a75.S |
| 55 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a75.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 56 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 57 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld128.S.in -D INC=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-ld128.S |
| 58 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld128.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-ld128.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 59 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 60 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld64.S.in -D INC=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-ld64.S |
| 61 | tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld64.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-ld64.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 62 | |
Frank Barchard | 387c2d1 | 2019-12-16 19:14:07 -0800 | [diff] [blame] | 63 | tools/xngen src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-gemm/gen/5x8-aarch64-neonfma-cortex-a57.S |
| 64 | tools/xngen src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=0 -o src/f32-gemm/gen-inc/5x8-aarch64-neonfma-cortex-a57.S |
| 65 | |
| 66 | tools/xngen src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-gemm/gen/5x8-aarch64-neonfma-cortex-a75.S |
| 67 | tools/xngen src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=1 -o src/f32-gemm/gen-inc/5x8-aarch64-neonfma-cortex-a75.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 68 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 69 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a53.S |
| 70 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a53.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 71 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 72 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a73.S |
| 73 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a73.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 74 | |
Frank Barchard | 387c2d1 | 2019-12-16 19:14:07 -0800 | [diff] [blame] | 75 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a57.S |
| 76 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=0 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a57.S |
| 77 | |
| 78 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a75.S |
| 79 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a75.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 80 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 81 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld64.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-ld64.S |
| 82 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld64.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-ld64.S |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 83 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 84 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld128.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-ld128.S |
| 85 | tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld128.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-ld128.S |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 86 | |
Frank Barchard | abf8154 | 2019-12-13 16:18:30 -0800 | [diff] [blame] | 87 | ############################### AArch32 assembly ############################## |
| 88 | tools/xngen src/f32-gemm/4x8-aarch32-neon-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-gemm/gen/4x8-aarch32-neon-cortex-a75.S |
| 89 | tools/xngen src/f32-gemm/4x8-aarch32-neon-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-gemm/gen/4x8-aarch32-neon-pld-cortex-a75.S |
| 90 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 91 | ################################### ARM NEON ################################## |
| 92 | ### LD64 micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 93 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/1x8-neon-lane-ld64.c |
| 94 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/1x8-neon-lane-ld64.c |
| 95 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/1x8-neonfma-lane-ld64.c |
| 96 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/1x8-neonfma-lane-ld64.c |
| 97 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-neon-lane-ld64.c |
| 98 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8-neon-lane-ld64.c |
| 99 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-neonfma-lane-ld64.c |
| 100 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8-neonfma-lane-ld64.c |
| 101 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/5x8-neon-lane-ld64.c |
| 102 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/5x8-neon-lane-ld64.c |
| 103 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/5x8-neonfma-lane-ld64.c |
| 104 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/5x8-neonfma-lane-ld64.c |
| 105 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-neon-lane-ld64.c |
| 106 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8-neon-lane-ld64.c |
| 107 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-neonfma-lane-ld64.c |
| 108 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8-neonfma-lane-ld64.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 109 | ### LD128 micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 110 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-neon-lane-ld128.c |
| 111 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8-neon-lane-ld128.c |
| 112 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-neonfma-lane-ld128.c |
| 113 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8-neonfma-lane-ld128.c |
Frank Barchard | 69172d9 | 2019-11-26 16:22:39 -0800 | [diff] [blame] | 114 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-neon-lane-ld128.c |
| 115 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8-neon-lane-ld128.c |
| 116 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-neonfma-lane-ld128.c |
| 117 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8-neonfma-lane-ld128.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 118 | ### MRx2 micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 119 | tools/xngen src/f32-gemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x2-neon-lane-ld64.c |
| 120 | tools/xngen src/f32-gemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x2-neonfma-lane-ld64.c |
Frank Barchard | 5243bb0 | 2019-11-22 16:37:50 -0800 | [diff] [blame] | 121 | ### DUP LD64 micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 122 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/1x8-neon-dup-ld64.c |
| 123 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/1x8-neon-dup-ld64.c |
| 124 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/1x8-neonfma-dup-ld64.c |
| 125 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/1x8-neonfma-dup-ld64.c |
| 126 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-neon-dup-ld64.c |
| 127 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8-neon-dup-ld64.c |
| 128 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-neonfma-dup-ld64.c |
| 129 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8-neonfma-dup-ld64.c |
| 130 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-neon-dup-ld64.c |
| 131 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8-neon-dup-ld64.c |
| 132 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-neonfma-dup-ld64.c |
| 133 | tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8-neonfma-dup-ld64.c |
Frank Barchard | 5243bb0 | 2019-11-22 16:37:50 -0800 | [diff] [blame] | 134 | ### DUP LD128 micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 135 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-neon-dup-ld128.c |
| 136 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8-neon-dup-ld128.c |
| 137 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-neonfma-dup-ld128.c |
| 138 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8-neonfma-dup-ld128.c |
Frank Barchard | 69172d9 | 2019-11-26 16:22:39 -0800 | [diff] [blame] | 139 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-neon-dup-ld128.c |
| 140 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8-neon-dup-ld128.c |
| 141 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-neonfma-dup-ld128.c |
| 142 | tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8-neonfma-dup-ld128.c |
Frank Barchard | df06d80 | 2019-11-20 15:53:46 -0800 | [diff] [blame] | 143 | ### LOAD4+PERMUTE micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 144 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/1x8s4-neon.c |
| 145 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4-neon.c |
| 146 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/1x8s4-neonfma.c |
| 147 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4-neonfma.c |
| 148 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/4x8s4-neon.c |
| 149 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4-neon.c |
| 150 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/4x8s4-neonfma.c |
| 151 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4-neonfma.c |
| 152 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/6x8s4-neon.c |
| 153 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/6x8s4-neon.c |
| 154 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/6x8s4-neonfma.c |
| 155 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/6x8s4-neonfma.c |
| 156 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/8x8s4-neon.c |
| 157 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/8x8s4-neon.c |
| 158 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/8x8s4-neonfma.c |
| 159 | tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/8x8s4-neonfma.c |
Frank Barchard | df06d80 | 2019-11-20 15:53:46 -0800 | [diff] [blame] | 160 | |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 161 | #################################### PSIMD #################################### |
| 162 | ### LOAD1+BROADCAST micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 163 | tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8-psimd-loadsplat.c |
| 164 | tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8-psimd-loadsplat.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 165 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 166 | tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8-psimd-loadsplat.c |
| 167 | tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8-psimd-loadsplat.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 168 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 169 | tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=6 -D NR=8 -D INC=0 -o src/f32-gemm/gen/6x8-psimd-loadsplat.c |
| 170 | tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=6 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/6x8-psimd-loadsplat.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 171 | ### LOAD4+DUPLICATE micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 172 | tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8-psimd-splat.c |
| 173 | tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8-psimd-splat.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 174 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 175 | tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8-psimd-splat.c |
| 176 | tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8-psimd-splat.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 177 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 178 | tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=6 -D NR=8 -D INC=0 -o src/f32-gemm/gen/6x8-psimd-splat.c |
| 179 | tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=6 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/6x8-psimd-splat.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 180 | ### LOAD4+PERMUTE micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 181 | tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8s4-psimd.c |
| 182 | tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4-psimd.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 183 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 184 | tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8s4-psimd.c |
| 185 | tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4-psimd.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 186 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 187 | tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=6 -D NR=8 -D INC=0 -o src/f32-gemm/gen/6x8s4-psimd.c |
| 188 | tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=6 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/6x8s4-psimd.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 189 | |
| 190 | ################################### x86 SSE ################################### |
| 191 | ### LOAD1+BROADCAST micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 192 | tools/xngen src/f32-gemm/sse-load1.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8-sse-load1.c |
| 193 | tools/xngen src/f32-gemm/sse-load1.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8-sse-load1.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 194 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 195 | tools/xngen src/f32-gemm/sse-load1.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8-sse-load1.c |
| 196 | tools/xngen src/f32-gemm/sse-load1.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8-sse-load1.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 197 | ### LOAD4+DUPLICATE micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 198 | tools/xngen src/f32-gemm/sse-dup.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8-sse-dup.c |
| 199 | tools/xngen src/f32-gemm/sse-dup.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8-sse-dup.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 200 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 201 | tools/xngen src/f32-gemm/sse-dup.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8-sse-dup.c |
| 202 | tools/xngen src/f32-gemm/sse-dup.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8-sse-dup.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 203 | ### LOAD4+PERMUTE micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 204 | tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8s4-sse.c |
| 205 | tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4-sse.c |
Marat Dukhan | ba7c3bb | 2019-10-27 19:53:54 -0700 | [diff] [blame] | 206 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 207 | tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8s4-sse.c |
| 208 | tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4-sse.c |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 209 | |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 210 | ################################### x86 AVX ################################### |
| 211 | ### AVX+BROADCAST micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 212 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/1x8-avx-broadcast.c |
| 213 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/1x8-avx-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 214 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 215 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/4x8-avx-broadcast.c |
| 216 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/4x8-avx-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 217 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 218 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/5x8-avx-broadcast.c |
| 219 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/5x8-avx-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 220 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 221 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/6x8-avx-broadcast.c |
| 222 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/6x8-avx-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 223 | |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 224 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/7x8-avx-broadcast.c |
| 225 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/7x8-avx-broadcast.c |
Marat Dukhan | eccfd71 | 2019-12-08 16:49:27 -0800 | [diff] [blame] | 226 | |
| 227 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/1x16-avx-broadcast.c |
| 228 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/1x16-avx-broadcast.c |
| 229 | |
| 230 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/3x16-avx-broadcast.c |
| 231 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/3x16-avx-broadcast.c |
| 232 | |
| 233 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/4x16-avx-broadcast.c |
| 234 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/4x16-avx-broadcast.c |
| 235 | |
| 236 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/5x16-avx-broadcast.c |
| 237 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/5x16-avx-broadcast.c |
Marat Dukhan | fda12b8 | 2019-11-21 12:27:59 -0800 | [diff] [blame] | 238 | ### FMA3+BROADCAST micro-kernels |
Marat Dukhan | 40a672f | 2019-11-25 03:08:22 -0800 | [diff] [blame] | 239 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/1x8-fma3-broadcast.c |
| 240 | tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/1x8-fma3-broadcast.c |
# FMA3 broadcast micro-kernels with NR=8, for MR=4 through MR=8.
# Every tile is rendered twice from avx-broadcast.c.in with -D FMA=3:
# the INC=0 variant is written to gen/, the INC=1 variant to gen-inc/.
tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/4x8-fma3-broadcast.c
tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/4x8-fma3-broadcast.c

tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/5x8-fma3-broadcast.c
tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/5x8-fma3-broadcast.c

tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/6x8-fma3-broadcast.c
tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/6x8-fma3-broadcast.c

tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/7x8-fma3-broadcast.c
tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/7x8-fma3-broadcast.c

tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/8x8-fma3-broadcast.c
tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/8x8-fma3-broadcast.c

# FMA3 broadcast micro-kernels with NR=16, for MR=1, 3, 4 and 5.
# Same avx-broadcast.c.in template and -D FMA=3 setting as the NR=8 tiles;
# INC=0 output goes to gen/, INC=1 output goes to gen-inc/.
tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/1x16-fma3-broadcast.c
tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/1x16-fma3-broadcast.c

tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/3x16-fma3-broadcast.c
tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/3x16-fma3-broadcast.c

tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/4x16-fma3-broadcast.c
tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/4x16-fma3-broadcast.c

tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/5x16-fma3-broadcast.c
tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/5x16-fma3-broadcast.c

# FMA3 micro-kernels rendered from the avx-shuffle4.c.in template with NR=16,
# for MR=1, 3, 4 and 5. Outputs are named <MR>x16s4-fma3-broadcast.c; INC=0
# output goes to gen/, INC=1 output goes to gen-inc/.
# NOTE(review): the output names keep the "-broadcast" suffix even though the
# template is avx-shuffle4. The names are left untouched here because build
# files and test specs outside this script may reference them -- confirm
# before renaming.
tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=1 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/1x16s4-fma3-broadcast.c
tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=1 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/1x16s4-fma3-broadcast.c

tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=3 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/3x16s4-fma3-broadcast.c
tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=3 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/3x16s4-fma3-broadcast.c

tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=4 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/4x16s4-fma3-broadcast.c
tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=4 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/4x16s4-fma3-broadcast.c

tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=5 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/5x16s4-fma3-broadcast.c
tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=5 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/5x16s4-fma3-broadcast.c

################################# x86 AVX-512 #################################
### AVX512F+BROADCAST micro-kernels
# Rendered from avx512-broadcast.c.in with NR=16 for MR=1, 4, 5, 6, 7 and 8.
# No FMA define is passed to this template. INC=0 output goes to gen/,
# INC=1 output goes to gen-inc/.
tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -D INC=0 -o src/f32-gemm/gen/1x16-avx512f-broadcast.c
tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/1x16-avx512f-broadcast.c

tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=4 -D NR=16 -D INC=0 -o src/f32-gemm/gen/4x16-avx512f-broadcast.c
tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=4 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/4x16-avx512f-broadcast.c

tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=5 -D NR=16 -D INC=0 -o src/f32-gemm/gen/5x16-avx512f-broadcast.c
tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=5 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/5x16-avx512f-broadcast.c

tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=6 -D NR=16 -D INC=0 -o src/f32-gemm/gen/6x16-avx512f-broadcast.c
tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=6 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/6x16-avx512f-broadcast.c

tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=7 -D NR=16 -D INC=0 -o src/f32-gemm/gen/7x16-avx512f-broadcast.c
tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=7 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/7x16-avx512f-broadcast.c

tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=8 -D NR=16 -D INC=0 -o src/f32-gemm/gen/8x16-avx512f-broadcast.c
tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=8 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/8x16-avx512f-broadcast.c

################################## Unit tests #################################
# Regenerate the C++ unit-test sources from their YAML specs: one spec/output
# pair for f32-gemm and one for f32-gemminc.
tools/generate-gemm-test.py --spec test/f32-gemm.yaml --output test/f32-gemm.cc
tools/generate-gemm-test.py --spec test/f32-gemminc.yaml --output test/f32-gemminc.cc