#!/bin/sh
# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Regenerates the f32-igemm micro-kernel sources (src/f32-igemm/gen/) and the
# matching unit tests (test/f32-igemm*.cc). All paths are relative, so the
# script has to be started from the directory that contains tools/, src/ and
# test/. Every generator is launched as a background job; a final `wait`
# collects them.

#################################### Scalar ###################################
### Generic C micro-kernels
# WASM=0: one kernel per MRxNR tile, no activation (LINEAR).
tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/1x4-scalar.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/2x4-scalar.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x2-scalar.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x4-scalar.c &

# WASM=0, ACTIVATION=RELU.
tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/1x4-relu-scalar.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/2x4-relu-scalar.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x2-relu-scalar.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x4-relu-scalar.c &

# WASM=0, ACTIVATION=MINMAX.
tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x4-minmax-scalar.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/2x4-minmax-scalar.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2-minmax-scalar.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x4-minmax-scalar.c &

### WAsm-specific micro-kernels
# Same template and tile sizes as above, generated with WASM=1.
tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/1x4-wasm.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/2x4-wasm.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x2-wasm.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x4-wasm.c &

tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/1x4-relu-wasm.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/2x4-relu-wasm.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x2-relu-wasm.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x4-relu-wasm.c &

tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x4-minmax-wasm.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/2x4-minmax-wasm.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2-minmax-wasm.c &
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x4-minmax-wasm.c &

################################## WAsm SIMD ##################################
### LOAD1+BROADCAST micro-kernels
# X86=0 produces the "-arm-" variants, X86=1 the "-x86-" variants.
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-arm-loadsplat.c &
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=3 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-arm-loadsplat.c &
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-arm-loadsplat.c &
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-arm-loadsplat.c &
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=6 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-arm-loadsplat.c &

tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=1 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-x86-loadsplat.c &
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=3 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-x86-loadsplat.c &
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=4 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-x86-loadsplat.c &
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=5 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-x86-loadsplat.c &
tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=6 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-x86-loadsplat.c &

### LOAD4+DUPLICATE micro-kernels
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-arm-splat.c &
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=3 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-arm-splat.c &
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-arm-splat.c &
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-arm-splat.c &
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=6 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-arm-splat.c &

# RELU and LINEAR variants are generated once (X86=0) and their output names
# carry no -arm-/-x86- tag.
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/1x8-relu-wasmsimd-splat.c &
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x8-relu-wasmsimd-splat.c &
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/5x8-relu-wasmsimd-splat.c &

tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/1x8-wasmsimd-splat.c &
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x8-wasmsimd-splat.c &
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/5x8-wasmsimd-splat.c &

tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-x86-splat.c &
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=3 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-x86-splat.c &
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-x86-splat.c &
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-x86-splat.c &
tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=6 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-x86-splat.c &

### LOAD4+PERMUTE micro-kernels
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8s4-minmax-wasmsimd-arm.c &
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=3 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8s4-minmax-wasmsimd-arm.c &
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8s4-minmax-wasmsimd-arm.c &
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8s4-minmax-wasmsimd-arm.c &
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=6 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8s4-minmax-wasmsimd-arm.c &

tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8s4-minmax-wasmsimd-x86.c &
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=3 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8s4-minmax-wasmsimd-x86.c &
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=4 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8s4-minmax-wasmsimd-x86.c &
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=5 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8s4-minmax-wasmsimd-x86.c &
tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=6 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8s4-minmax-wasmsimd-x86.c &

### MRx2 micro-kernels
tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2c4-minmax-wasmsimd-arm.c &
tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2c4-minmax-wasmsimd-x86.c &
tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x2c4-relu-wasmsimd.c &
tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x2c4-wasmsimd.c &

############################### AArch64 assembly ##############################
### LD64 micro-kernels
# These templates take no -D parameters; each maps to one generated .S file.
tools/xngen src/f32-igemm/4x8-aarch64-neonfma-ld64.S.in -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-ld64.S &
tools/xngen src/f32-igemm/6x8-aarch64-neonfma-ld64.S.in -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-ld64.S &

### LD128 micro-kernels
tools/xngen src/f32-igemm/4x8-aarch64-neonfma-ld128.S.in -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-ld128.S &
tools/xngen src/f32-igemm/6x8-aarch64-neonfma-ld128.S.in -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-ld128.S &

### Cortex A75 micro-kernels
# Each template is expanded twice: PREFETCH=0 and PREFETCH=1 (the latter is
# written to the "-prfm-" output name).
tools/xngen src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/1x8-minmax-aarch64-neonfma-cortex-a75.S &
tools/xngen src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/1x8-minmax-aarch64-neonfma-prfm-cortex-a75.S &
tools/xngen src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-cortex-a75.S &
tools/xngen src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-prfm-cortex-a75.S &
tools/xngen src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/5x8-minmax-aarch64-neonfma-cortex-a75.S &
tools/xngen src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/5x8-minmax-aarch64-neonfma-prfm-cortex-a75.S &
tools/xngen src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-cortex-a75.S &
tools/xngen src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-prfm-cortex-a75.S &

############################### AArch32 assembly ##############################
# Cortex-A75 template: PREFETCH=0 and PREFETCH=1 ("-prfm-") outputs.
tools/xngen src/f32-igemm/4x8-aarch32-neon-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-cortex-a75.S &
tools/xngen src/f32-igemm/4x8-aarch32-neon-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-prfm-cortex-a75.S &
# NOTE(review): the cortex-a7 kernel is generated with PREFETCH=1 but its
# output name has no "-prfm-" tag, unlike the a75 kernels -- confirm intended.
tools/xngen src/f32-igemm/4x8-minmax-aarch32-neon-cortex-a7.S.in -D PREFETCH=1 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-cortex-a7.S &
tools/xngen src/f32-igemm/4x8-minmax-aarch32-neon-ld64.S.in -D PREFETCH=0 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-ld64.S &

################################### ARM NEON ##################################
# FMA=0 writes "neon" outputs, FMA=1 writes "neonfma" outputs.
# DUP=0 writes "-lane-" outputs, DUP=1 writes "-dup-" outputs.
### LD64 micro-kernels
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/1x8-minmax-neon-lane-ld64.c &
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/1x8-minmax-neonfma-lane-ld64.c &
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x4-minmax-neon-lane-ld64.c &
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x4-minmax-neonfma-lane-ld64.c &
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neon-lane-ld64.c &
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neonfma-lane-ld64.c &
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neon-lane-ld64.c &
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neonfma-lane-ld64.c &
### LD128 micro-kernels
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neon-lane-ld128.c &
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neonfma-lane-ld128.c &
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neon-lane-ld128.c &
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neonfma-lane-ld128.c &
### MRx2 micro-kernels
tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x2-minmax-neon-lane-ld64.c &
tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x2-minmax-neonfma-lane-ld64.c &
### DUP LD64 micro-kernels
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/1x8-minmax-neon-dup-ld64.c &
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/1x8-minmax-neonfma-dup-ld64.c &
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neon-dup-ld64.c &
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neonfma-dup-ld64.c &
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neon-dup-ld64.c &
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neonfma-dup-ld64.c &
### DUP LD128 micro-kernels
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neon-dup-ld128.c &
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neonfma-dup-ld128.c &
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neon-dup-ld128.c &
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neonfma-dup-ld128.c &
### LOAD4+PERMUTE micro-kernels
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8s4-minmax-neon.c &
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/1x8s4-minmax-neonfma.c &
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8s4-minmax-neon.c &
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/4x8s4-minmax-neonfma.c &
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8s4-minmax-neon.c &
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/6x8s4-minmax-neonfma.c &
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/8x8s4-minmax-neon.c &
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/8x8s4-minmax-neonfma.c &

################################### x86 SSE ###################################
### LOAD1+BROADCAST micro-kernels
tools/xngen src/f32-igemm/sse-load1.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-minmax-sse-load1.c &
tools/xngen src/f32-igemm/sse-load1.c.in -D MR=3 -D NR=8 -o src/f32-igemm/gen/3x8-minmax-sse-load1.c &
tools/xngen src/f32-igemm/sse-load1.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-minmax-sse-load1.c &
tools/xngen src/f32-igemm/sse-load1.c.in -D MR=5 -D NR=8 -o src/f32-igemm/gen/5x8-minmax-sse-load1.c &

### LOAD4+DUPLICATE micro-kernels
# SSE=1 writes "-sse-dup" outputs, SSE=2 writes "-sse2-dup" outputs.
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=1 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/1x8-minmax-sse-dup.c &
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=3 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/3x8-minmax-sse-dup.c &
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=4 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/4x8-minmax-sse-dup.c &
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=5 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/5x8-minmax-sse-dup.c &

tools/xngen src/f32-igemm/sse-dup.c.in -D MR=1 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/1x8-minmax-sse2-dup.c &
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=3 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/3x8-minmax-sse2-dup.c &
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=4 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/4x8-minmax-sse2-dup.c &
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=5 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/5x8-minmax-sse2-dup.c &

### LOAD4+PERMUTE micro-kernels
tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8s4-minmax-sse.c &
tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=3 -D NR=8 -o src/f32-igemm/gen/3x8s4-minmax-sse.c &
tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8s4-minmax-sse.c &
tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=5 -D NR=8 -o src/f32-igemm/gen/5x8s4-minmax-sse.c &

### MRx2 micro-kernels
tools/xngen src/f32-igemm/MRx2c4-sse.c.in -D MR=4 -D NR=2 -o src/f32-igemm/gen/4x2c4-minmax-sse.c &

################################### x86 AVX ###################################
### AVX+BROADCAST micro-kernels
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8-minmax-avx-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8-minmax-avx-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/5x8-minmax-avx-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8-minmax-avx-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/7x8-minmax-avx-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/1x16-minmax-avx-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/3x16-minmax-avx-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/4x16-minmax-avx-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/5x16-minmax-avx-broadcast.c &
### FMA3+BROADCAST micro-kernels
# Same template as the AVX group, generated with FMA=3.
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/1x8-minmax-fma3-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/4x8-minmax-fma3-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/5x8-minmax-fma3-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/6x8-minmax-fma3-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/7x8-minmax-fma3-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/8x8-minmax-fma3-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/1x16-minmax-fma3-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/3x16-minmax-fma3-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16-minmax-fma3-broadcast.c &
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16-minmax-fma3-broadcast.c &

# "s4" kernels generated from the avx-shuffle4.c.in template with FMA=3.
# NOTE(review): the outputs keep a "-broadcast" suffix even though the
# template is avx-shuffle4 -- confirm the naming is intended.
tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=1 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/1x16s4-minmax-fma3-broadcast.c &
tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=3 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/3x16s4-minmax-fma3-broadcast.c &
tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16s4-minmax-fma3-broadcast.c &
tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16s4-minmax-fma3-broadcast.c &

################################# x86 AVX-512 #################################
### AVX512F+BROADCAST micro-kernels
# One kernel per MR in {1,4,5,6,7,8}, all with NR=16.
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -o src/f32-igemm/gen/1x16-minmax-avx512f-broadcast.c &
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=4 -D NR=16 -o src/f32-igemm/gen/4x16-minmax-avx512f-broadcast.c &
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=5 -D NR=16 -o src/f32-igemm/gen/5x16-minmax-avx512f-broadcast.c &
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=6 -D NR=16 -o src/f32-igemm/gen/6x16-minmax-avx512f-broadcast.c &
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=7 -D NR=16 -o src/f32-igemm/gen/7x16-minmax-avx512f-broadcast.c &
tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=8 -D NR=16 -o src/f32-igemm/gen/8x16-minmax-avx512f-broadcast.c &

################################## Unit tests #################################
# Each spec is passed to the test generator with two --output files.
tools/generate-gemm-test.py --spec test/f32-igemm.yaml --output test/f32-igemm.cc --output test/f32-igemm-2.cc &
tools/generate-gemm-test.py --spec test/f32-igemm-relu.yaml --output test/f32-igemm-relu.cc --output test/f32-igemm-relu-2.cc &
tools/generate-gemm-test.py --spec test/f32-igemm-minmax.yaml --output test/f32-igemm-minmax.cc --output test/f32-igemm-minmax-2.cc &

# Block until every background job started above has finished.
wait