blob: 3517075e729c7035970c7b789af891d86da44c46 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001#!/bin/sh
2# Copyright 2019 Google LLC
3#
4# This source code is licensed under the BSD-style license found in the
5# LICENSE file in the root directory of this source tree.
6
7#################################### Scalar ###################################
Marat Dukhan436ebe62019-12-04 15:10:12 -08008### Generic C micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -07009tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D INC=0 -D ACTIVATION=LINEAR -o src/f32-gemm/gen/1x4-scalar.c &
10tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D INC=0 -D ACTIVATION=RELU -o src/f32-gemm/gen/1x4-relu-scalar.c &
11tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/1x4-minmax-scalar.c &
12tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/1x4inc-minmax-scalar.c &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070013
Frank Barchardaae722a2021-08-30 12:37:30 -070014tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D INC=0 -D ACTIVATION=LINEAR -o src/f32-gemm/gen/2x4-scalar.c &
15tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D INC=0 -D ACTIVATION=RELU -o src/f32-gemm/gen/2x4-relu-scalar.c &
16tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/2x4-minmax-scalar.c &
17tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/2x4inc-minmax-scalar.c &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070018
Frank Barchardaae722a2021-08-30 12:37:30 -070019tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D INC=0 -D ACTIVATION=LINEAR -o src/f32-gemm/gen/4x2-scalar.c &
20tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D INC=0 -D ACTIVATION=RELU -o src/f32-gemm/gen/4x2-relu-scalar.c &
21tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/4x2-minmax-scalar.c &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070022
Frank Barchardaae722a2021-08-30 12:37:30 -070023tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D INC=0 -D ACTIVATION=LINEAR -o src/f32-gemm/gen/4x4-scalar.c &
24tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D INC=0 -D ACTIVATION=RELU -o src/f32-gemm/gen/4x4-relu-scalar.c &
25tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/4x4-minmax-scalar.c &
26tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/4x4inc-minmax-scalar.c &
Marat Dukhan436ebe62019-12-04 15:10:12 -080027
28### WAsm-specific micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -070029tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D INC=0 -D ACTIVATION=LINEAR -o src/f32-gemm/gen/1x4-wasm.c &
30tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D INC=0 -D ACTIVATION=RELU -o src/f32-gemm/gen/1x4-relu-wasm.c &
31tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/1x4-minmax-wasm.c &
32tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/1x4inc-minmax-wasm.c &
Marat Dukhan436ebe62019-12-04 15:10:12 -080033
Frank Barchardaae722a2021-08-30 12:37:30 -070034tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D INC=0 -D ACTIVATION=LINEAR -o src/f32-gemm/gen/2x4-wasm.c &
35tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D INC=0 -D ACTIVATION=RELU -o src/f32-gemm/gen/2x4-relu-wasm.c &
36tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/2x4-minmax-wasm.c &
37tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/2x4inc-minmax-wasm.c &
Marat Dukhan436ebe62019-12-04 15:10:12 -080038
Frank Barchardaae722a2021-08-30 12:37:30 -070039tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D INC=0 -D ACTIVATION=LINEAR -o src/f32-gemm/gen/4x2-wasm.c &
40tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D INC=0 -D ACTIVATION=RELU -o src/f32-gemm/gen/4x2-relu-wasm.c &
41tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/4x2-minmax-wasm.c &
Marat Dukhan436ebe62019-12-04 15:10:12 -080042
Frank Barchardaae722a2021-08-30 12:37:30 -070043tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D INC=0 -D ACTIVATION=LINEAR -o src/f32-gemm/gen/4x4-wasm.c &
44tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D INC=0 -D ACTIVATION=RELU -o src/f32-gemm/gen/4x4-relu-wasm.c &
45tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/4x4-minmax-wasm.c &
46tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/4x4inc-minmax-wasm.c &
XNNPACK Teamb455b122019-09-27 18:10:33 -070047
48############################### AArch64 assembly ##############################
Frank Barchardaae722a2021-08-30 12:37:30 -070049tools/xngen src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/1x12-minmax-aarch64-neonfma-cortex-a53.S &
50tools/xngen src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/1x12inc-minmax-aarch64-neonfma-cortex-a53.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070051
Frank Barchardaae722a2021-08-30 12:37:30 -070052tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-cortex-a53.S &
53tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-cortex-a53.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070054
Frank Barchardaae722a2021-08-30 12:37:30 -070055tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-cortex-a75.S &
56tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=0 -o src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-cortex-a75.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070057
Frank Barchardaae722a2021-08-30 12:37:30 -070058tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-prfm-cortex-a75.S &
59tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=1 -o src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-prfm-cortex-a75.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070060
Frank Barchardaae722a2021-08-30 12:37:30 -070061tools/xngen src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/4x12-minmax-aarch64-neonfma-cortex-a53.S &
62tools/xngen src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x12inc-minmax-aarch64-neonfma-cortex-a53.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070063
Frank Barchardaae722a2021-08-30 12:37:30 -070064tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-cortex-a53.S &
65tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-cortex-a53.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070066
Frank Barchardaae722a2021-08-30 12:37:30 -070067tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a55.S.in -D INC=0 -o src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-cortex-a55.S &
68tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a55.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-cortex-a55.S &
Frank Barchard8fb90552020-03-16 11:36:09 -070069
Frank Barchardaae722a2021-08-30 12:37:30 -070070tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-cortex-a75.S &
71tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=0 -o src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-cortex-a75.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070072
Frank Barchardaae722a2021-08-30 12:37:30 -070073tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-prfm-cortex-a75.S &
74tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=1 -o src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-prfm-cortex-a75.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070075
Frank Barchardaae722a2021-08-30 12:37:30 -070076tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld128.S.in -D INC=0 -o src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-ld128.S &
77tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld128.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-ld128.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070078
Frank Barchardaae722a2021-08-30 12:37:30 -070079tools/xngen src/f32-gemm/1x8-aarch64-neonfma-ld64.S.in -D INC=0 -o src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-ld64.S &
80tools/xngen src/f32-gemm/1x8-aarch64-neonfma-ld64.S.in -D INC=1 -o src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-ld64.S &
Frank Barchard3cb54f92020-04-10 10:46:08 -070081
Frank Barchardaae722a2021-08-30 12:37:30 -070082tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld64.S.in -D INC=0 -o src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-ld64.S &
83tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld64.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-ld64.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070084
Frank Barchardaae722a2021-08-30 12:37:30 -070085tools/xngen src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-gemm/gen/5x8-minmax-aarch64-neonfma-cortex-a75.S &
86tools/xngen src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=0 -o src/f32-gemm/gen-inc/5x8inc-minmax-aarch64-neonfma-cortex-a75.S &
Frank Barchard387c2d12019-12-16 19:14:07 -080087
Frank Barchardaae722a2021-08-30 12:37:30 -070088tools/xngen src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-gemm/gen/5x8-minmax-aarch64-neonfma-prfm-cortex-a75.S &
89tools/xngen src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=1 -o src/f32-gemm/gen-inc/5x8inc-minmax-aarch64-neonfma-prfm-cortex-a75.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070090
Frank Barchardaae722a2021-08-30 12:37:30 -070091tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a53.S &
92tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a53.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070093
Frank Barchardaae722a2021-08-30 12:37:30 -070094tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a55.S.in -D INC=0 -o src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a55.S &
95tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a55.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a55.S &
Frank Barchard91e19992020-03-09 18:46:14 -070096
Frank Barchardaae722a2021-08-30 12:37:30 -070097tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S.in -D INC=0 -o src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a73.S &
98tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a73.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070099
Frank Barchardaae722a2021-08-30 12:37:30 -0700100tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a75.S &
101tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=0 -o src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a75.S &
Frank Barchard387c2d12019-12-16 19:14:07 -0800102
Frank Barchardaae722a2021-08-30 12:37:30 -0700103tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-prfm-cortex-a75.S &
104tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -D PREFETCH=1 -o src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-prfm-cortex-a75.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700105
Frank Barchardaae722a2021-08-30 12:37:30 -0700106tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld64.S.in -D INC=0 -o src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-ld64.S &
107tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld64.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-ld64.S &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700108
Frank Barchardaae722a2021-08-30 12:37:30 -0700109tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld128.S.in -D INC=0 -o src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-ld128.S &
110tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld128.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-ld128.S &
XNNPACK Teamb455b122019-09-27 18:10:33 -0700111
Frank Barchardabf81542019-12-13 16:18:30 -0800112############################### AArch32 assembly ##############################
Frank Barchardaae722a2021-08-30 12:37:30 -0700113tools/xngen src/f32-gemm/4x8-aarch32-neon-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-gemm/gen/4x8-minmax-aarch32-neon-cortex-a75.S &
Frank Barchard78735862022-01-04 16:47:44 -0800114tools/xngen src/f32-gemm/4x8-aarch32-neon-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-gemm/gen/4x8-minmax-aarch32-neon-prfm-cortex-a75.S &
Frank Barchardaae722a2021-08-30 12:37:30 -0700115tools/xngen src/f32-gemm/4x8-minmax-aarch32-neon-cortex-a7.S.in -D INC=0 -D PREFETCH=1 -o src/f32-gemm/gen/4x8-minmax-aarch32-neon-cortex-a7.S &
116tools/xngen src/f32-gemm/4x8-minmax-aarch32-neon-ld64.S.in -D INC=0 -D PREFETCH=0 -o src/f32-gemm/gen/4x8-minmax-aarch32-neon-ld64.S &
Frank Barchard569561d2020-06-17 13:11:12 -0700117
XNNPACK Teamb455b122019-09-27 18:10:33 -0700118################################### ARM NEON ##################################
119### LD64 micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700120tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/1x8-minmax-neon-lane-ld64.c &
121tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/1x8inc-minmax-neon-lane-ld64.c &
122tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/1x8-minmax-neonfma-lane-ld64.c &
123tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/1x8inc-minmax-neonfma-lane-ld64.c &
124tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-minmax-neon-lane-ld64.c &
125tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8inc-minmax-neon-lane-ld64.c &
126tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-minmax-neonfma-lane-ld64.c &
127tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8inc-minmax-neonfma-lane-ld64.c &
128tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/5x8-minmax-neon-lane-ld64.c &
129tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/5x8inc-minmax-neon-lane-ld64.c &
130tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/5x8-minmax-neonfma-lane-ld64.c &
131tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/5x8inc-minmax-neonfma-lane-ld64.c &
132tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-minmax-neon-lane-ld64.c &
133tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8inc-minmax-neon-lane-ld64.c &
134tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-minmax-neonfma-lane-ld64.c &
135tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8inc-minmax-neonfma-lane-ld64.c &
XNNPACK Teamb455b122019-09-27 18:10:33 -0700136### LD128 micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700137tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-minmax-neon-lane-ld128.c &
138tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8inc-minmax-neon-lane-ld128.c &
139tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-minmax-neonfma-lane-ld128.c &
140tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8inc-minmax-neonfma-lane-ld128.c &
141tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-minmax-neon-lane-ld128.c &
142tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8inc-minmax-neon-lane-ld128.c &
143tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-minmax-neonfma-lane-ld128.c &
144tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8inc-minmax-neonfma-lane-ld128.c &
XNNPACK Teamb455b122019-09-27 18:10:33 -0700145### MRx2 micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700146tools/xngen src/f32-gemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x2-minmax-neon-lane-ld64.c &
147tools/xngen src/f32-gemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x2-minmax-neonfma-lane-ld64.c &
Frank Barchard5243bb02019-11-22 16:37:50 -0800148### DUP LD64 micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700149tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/1x8-minmax-neon-dup-ld64.c &
150tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/1x8inc-minmax-neon-dup-ld64.c &
151tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/1x8-minmax-neonfma-dup-ld64.c &
152tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/1x8inc-minmax-neonfma-dup-ld64.c &
153tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-minmax-neon-dup-ld64.c &
154tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8inc-minmax-neon-dup-ld64.c &
155tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-minmax-neonfma-dup-ld64.c &
156tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8inc-minmax-neonfma-dup-ld64.c &
157tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-minmax-neon-dup-ld64.c &
158tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8inc-minmax-neon-dup-ld64.c &
159tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-minmax-neonfma-dup-ld64.c &
160tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8inc-minmax-neonfma-dup-ld64.c &
Frank Barchard5243bb02019-11-22 16:37:50 -0800161### DUP LD128 midupkernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700162tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-minmax-neon-dup-ld128.c &
163tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8inc-minmax-neon-dup-ld128.c &
164tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-minmax-neonfma-dup-ld128.c &
165tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8inc-minmax-neonfma-dup-ld128.c &
166tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-minmax-neon-dup-ld128.c &
167tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8inc-minmax-neon-dup-ld128.c &
168tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-minmax-neonfma-dup-ld128.c &
169tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8inc-minmax-neonfma-dup-ld128.c &
Frank Barcharddf06d802019-11-20 15:53:46 -0800170### LOAD4+PERMUTE micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700171tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/1x8s4-minmax-neon.c &
172tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4inc-minmax-neon.c &
173tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/1x8s4-minmax-neonfma.c &
174tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4inc-minmax-neonfma.c &
175tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/4x8s4-minmax-neon.c &
176tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4inc-minmax-neon.c &
177tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/4x8s4-minmax-neonfma.c &
178tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4inc-minmax-neonfma.c &
179tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/6x8s4-minmax-neon.c &
180tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/6x8s4inc-minmax-neon.c &
181tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/6x8s4-minmax-neonfma.c &
182tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/6x8s4inc-minmax-neonfma.c &
183tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/8x8s4-minmax-neon.c &
184tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/8x8s4inc-minmax-neon.c &
185tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/8x8s4-minmax-neonfma.c &
186tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/8x8s4inc-minmax-neonfma.c &
Frank Barcharddf06d802019-11-20 15:53:46 -0800187
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700188################################## WAsm SIMD ##################################
189### LOAD1+BROADCAST micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700190tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=1 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/1x8-minmax-wasmsimd-arm-loadsplat.c &
191tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=1 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/1x8inc-minmax-wasmsimd-arm-loadsplat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700192
Frank Barchardaae722a2021-08-30 12:37:30 -0700193tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=3 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/3x8-minmax-wasmsimd-arm-loadsplat.c &
194tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=3 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/3x8inc-minmax-wasmsimd-arm-loadsplat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700195
Frank Barchardaae722a2021-08-30 12:37:30 -0700196tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=4 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/4x8-minmax-wasmsimd-arm-loadsplat.c &
197tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=4 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/4x8inc-minmax-wasmsimd-arm-loadsplat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700198
Frank Barchardaae722a2021-08-30 12:37:30 -0700199tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=5 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/5x8-minmax-wasmsimd-arm-loadsplat.c &
200tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=5 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/5x8inc-minmax-wasmsimd-arm-loadsplat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700201
Frank Barchardaae722a2021-08-30 12:37:30 -0700202tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=6 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/6x8-minmax-wasmsimd-arm-loadsplat.c &
203tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=6 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/6x8inc-minmax-wasmsimd-arm-loadsplat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700204
Frank Barchardaae722a2021-08-30 12:37:30 -0700205tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=1 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/1x8-minmax-wasmsimd-x86-loadsplat.c &
206tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=1 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/1x8inc-minmax-wasmsimd-x86-loadsplat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700207
Frank Barchardaae722a2021-08-30 12:37:30 -0700208tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=3 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/3x8-minmax-wasmsimd-x86-loadsplat.c &
209tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=3 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/3x8inc-minmax-wasmsimd-x86-loadsplat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700210
Frank Barchardaae722a2021-08-30 12:37:30 -0700211tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=4 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/4x8-minmax-wasmsimd-x86-loadsplat.c &
212tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=4 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/4x8inc-minmax-wasmsimd-x86-loadsplat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700213
Frank Barchardaae722a2021-08-30 12:37:30 -0700214tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=5 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/5x8-minmax-wasmsimd-x86-loadsplat.c &
215tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=5 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/5x8inc-minmax-wasmsimd-x86-loadsplat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700216
Frank Barchardaae722a2021-08-30 12:37:30 -0700217tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=6 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/6x8-minmax-wasmsimd-x86-loadsplat.c &
218tools/xngen src/f32-gemm/wasmsimd-loadsplat.c.in -D MR=6 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/6x8inc-minmax-wasmsimd-x86-loadsplat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700219### LOAD4+DUPLICATE micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700220tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/1x8-minmax-wasmsimd-arm-splat.c &
221tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/1x8inc-minmax-wasmsimd-arm-splat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700222
Frank Barchardaae722a2021-08-30 12:37:30 -0700223tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=3 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/3x8-minmax-wasmsimd-arm-splat.c &
224tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=3 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/3x8inc-minmax-wasmsimd-arm-splat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700225
Frank Barchardaae722a2021-08-30 12:37:30 -0700226tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/4x8-minmax-wasmsimd-arm-splat.c &
227tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/4x8inc-minmax-wasmsimd-arm-splat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700228
Frank Barchardaae722a2021-08-30 12:37:30 -0700229tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/5x8-minmax-wasmsimd-arm-splat.c &
230tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/5x8inc-minmax-wasmsimd-arm-splat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700231
Frank Barchardaae722a2021-08-30 12:37:30 -0700232tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=6 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/6x8-minmax-wasmsimd-arm-splat.c &
233tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=6 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/6x8inc-minmax-wasmsimd-arm-splat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700234
Frank Barchardaae722a2021-08-30 12:37:30 -0700235tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=RELU -o src/f32-gemm/gen/1x8-relu-wasmsimd-splat.c &
236tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=RELU -o src/f32-gemm/gen/4x8-relu-wasmsimd-splat.c &
237tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=RELU -o src/f32-gemm/gen/5x8-relu-wasmsimd-splat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700238
Frank Barchardaae722a2021-08-30 12:37:30 -0700239tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=LINEAR -o src/f32-gemm/gen/1x8-wasmsimd-splat.c &
240tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=LINEAR -o src/f32-gemm/gen/4x8-wasmsimd-splat.c &
241tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=LINEAR -o src/f32-gemm/gen/5x8-wasmsimd-splat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700242
Frank Barchardaae722a2021-08-30 12:37:30 -0700243tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/1x8-minmax-wasmsimd-x86-splat.c &
244tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/1x8inc-minmax-wasmsimd-x86-splat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700245
Frank Barchardaae722a2021-08-30 12:37:30 -0700246tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=3 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/3x8-minmax-wasmsimd-x86-splat.c &
247tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=3 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/3x8inc-minmax-wasmsimd-x86-splat.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700248
Frank Barchardaae722a2021-08-30 12:37:30 -0700249tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/4x8-minmax-wasmsimd-x86-splat.c &
250tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/4x8inc-minmax-wasmsimd-x86-splat.c &
Marat Dukhan802808c2020-06-16 11:01:17 -0700251
Frank Barchardaae722a2021-08-30 12:37:30 -0700252tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/5x8-minmax-wasmsimd-x86-splat.c &
253tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/5x8inc-minmax-wasmsimd-x86-splat.c &
Marat Dukhan802808c2020-06-16 11:01:17 -0700254
Frank Barchardaae722a2021-08-30 12:37:30 -0700255tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=6 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/6x8-minmax-wasmsimd-x86-splat.c &
256tools/xngen src/f32-gemm/wasmsimd-splat.c.in -D MR=6 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/6x8inc-minmax-wasmsimd-x86-splat.c &
Marat Dukhan802808c2020-06-16 11:01:17 -0700257
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700258### LOAD4+PERMUTE micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700259tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/1x8s4-minmax-wasmsimd-arm.c &
260tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/1x8s4inc-minmax-wasmsimd-arm.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700261
Frank Barchardaae722a2021-08-30 12:37:30 -0700262tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=3 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/3x8s4-minmax-wasmsimd-arm.c &
263tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=3 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/3x8s4inc-minmax-wasmsimd-arm.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700264
Frank Barchardaae722a2021-08-30 12:37:30 -0700265tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=4 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/4x8s4-minmax-wasmsimd-arm.c &
266tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=4 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/4x8s4inc-minmax-wasmsimd-arm.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700267
Frank Barchardaae722a2021-08-30 12:37:30 -0700268tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=5 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/5x8s4-minmax-wasmsimd-arm.c &
269tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=5 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/5x8s4inc-minmax-wasmsimd-arm.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700270
Frank Barchardaae722a2021-08-30 12:37:30 -0700271tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=6 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/6x8s4-minmax-wasmsimd-arm.c &
272tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=6 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/6x8s4inc-minmax-wasmsimd-arm.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700273
Frank Barchardaae722a2021-08-30 12:37:30 -0700274tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=0 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/1x8s4-minmax-wasmsimd-arm.c &
275tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=0 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/1x8s4inc-minmax-wasmsimd-arm.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700276
Frank Barchardaae722a2021-08-30 12:37:30 -0700277tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/1x8s4-minmax-wasmsimd-x86.c &
278tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/1x8s4inc-minmax-wasmsimd-x86.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700279
Frank Barchardaae722a2021-08-30 12:37:30 -0700280tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=3 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/3x8s4-minmax-wasmsimd-x86.c &
281tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=3 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/3x8s4inc-minmax-wasmsimd-x86.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700282
Frank Barchardaae722a2021-08-30 12:37:30 -0700283tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=4 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/4x8s4-minmax-wasmsimd-x86.c &
284tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=4 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/4x8s4inc-minmax-wasmsimd-x86.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700285
Frank Barchardaae722a2021-08-30 12:37:30 -0700286tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=5 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/5x8s4-minmax-wasmsimd-x86.c &
287tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=5 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/5x8s4inc-minmax-wasmsimd-x86.c &
Marat Dukhan802808c2020-06-16 11:01:17 -0700288
Frank Barchardaae722a2021-08-30 12:37:30 -0700289tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=6 -D NR=8 -D X86=1 -D INC=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/6x8s4-minmax-wasmsimd-x86.c &
290tools/xngen src/f32-gemm/wasmsimd-s4.c.in -D MR=6 -D NR=8 -D X86=1 -D INC=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen-inc/6x8s4inc-minmax-wasmsimd-x86.c &
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700291
Marat Dukhane39e6462020-07-09 01:33:36 -0700292### MRx2 micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700293tools/xngen src/f32-gemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/4x2c4-minmax-wasmsimd-arm.c &
294tools/xngen src/f32-gemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-gemm/gen/4x2c4-minmax-wasmsimd-x86.c &
295tools/xngen src/f32-gemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=RELU -o src/f32-gemm/gen/4x2c4-relu-wasmsimd.c &
296tools/xngen src/f32-gemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-gemm/gen/4x2c4-wasmsimd.c &
Marat Dukhane39e6462020-07-09 01:33:36 -0700297
XNNPACK Teamb455b122019-09-27 18:10:33 -0700298################################### x86 SSE ###################################
299### LOAD1+BROADCAST micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700300tools/xngen src/f32-gemm/sse-load1.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8-minmax-sse-load1.c &
301tools/xngen src/f32-gemm/sse-load1.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8inc-minmax-sse-load1.c &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700302
Frank Barchardaae722a2021-08-30 12:37:30 -0700303tools/xngen src/f32-gemm/sse-load1.c.in -D MR=3 -D NR=8 -D INC=0 -o src/f32-gemm/gen/3x8-minmax-sse-load1.c &
304tools/xngen src/f32-gemm/sse-load1.c.in -D MR=3 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/3x8inc-minmax-sse-load1.c &
Marat Dukhan802fcae2020-12-11 14:37:25 -0800305
Frank Barchardaae722a2021-08-30 12:37:30 -0700306tools/xngen src/f32-gemm/sse-load1.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8-minmax-sse-load1.c &
307tools/xngen src/f32-gemm/sse-load1.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8inc-minmax-sse-load1.c &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700308
Frank Barchardaae722a2021-08-30 12:37:30 -0700309tools/xngen src/f32-gemm/sse-load1.c.in -D MR=5 -D NR=8 -D INC=0 -o src/f32-gemm/gen/5x8-minmax-sse-load1.c &
310tools/xngen src/f32-gemm/sse-load1.c.in -D MR=5 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/5x8inc-minmax-sse-load1.c &
Marat Dukhan802fcae2020-12-11 14:37:25 -0800311
312### LOAD4+DUPLICATE micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700313tools/xngen src/f32-gemm/sse-dup.c.in -D MR=1 -D NR=8 -D INC=0 -D SSE=1 -o src/f32-gemm/gen/1x8-minmax-sse-dup.c &
314tools/xngen src/f32-gemm/sse-dup.c.in -D MR=1 -D NR=8 -D INC=1 -D SSE=1 -o src/f32-gemm/gen-inc/1x8inc-minmax-sse-dup.c &
Marat Dukhan802fcae2020-12-11 14:37:25 -0800315
Frank Barchardaae722a2021-08-30 12:37:30 -0700316tools/xngen src/f32-gemm/sse-dup.c.in -D MR=3 -D NR=8 -D INC=0 -D SSE=1 -o src/f32-gemm/gen/3x8-minmax-sse-dup.c &
317tools/xngen src/f32-gemm/sse-dup.c.in -D MR=3 -D NR=8 -D INC=1 -D SSE=1 -o src/f32-gemm/gen-inc/3x8inc-minmax-sse-dup.c &
Marat Dukhan802fcae2020-12-11 14:37:25 -0800318
Frank Barchardaae722a2021-08-30 12:37:30 -0700319tools/xngen src/f32-gemm/sse-dup.c.in -D MR=4 -D NR=8 -D INC=0 -D SSE=1 -o src/f32-gemm/gen/4x8-minmax-sse-dup.c &
320tools/xngen src/f32-gemm/sse-dup.c.in -D MR=4 -D NR=8 -D INC=1 -D SSE=1 -o src/f32-gemm/gen-inc/4x8inc-minmax-sse-dup.c &
Marat Dukhan802fcae2020-12-11 14:37:25 -0800321
Frank Barchardaae722a2021-08-30 12:37:30 -0700322tools/xngen src/f32-gemm/sse-dup.c.in -D MR=5 -D NR=8 -D INC=0 -D SSE=1 -o src/f32-gemm/gen/5x8-minmax-sse-dup.c &
323tools/xngen src/f32-gemm/sse-dup.c.in -D MR=5 -D NR=8 -D INC=1 -D SSE=1 -o src/f32-gemm/gen-inc/5x8inc-minmax-sse-dup.c &
Marat Dukhan802fcae2020-12-11 14:37:25 -0800324
Frank Barchardaae722a2021-08-30 12:37:30 -0700325tools/xngen src/f32-gemm/sse-dup.c.in -D MR=1 -D NR=8 -D INC=0 -D SSE=2 -o src/f32-gemm/gen/1x8-minmax-sse2-dup.c &
326tools/xngen src/f32-gemm/sse-dup.c.in -D MR=1 -D NR=8 -D INC=1 -D SSE=2 -o src/f32-gemm/gen-inc/1x8inc-minmax-sse2-dup.c &
Marat Dukhan802fcae2020-12-11 14:37:25 -0800327
Frank Barchardaae722a2021-08-30 12:37:30 -0700328tools/xngen src/f32-gemm/sse-dup.c.in -D MR=3 -D NR=8 -D INC=0 -D SSE=2 -o src/f32-gemm/gen/3x8-minmax-sse2-dup.c &
329tools/xngen src/f32-gemm/sse-dup.c.in -D MR=3 -D NR=8 -D INC=1 -D SSE=2 -o src/f32-gemm/gen-inc/3x8inc-minmax-sse2-dup.c &
Marat Dukhan802fcae2020-12-11 14:37:25 -0800330
Frank Barchardaae722a2021-08-30 12:37:30 -0700331tools/xngen src/f32-gemm/sse-dup.c.in -D MR=4 -D NR=8 -D INC=0 -D SSE=2 -o src/f32-gemm/gen/4x8-minmax-sse2-dup.c &
332tools/xngen src/f32-gemm/sse-dup.c.in -D MR=4 -D NR=8 -D INC=1 -D SSE=2 -o src/f32-gemm/gen-inc/4x8inc-minmax-sse2-dup.c &
Marat Dukhan802fcae2020-12-11 14:37:25 -0800333
Frank Barchardaae722a2021-08-30 12:37:30 -0700334tools/xngen src/f32-gemm/sse-dup.c.in -D MR=5 -D NR=8 -D INC=0 -D SSE=2 -o src/f32-gemm/gen/5x8-minmax-sse2-dup.c &
335tools/xngen src/f32-gemm/sse-dup.c.in -D MR=5 -D NR=8 -D INC=1 -D SSE=2 -o src/f32-gemm/gen-inc/5x8inc-minmax-sse2-dup.c &
Marat Dukhan802fcae2020-12-11 14:37:25 -0800336
XNNPACK Teamb455b122019-09-27 18:10:33 -0700337### LOAD4+PERMUTE micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700338tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8s4-minmax-sse.c &
339tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4inc-minmax-sse.c &
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700340
Frank Barchardaae722a2021-08-30 12:37:30 -0700341tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=3 -D NR=8 -D INC=0 -o src/f32-gemm/gen/3x8s4-minmax-sse.c &
342tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=3 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/3x8s4inc-minmax-sse.c &
Marat Dukhan802fcae2020-12-11 14:37:25 -0800343
Frank Barchardaae722a2021-08-30 12:37:30 -0700344tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8s4-minmax-sse.c &
345tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4inc-minmax-sse.c &
Marat Dukhan802fcae2020-12-11 14:37:25 -0800346
Frank Barchardaae722a2021-08-30 12:37:30 -0700347tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=5 -D NR=8 -D INC=0 -o src/f32-gemm/gen/5x8s4-minmax-sse.c &
348tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=5 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/5x8s4inc-minmax-sse.c &
Marat Dukhan802fcae2020-12-11 14:37:25 -0800349
Marat Dukhanb00004d2020-02-13 09:28:35 -0800350### MRx2 micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700351tools/xngen src/f32-gemm/MRx2c4-sse.c.in -D MR=4 -D NR=2 -o src/f32-gemm/gen/4x2c4-minmax-sse.c &
XNNPACK Teamb455b122019-09-27 18:10:33 -0700352
Marat Dukhanfda12b82019-11-21 12:27:59 -0800353################################### x86 AVX ###################################
354### AVX+BROADCAST micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700355tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/1x8-minmax-avx-broadcast.c &
356tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/1x8inc-minmax-avx-broadcast.c &
Marat Dukhanfda12b82019-11-21 12:27:59 -0800357
Frank Barchardaae722a2021-08-30 12:37:30 -0700358tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/4x8-minmax-avx-broadcast.c &
359tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/4x8inc-minmax-avx-broadcast.c &
Marat Dukhanfda12b82019-11-21 12:27:59 -0800360
Frank Barchardaae722a2021-08-30 12:37:30 -0700361tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/5x8-minmax-avx-broadcast.c &
362tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/5x8inc-minmax-avx-broadcast.c &
Marat Dukhanfda12b82019-11-21 12:27:59 -0800363
Frank Barchardaae722a2021-08-30 12:37:30 -0700364tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/6x8-minmax-avx-broadcast.c &
365tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/6x8inc-minmax-avx-broadcast.c &
Marat Dukhanfda12b82019-11-21 12:27:59 -0800366
Frank Barchardaae722a2021-08-30 12:37:30 -0700367tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/7x8-minmax-avx-broadcast.c &
368tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/7x8inc-minmax-avx-broadcast.c &
Marat Dukhaneccfd712019-12-08 16:49:27 -0800369
Frank Barchardaae722a2021-08-30 12:37:30 -0700370tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/1x16-minmax-avx-broadcast.c &
371tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/1x16inc-minmax-avx-broadcast.c &
Marat Dukhaneccfd712019-12-08 16:49:27 -0800372
Frank Barchardaae722a2021-08-30 12:37:30 -0700373tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/3x16-minmax-avx-broadcast.c &
374tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/3x16inc-minmax-avx-broadcast.c &
Marat Dukhaneccfd712019-12-08 16:49:27 -0800375
Frank Barchardaae722a2021-08-30 12:37:30 -0700376tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/4x16-minmax-avx-broadcast.c &
377tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/4x16inc-minmax-avx-broadcast.c &
Marat Dukhaneccfd712019-12-08 16:49:27 -0800378
Frank Barchardaae722a2021-08-30 12:37:30 -0700379tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/5x16-minmax-avx-broadcast.c &
380tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/5x16inc-minmax-avx-broadcast.c &
Marat Dukhanfda12b82019-11-21 12:27:59 -0800381### FMA3+BROADCAST micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700382tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/1x8-minmax-fma3-broadcast.c &
383tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/1x8inc-minmax-fma3-broadcast.c &
Marat Dukhanfda12b82019-11-21 12:27:59 -0800384
Frank Barchardaae722a2021-08-30 12:37:30 -0700385tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/4x8-minmax-fma3-broadcast.c &
386tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/4x8inc-minmax-fma3-broadcast.c &
Marat Dukhanfda12b82019-11-21 12:27:59 -0800387
Frank Barchardaae722a2021-08-30 12:37:30 -0700388tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/5x8-minmax-fma3-broadcast.c &
389tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/5x8inc-minmax-fma3-broadcast.c &
Marat Dukhanfda12b82019-11-21 12:27:59 -0800390
Frank Barchardaae722a2021-08-30 12:37:30 -0700391tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/6x8-minmax-fma3-broadcast.c &
392tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/6x8inc-minmax-fma3-broadcast.c &
Marat Dukhanfda12b82019-11-21 12:27:59 -0800393
Frank Barchardaae722a2021-08-30 12:37:30 -0700394tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/7x8-minmax-fma3-broadcast.c &
395tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/7x8inc-minmax-fma3-broadcast.c &
Marat Dukhanfda12b82019-11-21 12:27:59 -0800396
Frank Barchardaae722a2021-08-30 12:37:30 -0700397tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/8x8-minmax-fma3-broadcast.c &
398tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/8x8inc-minmax-fma3-broadcast.c &
XNNPACK Teamb455b122019-09-27 18:10:33 -0700399
Frank Barchardaae722a2021-08-30 12:37:30 -0700400tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/1x16-minmax-fma3-broadcast.c &
401tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/1x16inc-minmax-fma3-broadcast.c &
Marat Dukhaneccfd712019-12-08 16:49:27 -0800402
Frank Barchardaae722a2021-08-30 12:37:30 -0700403tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/3x16-minmax-fma3-broadcast.c &
404tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/3x16inc-minmax-fma3-broadcast.c &
Marat Dukhaneccfd712019-12-08 16:49:27 -0800405
Frank Barchardaae722a2021-08-30 12:37:30 -0700406tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/4x16-minmax-fma3-broadcast.c &
407tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/4x16inc-minmax-fma3-broadcast.c &
Marat Dukhaneccfd712019-12-08 16:49:27 -0800408
Frank Barchardaae722a2021-08-30 12:37:30 -0700409tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/5x16-minmax-fma3-broadcast.c &
410tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/5x16inc-minmax-fma3-broadcast.c &
Marat Dukhaneccfd712019-12-08 16:49:27 -0800411
Frank Barchardaae722a2021-08-30 12:37:30 -0700412tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=1 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/1x16s4-minmax-fma3-broadcast.c &
413tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=1 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/1x16s4inc-minmax-fma3-broadcast.c &
Marat Dukhan27121322019-12-09 14:57:40 -0800414
Frank Barchardaae722a2021-08-30 12:37:30 -0700415tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=3 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/3x16s4-minmax-fma3-broadcast.c &
416tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=3 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/3x16s4inc-minmax-fma3-broadcast.c &
Marat Dukhan27121322019-12-09 14:57:40 -0800417
Frank Barchardaae722a2021-08-30 12:37:30 -0700418tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=4 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/4x16s4-minmax-fma3-broadcast.c &
419tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=4 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/4x16s4inc-minmax-fma3-broadcast.c &
Marat Dukhan27121322019-12-09 14:57:40 -0800420
Frank Barchardaae722a2021-08-30 12:37:30 -0700421tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=5 -D NR=16 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/5x16s4-minmax-fma3-broadcast.c &
422tools/xngen src/f32-gemm/avx-shuffle4.c.in -D MR=5 -D NR=16 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/5x16s4inc-minmax-fma3-broadcast.c &
Marat Dukhan27121322019-12-09 14:57:40 -0800423
Marat Dukhan0f349c42019-11-27 11:58:54 -0800424################################# x86 AVX-512 #################################
425### AVX512F+BROADCAST micro-kernels
Frank Barchardaae722a2021-08-30 12:37:30 -0700426tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -D INC=0 -o src/f32-gemm/gen/1x16-minmax-avx512f-broadcast.c &
427tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/1x16inc-minmax-avx512f-broadcast.c &
Marat Dukhan0f349c42019-11-27 11:58:54 -0800428
Frank Barchardaae722a2021-08-30 12:37:30 -0700429tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=4 -D NR=16 -D INC=0 -o src/f32-gemm/gen/4x16-minmax-avx512f-broadcast.c &
430tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=4 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/4x16inc-minmax-avx512f-broadcast.c &
Marat Dukhan0f349c42019-11-27 11:58:54 -0800431
Frank Barchardaae722a2021-08-30 12:37:30 -0700432tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=5 -D NR=16 -D INC=0 -o src/f32-gemm/gen/5x16-minmax-avx512f-broadcast.c &
433tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=5 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/5x16inc-minmax-avx512f-broadcast.c &
Marat Dukhan0f349c42019-11-27 11:58:54 -0800434
Frank Barchardaae722a2021-08-30 12:37:30 -0700435tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=6 -D NR=16 -D INC=0 -o src/f32-gemm/gen/6x16-minmax-avx512f-broadcast.c &
436tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=6 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/6x16inc-minmax-avx512f-broadcast.c &
Marat Dukhan0f349c42019-11-27 11:58:54 -0800437
Frank Barchardaae722a2021-08-30 12:37:30 -0700438tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=7 -D NR=16 -D INC=0 -o src/f32-gemm/gen/7x16-minmax-avx512f-broadcast.c &
439tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=7 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/7x16inc-minmax-avx512f-broadcast.c &
Marat Dukhan0f349c42019-11-27 11:58:54 -0800440
Frank Barchardaae722a2021-08-30 12:37:30 -0700441tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=8 -D NR=16 -D INC=0 -o src/f32-gemm/gen/8x16-minmax-avx512f-broadcast.c &
442tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=8 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/8x16inc-minmax-avx512f-broadcast.c &
Marat Dukhan0f349c42019-11-27 11:58:54 -0800443
XNNPACK Teamb455b122019-09-27 18:10:33 -0700444################################## Unit tests #################################
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800445tools/generate-gemm-test.py --spec test/f32-gemm.yaml --output test/f32-gemm.cc --output test/f32-gemm-2.cc &
446tools/generate-gemm-test.py --spec test/f32-gemm-relu.yaml --output test/f32-gemm-relu.cc --output test/f32-gemm-relu-2.cc &
447tools/generate-gemm-test.py --spec test/f32-gemm-minmax.yaml --output test/f32-gemm-minmax.cc --output test/f32-gemm-minmax-2.cc &
448tools/generate-gemm-test.py --spec test/f32-gemminc-minmax.yaml --output test/f32-gemminc-minmax.cc --output test/f32-gemminc-minmax-2.cc &
Frank Barchardaae722a2021-08-30 12:37:30 -0700449
450wait