blob: ae87bb7f68cc9046b09a7d9440e79a165bcbe11d [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001#!/bin/sh
2# Copyright 2019 Google LLC
3#
4# This source code is licensed under the BSD-style license found in the
5# LICENSE file in the root directory of this source tree.
6
7#################################### Scalar ###################################
# Each MRxNR tile is rendered from scalar.c.in twice: INC=0 into gen/ and INC=1
# into gen-inc/. The 4x2 tile is emitted for INC=0 only.
Marat Dukhan40a672f2019-11-25 03:08:22 -08008tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D INC=0 -o src/f32-gemm/gen/1x4-scalar.c
9tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D INC=1 -o src/f32-gemm/gen-inc/1x4-scalar.c
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070010
Marat Dukhan40a672f2019-11-25 03:08:22 -080011tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D INC=0 -o src/f32-gemm/gen/2x4-scalar.c
12tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D INC=1 -o src/f32-gemm/gen-inc/2x4-scalar.c
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070013
Marat Dukhan40a672f2019-11-25 03:08:22 -080014tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=2 -D INC=0 -o src/f32-gemm/gen/4x2-scalar.c
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070015
Marat Dukhan40a672f2019-11-25 03:08:22 -080016tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D INC=0 -o src/f32-gemm/gen/4x4-scalar.c
17tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D INC=1 -o src/f32-gemm/gen-inc/4x4-scalar.c
XNNPACK Teamb455b122019-09-27 18:10:33 -070018
19############################### AArch64 assembly ##############################
# Each assembly template (.S.in) has its tile size and target core in its file
# name, so no MR/NR is passed; only INC varies (INC=0 -> gen/, INC=1 -> gen-inc/).
Marat Dukhan40a672f2019-11-25 03:08:22 -080020tools/xngen src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/1x12-aarch64-neonfma-cortex-a53.S
21tools/xngen src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/1x12-aarch64-neonfma-cortex-a53.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070022
Marat Dukhan40a672f2019-11-25 03:08:22 -080023tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a53.S
24tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a53.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070025
Marat Dukhan40a672f2019-11-25 03:08:22 -080026tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a57.S.in -D INC=0 -o src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a57.S
27tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a57.S.in -D INC=1 -o src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a57.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070028
Marat Dukhan40a672f2019-11-25 03:08:22 -080029tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -o src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a75.S
30tools/xngen src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -o src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a75.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070031
Marat Dukhan40a672f2019-11-25 03:08:22 -080032tools/xngen src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/4x12-aarch64-neonfma-cortex-a53.S
33tools/xngen src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x12-aarch64-neonfma-cortex-a53.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070034
Marat Dukhan40a672f2019-11-25 03:08:22 -080035tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a53.S
36tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a53.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070037
Marat Dukhan40a672f2019-11-25 03:08:22 -080038tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a57.S.in -D INC=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a57.S
39tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a57.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a57.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070040
Marat Dukhan40a672f2019-11-25 03:08:22 -080041tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a75.S
42tools/xngen src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a75.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070043
Marat Dukhan40a672f2019-11-25 03:08:22 -080044tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld128.S.in -D INC=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-ld128.S
45tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld128.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-ld128.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070046
Marat Dukhan40a672f2019-11-25 03:08:22 -080047tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld64.S.in -D INC=0 -o src/f32-gemm/gen/4x8-aarch64-neonfma-ld64.S
48tools/xngen src/f32-gemm/4x8-aarch64-neonfma-ld64.S.in -D INC=1 -o src/f32-gemm/gen-inc/4x8-aarch64-neonfma-ld64.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070049
Marat Dukhan40a672f2019-11-25 03:08:22 -080050tools/xngen src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -o src/f32-gemm/gen/5x8-aarch64-neonfma-cortex-a75.S
51tools/xngen src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -o src/f32-gemm/gen-inc/5x8-aarch64-neonfma-cortex-a75.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070052
Marat Dukhan40a672f2019-11-25 03:08:22 -080053tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a53.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a53.S
54tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a53.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a53.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070055
Marat Dukhan40a672f2019-11-25 03:08:22 -080056tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a57.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a57.S
57tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a57.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a57.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070058
Marat Dukhan40a672f2019-11-25 03:08:22 -080059tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a73.S
60tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a73.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070061
Marat Dukhan40a672f2019-11-25 03:08:22 -080062tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a75.S
63tools/xngen src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a75.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070064
Marat Dukhan40a672f2019-11-25 03:08:22 -080065tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld64.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-ld64.S
66tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld64.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-ld64.S
Marat Dukhanba7c3bb2019-10-27 19:53:54 -070067
Marat Dukhan40a672f2019-11-25 03:08:22 -080068tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld128.S.in -D INC=0 -o src/f32-gemm/gen/6x8-aarch64-neonfma-ld128.S
69tools/xngen src/f32-gemm/6x8-aarch64-neonfma-ld128.S.in -D INC=1 -o src/f32-gemm/gen-inc/6x8-aarch64-neonfma-ld128.S
XNNPACK Teamb455b122019-09-27 18:10:33 -070070
71################################### ARM NEON ##################################
# Output naming in this group: FMA=0 -> "neon", FMA=1 -> "neonfma";
# DUP=0 -> "lane"; INC=0 -> gen/, INC=1 -> gen-inc/.
72### LD64 micro-kernels
Marat Dukhan40a672f2019-11-25 03:08:22 -080073tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/1x8-neon-lane-ld64.c
74tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/1x8-neon-lane-ld64.c
75tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/1x8-neonfma-lane-ld64.c
76tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/1x8-neonfma-lane-ld64.c
77tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-neon-lane-ld64.c
78tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8-neon-lane-ld64.c
79tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-neonfma-lane-ld64.c
80tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8-neonfma-lane-ld64.c
81tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/5x8-neon-lane-ld64.c
82tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/5x8-neon-lane-ld64.c
83tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/5x8-neonfma-lane-ld64.c
84tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=5 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/5x8-neonfma-lane-ld64.c
85tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-neon-lane-ld64.c
86tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8-neon-lane-ld64.c
87tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-neonfma-lane-ld64.c
88tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8-neonfma-lane-ld64.c
XNNPACK Teamb455b122019-09-27 18:10:33 -070089### LD128 micro-kernels
Marat Dukhan40a672f2019-11-25 03:08:22 -080090tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-neon-lane-ld128.c
91tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8-neon-lane-ld128.c
92tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x8-neonfma-lane-ld128.c
93tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/4x8-neonfma-lane-ld128.c
Frank Barchard69172d92019-11-26 16:22:39 -080094tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-neon-lane-ld128.c
95tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8-neon-lane-ld128.c
96tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/6x8-neonfma-lane-ld128.c
97tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=0 -o src/f32-gemm/gen-inc/6x8-neonfma-lane-ld128.c
XNNPACK Teamb455b122019-09-27 18:10:33 -070098### MRx2 micro-kernels
# Only INC=0 variants are generated for the 4x2 tiles (nothing goes to gen-inc/).
Marat Dukhan40a672f2019-11-25 03:08:22 -080099tools/xngen src/f32-gemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=0 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x2-neon-lane-ld64.c
100tools/xngen src/f32-gemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=1 -D INC=0 -D DUP=0 -o src/f32-gemm/gen/4x2-neonfma-lane-ld64.c
Frank Barchard5243bb02019-11-22 16:37:50 -0800101### DUP LD64 micro-kernels
# neon-ld64.c.in rendered with DUP=1; outputs are named *-dup-ld64.
# Generated for MR=1, 4 and 6 (NR=8), each with FMA=0/1 and INC=0/1.
Marat Dukhan40a672f2019-11-25 03:08:22 -0800102tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/1x8-neon-dup-ld64.c
103tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/1x8-neon-dup-ld64.c
104tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/1x8-neonfma-dup-ld64.c
105tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/1x8-neonfma-dup-ld64.c
106tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-neon-dup-ld64.c
107tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8-neon-dup-ld64.c
108tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-neonfma-dup-ld64.c
109tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8-neonfma-dup-ld64.c
110tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-neon-dup-ld64.c
111tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8-neon-dup-ld64.c
112tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-neonfma-dup-ld64.c
113tools/xngen src/f32-gemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8-neonfma-dup-ld64.c
Frank Barchard5243bb02019-11-22 16:37:50 -0800114### DUP LD128 micro-kernels
# neon-ld128.c.in rendered with DUP=1; outputs are named *-dup-ld128.
# Generated for MR=4 and MR=6 (NR=8), each with FMA=0/1 and INC=0/1.
Marat Dukhan40a672f2019-11-25 03:08:22 -0800115tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-neon-dup-ld128.c
116tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8-neon-dup-ld128.c
117tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/4x8-neonfma-dup-ld128.c
118tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/4x8-neonfma-dup-ld128.c
Frank Barchard69172d92019-11-26 16:22:39 -0800119tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-neon-dup-ld128.c
120tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8-neon-dup-ld128.c
121tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -D DUP=1 -o src/f32-gemm/gen/6x8-neonfma-dup-ld128.c
122tools/xngen src/f32-gemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -D DUP=1 -o src/f32-gemm/gen-inc/6x8-neonfma-dup-ld128.c
Frank Barcharddf06d802019-11-20 15:53:46 -0800123### LOAD4+PERMUTE micro-kernels
# neon-shuffle.c.in takes no DUP flag. Outputs carry an "s4" suffix after the
# tile size (e.g. 1x8s4) and are generated for MR=1, 4, 6 and 8.
Marat Dukhan40a672f2019-11-25 03:08:22 -0800124tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/1x8s4-neon.c
125tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4-neon.c
126tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/1x8s4-neonfma.c
127tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4-neonfma.c
128tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/4x8s4-neon.c
129tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4-neon.c
130tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/4x8s4-neonfma.c
131tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4-neonfma.c
132tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/6x8s4-neon.c
133tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/6x8s4-neon.c
134tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/6x8s4-neonfma.c
135tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/6x8s4-neonfma.c
136tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/8x8s4-neon.c
137tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/8x8s4-neon.c
138tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -D INC=0 -o src/f32-gemm/gen/8x8s4-neonfma.c
139tools/xngen src/f32-gemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -D INC=1 -o src/f32-gemm/gen-inc/8x8s4-neonfma.c
Frank Barcharddf06d802019-11-20 15:53:46 -0800140
XNNPACK Teamb455b122019-09-27 18:10:33 -0700141#################################### PSIMD ####################################
# Three templates (loadsplat, splat, s4), each rendered for MR=1, 4 and 6 with
# NR=8, once with INC=0 (gen/) and once with INC=1 (gen-inc/).
142### LOAD1+BROADCAST micro-kernels
Marat Dukhan40a672f2019-11-25 03:08:22 -0800143tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8-psimd-loadsplat.c
144tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8-psimd-loadsplat.c
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700145
Marat Dukhan40a672f2019-11-25 03:08:22 -0800146tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8-psimd-loadsplat.c
147tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8-psimd-loadsplat.c
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700148
Marat Dukhan40a672f2019-11-25 03:08:22 -0800149tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=6 -D NR=8 -D INC=0 -o src/f32-gemm/gen/6x8-psimd-loadsplat.c
150tools/xngen src/f32-gemm/psimd-loadsplat.c.in -D MR=6 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/6x8-psimd-loadsplat.c
XNNPACK Teamb455b122019-09-27 18:10:33 -0700151### LOAD4+DUPLICATE micro-kernels
Marat Dukhan40a672f2019-11-25 03:08:22 -0800152tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8-psimd-splat.c
153tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8-psimd-splat.c
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700154
Marat Dukhan40a672f2019-11-25 03:08:22 -0800155tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8-psimd-splat.c
156tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8-psimd-splat.c
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700157
Marat Dukhan40a672f2019-11-25 03:08:22 -0800158tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=6 -D NR=8 -D INC=0 -o src/f32-gemm/gen/6x8-psimd-splat.c
159tools/xngen src/f32-gemm/psimd-splat.c.in -D MR=6 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/6x8-psimd-splat.c
XNNPACK Teamb455b122019-09-27 18:10:33 -0700160### LOAD4+PERMUTE micro-kernels
Marat Dukhan40a672f2019-11-25 03:08:22 -0800161tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8s4-psimd.c
162tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4-psimd.c
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700163
Marat Dukhan40a672f2019-11-25 03:08:22 -0800164tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8s4-psimd.c
165tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4-psimd.c
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700166
Marat Dukhan40a672f2019-11-25 03:08:22 -0800167tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=6 -D NR=8 -D INC=0 -o src/f32-gemm/gen/6x8s4-psimd.c
168tools/xngen src/f32-gemm/psimd-s4.c.in -D MR=6 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/6x8s4-psimd.c
XNNPACK Teamb455b122019-09-27 18:10:33 -0700169
170################################### x86 SSE ###################################
# Three templates (load1, dup, shuffle), each rendered for MR=1 and MR=4 with
# NR=8, once with INC=0 (gen/) and once with INC=1 (gen-inc/).
171### LOAD1+BROADCAST micro-kernels
Marat Dukhan40a672f2019-11-25 03:08:22 -0800172tools/xngen src/f32-gemm/sse-load1.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8-sse-load1.c
173tools/xngen src/f32-gemm/sse-load1.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8-sse-load1.c
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700174
Marat Dukhan40a672f2019-11-25 03:08:22 -0800175tools/xngen src/f32-gemm/sse-load1.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8-sse-load1.c
176tools/xngen src/f32-gemm/sse-load1.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8-sse-load1.c
XNNPACK Teamb455b122019-09-27 18:10:33 -0700177### LOAD4+DUPLICATE micro-kernels
Marat Dukhan40a672f2019-11-25 03:08:22 -0800178tools/xngen src/f32-gemm/sse-dup.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8-sse-dup.c
179tools/xngen src/f32-gemm/sse-dup.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8-sse-dup.c
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700180
Marat Dukhan40a672f2019-11-25 03:08:22 -0800181tools/xngen src/f32-gemm/sse-dup.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8-sse-dup.c
182tools/xngen src/f32-gemm/sse-dup.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8-sse-dup.c
XNNPACK Teamb455b122019-09-27 18:10:33 -0700183### LOAD4+PERMUTE micro-kernels
Marat Dukhan40a672f2019-11-25 03:08:22 -0800184tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=1 -D NR=8 -D INC=0 -o src/f32-gemm/gen/1x8s4-sse.c
185tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=1 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/1x8s4-sse.c
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700186
Marat Dukhan40a672f2019-11-25 03:08:22 -0800187tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=4 -D NR=8 -D INC=0 -o src/f32-gemm/gen/4x8s4-sse.c
188tools/xngen src/f32-gemm/sse-shuffle.c.in -D MR=4 -D NR=8 -D INC=1 -o src/f32-gemm/gen-inc/4x8s4-sse.c
XNNPACK Teamb455b122019-09-27 18:10:33 -0700189
Marat Dukhanfda12b82019-11-21 12:27:59 -0800190################################### x86 AVX ###################################
# avx-broadcast.c.in serves both groups: FMA=0 outputs are named "avx" and
# FMA=3 outputs "fma3". The 8x8 tile is generated for FMA=3 only.
191### AVX+BROADCAST micro-kernels
Marat Dukhan40a672f2019-11-25 03:08:22 -0800192tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/1x8-avx-broadcast.c
193tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/1x8-avx-broadcast.c
Marat Dukhanfda12b82019-11-21 12:27:59 -0800194
Marat Dukhan40a672f2019-11-25 03:08:22 -0800195tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/4x8-avx-broadcast.c
196tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/4x8-avx-broadcast.c
Marat Dukhanfda12b82019-11-21 12:27:59 -0800197
Marat Dukhan40a672f2019-11-25 03:08:22 -0800198tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/5x8-avx-broadcast.c
199tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/5x8-avx-broadcast.c
Marat Dukhanfda12b82019-11-21 12:27:59 -0800200
Marat Dukhan40a672f2019-11-25 03:08:22 -0800201tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/6x8-avx-broadcast.c
202tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/6x8-avx-broadcast.c
Marat Dukhanfda12b82019-11-21 12:27:59 -0800203
Marat Dukhan40a672f2019-11-25 03:08:22 -0800204tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -D INC=0 -o src/f32-gemm/gen/7x8-avx-broadcast.c
205tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -D INC=1 -o src/f32-gemm/gen-inc/7x8-avx-broadcast.c
Marat Dukhanfda12b82019-11-21 12:27:59 -0800206### FMA3+BROADCAST micro-kernels
Marat Dukhan40a672f2019-11-25 03:08:22 -0800207tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/1x8-fma3-broadcast.c
208tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/1x8-fma3-broadcast.c
Marat Dukhanfda12b82019-11-21 12:27:59 -0800209
Marat Dukhan40a672f2019-11-25 03:08:22 -0800210tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/4x8-fma3-broadcast.c
211tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/4x8-fma3-broadcast.c
Marat Dukhanfda12b82019-11-21 12:27:59 -0800212
Marat Dukhan40a672f2019-11-25 03:08:22 -0800213tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/5x8-fma3-broadcast.c
214tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/5x8-fma3-broadcast.c
Marat Dukhanfda12b82019-11-21 12:27:59 -0800215
Marat Dukhan40a672f2019-11-25 03:08:22 -0800216tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/6x8-fma3-broadcast.c
217tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/6x8-fma3-broadcast.c
Marat Dukhanfda12b82019-11-21 12:27:59 -0800218
Marat Dukhan40a672f2019-11-25 03:08:22 -0800219tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/7x8-fma3-broadcast.c
220tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/7x8-fma3-broadcast.c
Marat Dukhanfda12b82019-11-21 12:27:59 -0800221
Marat Dukhan40a672f2019-11-25 03:08:22 -0800222tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -D INC=0 -o src/f32-gemm/gen/8x8-fma3-broadcast.c
223tools/xngen src/f32-gemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -D INC=1 -o src/f32-gemm/gen-inc/8x8-fma3-broadcast.c
XNNPACK Teamb455b122019-09-27 18:10:33 -0700224
Marat Dukhan0f349c42019-11-27 11:58:54 -0800225################################# x86 AVX-512 #################################
# All tiles use NR=16. MR=1, 4, 5, 6, 7 and 8 are each rendered with INC=0
# (gen/) and INC=1 (gen-inc/); no FMA flag is passed to this template.
226### AVX512F+BROADCAST micro-kernels
227tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -D INC=0 -o src/f32-gemm/gen/1x16-avx512f-broadcast.c
228tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/1x16-avx512f-broadcast.c
229
230tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=4 -D NR=16 -D INC=0 -o src/f32-gemm/gen/4x16-avx512f-broadcast.c
231tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=4 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/4x16-avx512f-broadcast.c
232
233tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=5 -D NR=16 -D INC=0 -o src/f32-gemm/gen/5x16-avx512f-broadcast.c
234tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=5 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/5x16-avx512f-broadcast.c
235
236tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=6 -D NR=16 -D INC=0 -o src/f32-gemm/gen/6x16-avx512f-broadcast.c
237tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=6 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/6x16-avx512f-broadcast.c
238
239tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=7 -D NR=16 -D INC=0 -o src/f32-gemm/gen/7x16-avx512f-broadcast.c
240tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=7 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/7x16-avx512f-broadcast.c
241
242tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=8 -D NR=16 -D INC=0 -o src/f32-gemm/gen/8x16-avx512f-broadcast.c
243tools/xngen src/f32-gemm/avx512-broadcast.c.in -D MR=8 -D NR=16 -D INC=1 -o src/f32-gemm/gen-inc/8x16-avx512f-broadcast.c
244
XNNPACK Teamb455b122019-09-27 18:10:33 -0700245################################## Unit tests #################################
# Regenerates the two test sources from their YAML specs (f32-gemm and
# f32-gemminc).
# NOTE(review): presumably a kernel added above also needs an entry in the
# matching spec file - confirm against tools/generate-gemm-test.py.
246tools/generate-gemm-test.py --spec test/f32-gemm.yaml --output test/f32-gemm.cc
Marat Dukhanba7c3bb2019-10-27 19:53:54 -0700247tools/generate-gemm-test.py --spec test/f32-gemminc.yaml --output test/f32-gemminc.cc