#!/bin/sh
# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Regenerates the f32-igemm micro-kernel sources and their unit tests.
#
# Each tools/xngen call takes one template under src/f32-igemm/ (*.c.in), a
# set of -D NAME=VALUE definitions (MR, NR and, where the template uses them,
# FMA and DUP), and writes the result to the file given with -o.
# The last command rebuilds test/f32-igemm.cc from test/f32-igemm.yaml.
#
# All paths are relative: run this from the directory that contains tools/,
# src/ and test/.

#################################### Scalar ###################################
tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -o src/f32-igemm/1x4-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -o src/f32-igemm/2x4-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -o src/f32-igemm/4x2-scalar.c
tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -o src/f32-igemm/4x4-scalar.c

################################### ARM NEON ##################################
### LD64 micro-kernels
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/1x8-neon-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/1x8-neonfma-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=0 -D DUP=0 -o src/f32-igemm/4x4-neon-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=1 -D DUP=0 -o src/f32-igemm/4x4-neonfma-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/4x8-neon-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/4x8-neonfma-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/6x8-neon-lane-ld64.c
tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/6x8-neonfma-lane-ld64.c
### LD128 micro-kernels
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/4x8-neon-lane-ld128.c
tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/4x8-neonfma-lane-ld128.c
### MRx2 micro-kernels
tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=0 -D DUP=0 -o src/f32-igemm/4x2-neon-lane-ld64.c
tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=1 -D DUP=0 -o src/f32-igemm/4x2-neonfma-lane-ld64.c
### LOAD4+PERMUTE micro-kernels
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/1x8s4-neon.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -o src/f32-igemm/1x8s4-neonfma.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/4x8s4-neon.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -o src/f32-igemm/4x8s4-neonfma.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/6x8s4-neon.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -o src/f32-igemm/6x8s4-neonfma.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -o src/f32-igemm/8x8s4-neon.c
tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -o src/f32-igemm/8x8s4-neonfma.c

#################################### PSIMD ####################################
### LOAD1+BROADCAST micro-kernels
tools/xngen src/f32-igemm/psimd-loadsplat.c.in -D MR=1 -D NR=8 -o src/f32-igemm/1x8-psimd-loadsplat.c
tools/xngen src/f32-igemm/psimd-loadsplat.c.in -D MR=4 -D NR=8 -o src/f32-igemm/4x8-psimd-loadsplat.c
tools/xngen src/f32-igemm/psimd-loadsplat.c.in -D MR=6 -D NR=8 -o src/f32-igemm/6x8-psimd-loadsplat.c
### LOAD4+DUPLICATE micro-kernels
tools/xngen src/f32-igemm/psimd-splat.c.in -D MR=1 -D NR=8 -o src/f32-igemm/1x8-psimd-splat.c
tools/xngen src/f32-igemm/psimd-splat.c.in -D MR=4 -D NR=8 -o src/f32-igemm/4x8-psimd-splat.c
tools/xngen src/f32-igemm/psimd-splat.c.in -D MR=6 -D NR=8 -o src/f32-igemm/6x8-psimd-splat.c
### LOAD4+PERMUTE micro-kernels
tools/xngen src/f32-igemm/psimd-s4.c.in -D MR=1 -D NR=8 -o src/f32-igemm/1x8s4-psimd.c
tools/xngen src/f32-igemm/psimd-s4.c.in -D MR=4 -D NR=8 -o src/f32-igemm/4x8s4-psimd.c
tools/xngen src/f32-igemm/psimd-s4.c.in -D MR=6 -D NR=8 -o src/f32-igemm/6x8s4-psimd.c
### MRx2 micro-kernels
tools/xngen src/f32-igemm/MRx2c4-psimd.c.in -D MR=4 -D NR=2 -o src/f32-igemm/4x2c4-psimd.c

################################### x86 SSE ###################################
### LOAD1+BROADCAST micro-kernels
tools/xngen src/f32-igemm/sse-load1.c.in -D MR=1 -D NR=8 -o src/f32-igemm/1x8-sse-load1.c
tools/xngen src/f32-igemm/sse-load1.c.in -D MR=4 -D NR=8 -o src/f32-igemm/4x8-sse-load1.c
### LOAD4+DUPLICATE micro-kernels
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=1 -D NR=8 -o src/f32-igemm/1x8-sse-dup.c
tools/xngen src/f32-igemm/sse-dup.c.in -D MR=4 -D NR=8 -o src/f32-igemm/4x8-sse-dup.c
### LOAD4+PERMUTE micro-kernels
tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=1 -D NR=8 -o src/f32-igemm/1x8s4-sse.c
tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=4 -D NR=8 -o src/f32-igemm/4x8s4-sse.c
### MRx2 micro-kernels
tools/xngen src/f32-igemm/MRx2c4-sse.c.in -D MR=4 -D NR=2 -o src/f32-igemm/4x2c4-sse.c

################################### x86 AVX ###################################
### AVX+BROADCAST micro-kernels
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/1x8-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/4x8-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -o src/f32-igemm/5x8-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/6x8-avx-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -o src/f32-igemm/7x8-avx-broadcast.c
### FMA3+BROADCAST micro-kernels
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -o src/f32-igemm/1x8-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -o src/f32-igemm/4x8-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -o src/f32-igemm/5x8-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -o src/f32-igemm/6x8-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -o src/f32-igemm/7x8-fma3-broadcast.c
tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -o src/f32-igemm/8x8-fma3-broadcast.c

################################## Unit tests #################################
tools/generate-gemm-test.py --spec test/f32-igemm.yaml --output test/f32-igemm.cc