#!/bin/sh
# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

################################### ARM NEON ##################################
# Kernels generated from the src/f32-sigmoid/neon-p5.c.in template.
# Each configuration is generated for BATCH_TILE values 4, 8, 12, 16, 20, 24.

# RR_STEPS=1, FMA=1 configurations: one "neonfma-rr1-p5-<DIV_ALGO>" family per
# DIV_ALGO value.
for div_algo in div nr2fma nr1recps1fma nr2recps; do
  for batch_tile in 4 8 12 16 20 24; do
    tools/xngen src/f32-sigmoid/neon-p5.c.in \
      -D "BATCH_TILE=${batch_tile}" -D RR_STEPS=1 -D FMA=1 -D "DIV_ALGO=${div_algo}" \
      -o "src/f32-sigmoid/gen/neonfma-rr1-p5-${div_algo}-x${batch_tile}.c"
  done
done

# RR_STEPS=2, FMA=0 configuration: only DIV_ALGO=nr2recps is generated.
for batch_tile in 4 8 12 16 20 24; do
  tools/xngen src/f32-sigmoid/neon-p5.c.in \
    -D "BATCH_TILE=${batch_tile}" -D RR_STEPS=2 -D FMA=0 -D DIV_ALGO=nr2recps \
    -o "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x${batch_tile}.c"
done

# Kernels generated from the src/f32-sigmoid/neon-lut64-p2.c.in template.
# Each configuration is generated for BATCH_TILE values 4, 8, 12, 16, 20, 24.

# RR_STEPS=1, FMA=1 configurations: one "neonfma-rr1-lut64-p2-<DIV_ALGO>"
# family per DIV_ALGO value.
for div_algo in div nr2fma nr1recps1fma nr2recps; do
  for batch_tile in 4 8 12 16 20 24; do
    tools/xngen src/f32-sigmoid/neon-lut64-p2.c.in \
      -D "BATCH_TILE=${batch_tile}" -D RR_STEPS=1 -D FMA=1 -D "DIV_ALGO=${div_algo}" \
      -o "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-${div_algo}-x${batch_tile}.c"
  done
done

# RR_STEPS=2, FMA=0 configuration: only DIV_ALGO=nr2recps is generated.
for batch_tile in 4 8 12 16 20 24; do
  tools/xngen src/f32-sigmoid/neon-lut64-p2.c.in \
    -D "BATCH_TILE=${batch_tile}" -D RR_STEPS=2 -D FMA=0 -D DIV_ALGO=nr2recps \
    -o "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x${batch_tile}.c"
done

# Kernels generated from the src/f32-sigmoid/neon-lut2048-p1.c.in template.
# Each configuration is generated for BATCH_TILE values 4, 8, 12, 16, 20, 24.

# RR_STEPS=1, FMA=1 configurations: one "neonfma-rr1-lut2048-p1-<DIV_ALGO>"
# family per DIV_ALGO value.
for div_algo in div nr2fma nr1recps1fma nr2recps; do
  for batch_tile in 4 8 12 16 20 24; do
    tools/xngen src/f32-sigmoid/neon-lut2048-p1.c.in \
      -D "BATCH_TILE=${batch_tile}" -D RR_STEPS=1 -D FMA=1 -D "DIV_ALGO=${div_algo}" \
      -o "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-${div_algo}-x${batch_tile}.c"
  done
done

# RR_STEPS=2, FMA=0 configuration: only DIV_ALGO=nr2recps is generated.
for batch_tile in 4 8 12 16 20 24; do
  tools/xngen src/f32-sigmoid/neon-lut2048-p1.c.in \
    -D "BATCH_TILE=${batch_tile}" -D RR_STEPS=2 -D FMA=0 -D DIV_ALGO=nr2recps \
    -o "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x${batch_tile}.c"
done

################################### x86 SSE ###################################
# Kernels generated from the src/f32-sigmoid/sse-p5-div.c.in template.
# Each "<SSE value>:<output prefix>" pair below is generated for BATCH_TILE
# values 4, 8, 12, 16, 20, 24: -D SSE=2 writes sse2-* files and -D SSE=4 writes
# sse41-* files.
for variant in 2:sse2 4:sse41; do
  sse_level=${variant%%:*}   # text before the colon: value passed as -D SSE=
  out_prefix=${variant#*:}   # text after the colon: output file name prefix
  for batch_tile in 4 8 12 16 20 24; do
    tools/xngen src/f32-sigmoid/sse-p5-div.c.in \
      -D "BATCH_TILE=${batch_tile}" -D "SSE=${sse_level}" \
      -o "src/f32-sigmoid/gen/${out_prefix}-p5-div-x${batch_tile}.c"
  done
done

# Kernels generated from the src/f32-sigmoid/sse-lut64-p2-div.c.in template.
# Each "<SSE value>:<output prefix>" pair below is generated for BATCH_TILE
# values 4, 8, 12, 16, 20, 24: -D SSE=2 writes sse2-* files and -D SSE=4 writes
# sse41-* files.
for variant in 2:sse2 4:sse41; do
  sse_level=${variant%%:*}   # text before the colon: value passed as -D SSE=
  out_prefix=${variant#*:}   # text after the colon: output file name prefix
  for batch_tile in 4 8 12 16 20 24; do
    tools/xngen src/f32-sigmoid/sse-lut64-p2-div.c.in \
      -D "BATCH_TILE=${batch_tile}" -D "SSE=${sse_level}" \
      -o "src/f32-sigmoid/gen/${out_prefix}-lut64-p2-div-x${batch_tile}.c"
  done
done

################################### x86 AVX ###################################
# Kernels generated from the src/f32-sigmoid/avx-p5.c.in template with
# RR_STEPS=2. Each DIV_ALGO value (div, nr2) is generated for BATCH_TILE values
# 8 through 80 in steps of 8.
for div_algo in div nr2; do
  for batch_tile in 8 16 24 32 40 48 56 64 72 80; do
    tools/xngen src/f32-sigmoid/avx-p5.c.in \
      -D "BATCH_TILE=${batch_tile}" -D RR_STEPS=2 -D "DIV_ALGO=${div_algo}" \
      -o "src/f32-sigmoid/gen/avx-rr2-p5-${div_algo}-x${batch_tile}.c"
  done
done

# Kernels generated from the src/f32-sigmoid/avx2-p5.c.in template with
# RR_STEPS=1. Each DIV_ALGO value (div, nr1fma, nr2fma) is generated for
# BATCH_TILE values 8 through 80 in steps of 8.
for div_algo in div nr1fma nr2fma; do
  for batch_tile in 8 16 24 32 40 48 56 64 72 80; do
    tools/xngen src/f32-sigmoid/avx2-p5.c.in \
      -D "BATCH_TILE=${batch_tile}" -D RR_STEPS=1 -D "DIV_ALGO=${div_algo}" \
      -o "src/f32-sigmoid/gen/avx2-rr1-p5-${div_algo}-x${batch_tile}.c"
  done
done

################################# x86 AVX-512 #################################
# Kernels generated from the src/f32-sigmoid/avx512f-p5-scalef.c.in template
# with RR_STEPS=1. Each DIV_ALGO value (div, nr1fma) is generated for
# BATCH_TILE values 16 through 128 in steps of 16.
for div_algo in div nr1fma; do
  for batch_tile in 16 32 48 64 80 96 112 128; do
    tools/xngen src/f32-sigmoid/avx512f-p5-scalef.c.in \
      -D "BATCH_TILE=${batch_tile}" -D RR_STEPS=1 -D "DIV_ALGO=${div_algo}" \
      -o "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-${div_algo}-x${batch_tile}.c"
  done
done

# Kernels generated from the src/f32-sigmoid/avx512f-lut16-p3-perm-scalef.c.in
# template with RR_STEPS=1. Each DIV_ALGO value (div, nr1fma) is generated for
# BATCH_TILE values 16 through 128 in steps of 16.
for div_algo in div nr1fma; do
  for batch_tile in 16 32 48 64 80 96 112 128; do
    tools/xngen src/f32-sigmoid/avx512f-lut16-p3-perm-scalef.c.in \
      -D "BATCH_TILE=${batch_tile}" -D RR_STEPS=1 -D "DIV_ALGO=${div_algo}" \
      -o "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-${div_algo}-x${batch_tile}.c"
  done
done

235tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=16 -D RR_STEPS=2 -D DIV_ALGO=div -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x16.c
236tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=32 -D RR_STEPS=2 -D DIV_ALGO=div -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x32.c
237tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=48 -D RR_STEPS=2 -D DIV_ALGO=div -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x48.c
238tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=64 -D RR_STEPS=2 -D DIV_ALGO=div -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x64.c
239tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=80 -D RR_STEPS=2 -D DIV_ALGO=div -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x80.c
240tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=96 -D RR_STEPS=2 -D DIV_ALGO=div -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x96.c
241tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=112 -D RR_STEPS=2 -D DIV_ALGO=div -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x112.c
242tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=128 -D RR_STEPS=2 -D DIV_ALGO=div -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c
243
244tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=16 -D RR_STEPS=2 -D DIV_ALGO=nr1fma -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x16.c
245tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=32 -D RR_STEPS=2 -D DIV_ALGO=nr1fma -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x32.c
246tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=48 -D RR_STEPS=2 -D DIV_ALGO=nr1fma -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x48.c
247tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=64 -D RR_STEPS=2 -D DIV_ALGO=nr1fma -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x64.c
248tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=80 -D RR_STEPS=2 -D DIV_ALGO=nr1fma -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x80.c
249tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=96 -D RR_STEPS=2 -D DIV_ALGO=nr1fma -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x96.c
250tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=112 -D RR_STEPS=2 -D DIV_ALGO=nr1fma -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c
251tools/xngen src/f32-sigmoid/avx512f-lut32-p2-perm2-scalef.c.in -D BATCH_TILE=128 -D RR_STEPS=2 -D DIV_ALGO=nr1fma -o src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c
252
################################## WAsm SIMD ##################################
# wasmsimd-p5-div template with BLEND=0: one output file per BATCH_TILE
# (4..24 in steps of 4).
tools/xngen src/f32-sigmoid/wasmsimd-p5-div.c.in -D BATCH_TILE=4 -D BLEND=0 -o src/f32-sigmoid/gen/wasmsimd-p5-div-x4.c
tools/xngen src/f32-sigmoid/wasmsimd-p5-div.c.in -D BATCH_TILE=8 -D BLEND=0 -o src/f32-sigmoid/gen/wasmsimd-p5-div-x8.c
tools/xngen src/f32-sigmoid/wasmsimd-p5-div.c.in -D BATCH_TILE=12 -D BLEND=0 -o src/f32-sigmoid/gen/wasmsimd-p5-div-x12.c
tools/xngen src/f32-sigmoid/wasmsimd-p5-div.c.in -D BATCH_TILE=16 -D BLEND=0 -o src/f32-sigmoid/gen/wasmsimd-p5-div-x16.c
tools/xngen src/f32-sigmoid/wasmsimd-p5-div.c.in -D BATCH_TILE=20 -D BLEND=0 -o src/f32-sigmoid/gen/wasmsimd-p5-div-x20.c
tools/xngen src/f32-sigmoid/wasmsimd-p5-div.c.in -D BATCH_TILE=24 -D BLEND=0 -o src/f32-sigmoid/gen/wasmsimd-p5-div-x24.c

# wasmsimd-lut64-p2-div template with BLEND=0: one output file per BATCH_TILE
# (4..24 in steps of 4).
tools/xngen src/f32-sigmoid/wasmsimd-lut64-p2-div.c.in -D BATCH_TILE=4 -D BLEND=0 -o src/f32-sigmoid/gen/wasmsimd-lut64-p2-div-x4.c
tools/xngen src/f32-sigmoid/wasmsimd-lut64-p2-div.c.in -D BATCH_TILE=8 -D BLEND=0 -o src/f32-sigmoid/gen/wasmsimd-lut64-p2-div-x8.c
tools/xngen src/f32-sigmoid/wasmsimd-lut64-p2-div.c.in -D BATCH_TILE=12 -D BLEND=0 -o src/f32-sigmoid/gen/wasmsimd-lut64-p2-div-x12.c
tools/xngen src/f32-sigmoid/wasmsimd-lut64-p2-div.c.in -D BATCH_TILE=16 -D BLEND=0 -o src/f32-sigmoid/gen/wasmsimd-lut64-p2-div-x16.c
tools/xngen src/f32-sigmoid/wasmsimd-lut64-p2-div.c.in -D BATCH_TILE=20 -D BLEND=0 -o src/f32-sigmoid/gen/wasmsimd-lut64-p2-div-x20.c
tools/xngen src/f32-sigmoid/wasmsimd-lut64-p2-div.c.in -D BATCH_TILE=24 -D BLEND=0 -o src/f32-sigmoid/gen/wasmsimd-lut64-p2-div-x24.c

################################### Scalar ####################################
# Each scalar template below is instantiated for BATCH_TILE=1, 2 and 4; the
# only definition passed is BATCH_TILE, and the output name carries it as -xN.
tools/xngen src/f32-sigmoid/scalar-lut2048-p1-div.c.in -D BATCH_TILE=1 -o src/f32-sigmoid/gen/scalar-lut2048-p1-div-x1.c
tools/xngen src/f32-sigmoid/scalar-lut2048-p1-div.c.in -D BATCH_TILE=2 -o src/f32-sigmoid/gen/scalar-lut2048-p1-div-x2.c
tools/xngen src/f32-sigmoid/scalar-lut2048-p1-div.c.in -D BATCH_TILE=4 -o src/f32-sigmoid/gen/scalar-lut2048-p1-div-x4.c

tools/xngen src/f32-sigmoid/scalar-lut64-p2-div.c.in -D BATCH_TILE=1 -o src/f32-sigmoid/gen/scalar-lut64-p2-div-x1.c
tools/xngen src/f32-sigmoid/scalar-lut64-p2-div.c.in -D BATCH_TILE=2 -o src/f32-sigmoid/gen/scalar-lut64-p2-div-x2.c
tools/xngen src/f32-sigmoid/scalar-lut64-p2-div.c.in -D BATCH_TILE=4 -o src/f32-sigmoid/gen/scalar-lut64-p2-div-x4.c

tools/xngen src/f32-sigmoid/scalar-p5-div.c.in -D BATCH_TILE=1 -o src/f32-sigmoid/gen/scalar-p5-div-x1.c
tools/xngen src/f32-sigmoid/scalar-p5-div.c.in -D BATCH_TILE=2 -o src/f32-sigmoid/gen/scalar-p5-div-x2.c
tools/xngen src/f32-sigmoid/scalar-p5-div.c.in -D BATCH_TILE=4 -o src/f32-sigmoid/gen/scalar-p5-div-x4.c

################################## Unit tests #################################
# Runs the vunary test generator with test/f32-sigmoid.yaml as the spec and
# test/f32-sigmoid.cc as the output path.
tools/generate-vunary-test.py --spec test/f32-sigmoid.yaml --output test/f32-sigmoid.cc