#!/bin/sh
# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

################################### ARM NEON ##################################
# Generate the NEON f32 sigmoid kernels from the three parameterised templates
# (neon-p5, neon-lut64-p2, neon-lut2048-p1). For every template:
#   * FMA=1 / RR_STEPS=1 kernels are written as neonfma-rr1-<variant>-<div>-xN.c
#     for each of the four DIV_ALGO values;
#   * FMA=0 / RR_STEPS=2 kernels are written as neon-rr2-<variant>-nr2recps-xN.c
#     (only DIV_ALGO=nr2recps is generated for this configuration).
# N is the BATCH_TILE value passed to the template.
for variant in p5 lut64-p2 lut2048-p1; do
  template="src/f32-sigmoid/neon-${variant}.c.in"

  for div_algo in div nr2fma nr1recps1fma nr2recps; do
    for tile in 4 8 12 16 20 24; do
      tools/xngen "${template}" \
        -D BATCH_TILE="${tile}" -D RR_STEPS=1 -D FMA=1 -D DIV_ALGO="${div_algo}" \
        -o "src/f32-sigmoid/gen/neonfma-rr1-${variant}-${div_algo}-x${tile}.c"
    done
  done

  for tile in 4 8 12 16 20 24; do
    tools/xngen "${template}" \
      -D BATCH_TILE="${tile}" -D RR_STEPS=2 -D FMA=0 -D DIV_ALGO=nr2recps \
      -o "src/f32-sigmoid/gen/neon-rr2-${variant}-nr2recps-x${tile}.c"
  done
done

# The frac-p9-p10 template is only instantiated for a single batch tile.
tools/xngen src/f32-sigmoid/neon-frac-p9-p10-nr1recps.c.in -D BATCH_TILE=16 -o src/f32-sigmoid/gen/neon-frac-p9-p10-nr1recps-x16.c

################################### x86 SSE ###################################
# One template (sse-p5-div) is instantiated twice per batch tile:
# BLEND=0 output is named sse2-*, BLEND=1 output is named sse41-*.
for tile in 4 8 12 16 20 24; do
  tools/xngen src/f32-sigmoid/sse-p5-div.c.in -D BATCH_TILE="${tile}" -D BLEND=0 \
    -o "src/f32-sigmoid/gen/sse2-p5-div-x${tile}.c"
done

for tile in 4 8 12 16 20 24; do
  tools/xngen src/f32-sigmoid/sse-p5-div.c.in -D BATCH_TILE="${tile}" -D BLEND=1 \
    -o "src/f32-sigmoid/gen/sse41-p5-div-x${tile}.c"
done

################################### x86 AVX ###################################
# Instantiate the avx2-p5 template with RR_STEPS=1 for every combination of
# DIV_ALGO and batch tile; output files are avx2-rr1-p5-<div>-xN.c.
for div_algo in div nr1fma nr2fma; do
  for tile in 8 16 24 32 40 48 56 64 72 80; do
    tools/xngen src/f32-sigmoid/avx2-p5.c.in \
      -D BATCH_TILE="${tile}" -D RR_STEPS=1 -D DIV_ALGO="${div_algo}" \
      -o "src/f32-sigmoid/gen/avx2-rr1-p5-${div_algo}-x${tile}.c"
  done
done

#################################### PSIMD ####################################
# Instantiate the psimd-p5-div template (always with BLEND=0) per batch tile.
for tile in 4 8 12 16 20 24; do
  tools/xngen src/f32-sigmoid/psimd-p5-div.c.in -D BATCH_TILE="${tile}" -D BLEND=0 \
    -o "src/f32-sigmoid/gen/psimd-p5-div-x${tile}.c"
done

################################### Scalar ####################################
# Instantiate each scalar template (scalar-<variant>-div) for batch tiles
# 1, 2 and 4; output files are scalar-<variant>-div-xN.c.
for variant in lut2048-p1 lut64-p2 p5; do
  for tile in 1 2 4; do
    tools/xngen "src/f32-sigmoid/scalar-${variant}-div.c.in" -D BATCH_TILE="${tile}" \
      -o "src/f32-sigmoid/gen/scalar-${variant}-div-x${tile}.c"
  done
done

################################## Unit tests #################################
# Regenerate the sigmoid unit-test source from its YAML spec.
tools/generate-vunary-test.py --spec test/f32-sigmoid.yaml --output test/f32-sigmoid.cc