blob: 92206942f2eb27457e6dc58b02d5088e9b1a2784 [file] [log] [blame]
Miao Wange9993472020-02-10 15:00:10 -08001// Copyright (C) 2020 The Android Open Source Project
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15OPERATOR_SRCS = [
Miao Wangc0aa11a2020-06-10 13:41:26 -070016 "src/operators/argmax-pooling-nhwc.c",
17 "src/operators/average-pooling-nhwc.c",
18 "src/operators/binary-elementwise-nd.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -070019 "src/operators/channel-shuffle-nc.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -070020 "src/operators/constant-pad-nd.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -070021 "src/operators/convolution-nchw.c",
22 "src/operators/convolution-nhwc.c",
23 "src/operators/deconvolution-nhwc.c",
Miao Wang5eea8312020-12-07 09:12:40 -080024 "src/operators/depth-to-space-nchw2nhwc.c",
25 "src/operators/depth-to-space-nhwc.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -070026 "src/operators/fully-connected-nc.c",
27 "src/operators/global-average-pooling-ncw.c",
28 "src/operators/global-average-pooling-nwc.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -070029 "src/operators/leaky-relu-nc.c",
30 "src/operators/max-pooling-nhwc.c",
31 "src/operators/prelu-nc.c",
Miao Wang5eea8312020-12-07 09:12:40 -080032 "src/operators/resize-bilinear-nchw.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -070033 "src/operators/resize-bilinear-nhwc.c",
34 "src/operators/sigmoid-nc.c",
35 "src/operators/softmax-nc.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -070036 "src/operators/unary-elementwise-nc.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -070037 "src/operators/unpooling-nhwc.c",
Miao Wange9993472020-02-10 15:00:10 -080038]
39
Miao Wang86f5fbe2020-07-24 11:16:10 -070040LOGGING_SRCS = [
41 "src/operator-strings.c",
42 "src/subgraph-strings.c",
43]
44
45SUBGRAPH_SRCS = [
46 "src/subgraph/abs.c",
47 "src/subgraph/add2.c",
48 "src/subgraph/argmax-pooling-2d.c",
49 "src/subgraph/average-pooling-2d.c",
50 "src/subgraph/bankers-rounding.c",
51 "src/subgraph/ceiling.c",
52 "src/subgraph/clamp.c",
53 "src/subgraph/convolution-2d.c",
54 "src/subgraph/deconvolution-2d.c",
Miao Wang5eea8312020-12-07 09:12:40 -080055 "src/subgraph/depth-to-space.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -070056 "src/subgraph/depthwise-convolution-2d.c",
57 "src/subgraph/divide.c",
Miao Wang55abe392021-02-03 14:54:41 -080058 "src/subgraph/elu.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -070059 "src/subgraph/floor.c",
Miao Wang5eea8312020-12-07 09:12:40 -080060 "src/subgraph/fully-connected.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -070061 "src/subgraph/global-average-pooling-2d.c",
62 "src/subgraph/hardswish.c",
63 "src/subgraph/leaky-relu.c",
64 "src/subgraph/max-pooling-2d.c",
65 "src/subgraph/maximum2.c",
66 "src/subgraph/minimum2.c",
67 "src/subgraph/multiply2.c",
68 "src/subgraph/negate.c",
69 "src/subgraph/prelu.c",
70 "src/subgraph/sigmoid.c",
71 "src/subgraph/softmax.c",
Miao Wang5eea8312020-12-07 09:12:40 -080072 "src/subgraph/square-root.c",
73 "src/subgraph/square.c",
74 "src/subgraph/squared-difference.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -070075 "src/subgraph/static-constant-pad.c",
76 "src/subgraph/static-reshape.c",
77 "src/subgraph/static-resize-bilinear-2d.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -070078 "src/subgraph/subtract.c",
79 "src/subgraph/unpooling-2d.c",
80]
81
Miao Wange9993472020-02-10 15:00:10 -080082TABLE_SRCS = [
83 "src/tables/exp2-k-over-64.c",
84 "src/tables/exp2-k-over-2048.c",
Miao Wang5eea8312020-12-07 09:12:40 -080085 "src/tables/exp2minus-k-over-4.c",
86 "src/tables/exp2minus-k-over-8.c",
87 "src/tables/exp2minus-k-over-16.c",
88 "src/tables/exp2minus-k-over-64.c",
89 "src/tables/exp2minus-k-over-2048.c",
Miao Wange9993472020-02-10 15:00:10 -080090]
91
92SCALAR_UKERNELS = [
93 "src/f32-argmaxpool/4x-scalar-c1.c",
94 "src/f32-argmaxpool/9p8x-scalar-c1.c",
95 "src/f32-argmaxpool/9x-scalar-c1.c",
Miao Wang400e4042020-04-17 10:15:59 -070096 "src/f32-avgpool/9p8x-minmax-scalar-c1.c",
97 "src/f32-avgpool/9x-minmax-scalar-c1.c",
Miao Wang2534c2f2020-03-16 11:58:04 -070098 "src/f32-clamp/gen/scalar-x1.c",
99 "src/f32-clamp/gen/scalar-x2.c",
100 "src/f32-clamp/gen/scalar-x4.c",
Miao Wange9993472020-02-10 15:00:10 -0800101 "src/f32-conv-hwc/3x3s2p0p1c3x4-scalar-1x1.c",
102 "src/f32-conv-hwc/3x3s2p1c3x4-scalar-1x1.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700103 "src/f32-conv-hwc2chw/3x3s2p1c3x4-scalar-1x1.c",
Miao Wang400e4042020-04-17 10:15:59 -0700104 "src/f32-dwconv/gen/up1x4-minmax-scalar-acc2.c",
105 "src/f32-dwconv/gen/up1x4-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800106 "src/f32-dwconv/gen/up1x4-scalar-acc2.c",
107 "src/f32-dwconv/gen/up1x4-scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700108 "src/f32-dwconv/gen/up1x9-minmax-scalar-acc2.c",
109 "src/f32-dwconv/gen/up1x9-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800110 "src/f32-dwconv/gen/up1x9-scalar-acc2.c",
111 "src/f32-dwconv/gen/up1x9-scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700112 "src/f32-dwconv/gen/up1x25-minmax-scalar-acc2.c",
113 "src/f32-dwconv/gen/up1x25-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800114 "src/f32-dwconv/gen/up1x25-scalar-acc2.c",
115 "src/f32-dwconv/gen/up1x25-scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700116 "src/f32-dwconv/gen/up2x4-minmax-scalar-acc2.c",
117 "src/f32-dwconv/gen/up2x4-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800118 "src/f32-dwconv/gen/up2x4-scalar-acc2.c",
119 "src/f32-dwconv/gen/up2x4-scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700120 "src/f32-dwconv/gen/up2x9-minmax-scalar-acc2.c",
121 "src/f32-dwconv/gen/up2x9-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800122 "src/f32-dwconv/gen/up2x9-scalar-acc2.c",
123 "src/f32-dwconv/gen/up2x9-scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700124 "src/f32-dwconv/gen/up2x25-minmax-scalar-acc2.c",
125 "src/f32-dwconv/gen/up2x25-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800126 "src/f32-dwconv/gen/up2x25-scalar-acc2.c",
127 "src/f32-dwconv/gen/up2x25-scalar.c",
128 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-scalar-1x1-acc2.c",
129 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-scalar-1x1-acc3.c",
130 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-scalar-1x1-acc4.c",
131 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-scalar-1x1.c",
132 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-scalar-2x1-acc2.c",
133 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-scalar-2x1.c",
134 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-scalar-3x1.c",
135 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-scalar-4x1.c",
136 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-scalar-5x1.c",
137 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-scalar-6x1.c",
138 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-scalar-1x1-acc2.c",
139 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-scalar-1x1-acc3.c",
140 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-scalar-1x1-acc4.c",
141 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-scalar-1x1.c",
142 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-scalar-2x1-acc2.c",
143 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-scalar-2x1.c",
144 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-scalar-3x1.c",
145 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-scalar-4x1.c",
146 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-scalar-1x1-acc2.c",
147 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-scalar-1x1-acc3.c",
148 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-scalar-1x1-acc4.c",
149 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-scalar-1x1-acc5.c",
150 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-scalar-1x1.c",
151 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-scalar-2x1-acc2.c",
152 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-scalar-2x1-acc3.c",
153 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-scalar-2x1.c",
154 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-scalar-3x1-acc2.c",
155 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-scalar-3x1.c",
156 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-scalar-1x1-acc2.c",
157 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-scalar-1x1-acc3.c",
158 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-scalar-1x1-acc4.c",
159 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-scalar-1x1-acc5.c",
160 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-scalar-1x1.c",
161 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-scalar-2x1-acc2.c",
162 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-scalar-2x1-acc3.c",
163 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-scalar-2x1.c",
164 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-scalar-3x1-acc2.c",
165 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-scalar-3x1.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700166 "src/f32-gavgpool-cw/scalar-x1.c",
Miao Wang400e4042020-04-17 10:15:59 -0700167 "src/f32-gavgpool/7p7x-minmax-scalar-c1.c",
168 "src/f32-gavgpool/7x-minmax-scalar-c1.c",
169 "src/f32-gemm/gen-inc/1x4inc-minmax-scalar.c",
170 "src/f32-gemm/gen-inc/2x4inc-minmax-scalar.c",
171 "src/f32-gemm/gen-inc/4x4inc-minmax-scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700172 "src/f32-gemm/gen/1x4-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800173 "src/f32-gemm/gen/1x4-relu-scalar.c",
174 "src/f32-gemm/gen/1x4-scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700175 "src/f32-gemm/gen/2x4-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800176 "src/f32-gemm/gen/2x4-relu-scalar.c",
177 "src/f32-gemm/gen/2x4-scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700178 "src/f32-gemm/gen/4x2-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800179 "src/f32-gemm/gen/4x2-relu-scalar.c",
180 "src/f32-gemm/gen/4x2-scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700181 "src/f32-gemm/gen/4x4-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800182 "src/f32-gemm/gen/4x4-relu-scalar.c",
183 "src/f32-gemm/gen/4x4-scalar.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700184 "src/f32-hswish/gen/hswish-scalar-x1.c",
185 "src/f32-hswish/gen/hswish-scalar-x2.c",
186 "src/f32-hswish/gen/hswish-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800187 "src/f32-ibilinear-chw/gen/scalar-p1.c",
188 "src/f32-ibilinear-chw/gen/scalar-p2.c",
189 "src/f32-ibilinear-chw/gen/scalar-p4.c",
Miao Wang2534c2f2020-03-16 11:58:04 -0700190 "src/f32-ibilinear/gen/scalar-c1.c",
191 "src/f32-ibilinear/gen/scalar-c2.c",
192 "src/f32-ibilinear/gen/scalar-c4.c",
Miao Wang400e4042020-04-17 10:15:59 -0700193 "src/f32-igemm/gen/1x4-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800194 "src/f32-igemm/gen/1x4-relu-scalar.c",
195 "src/f32-igemm/gen/1x4-scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700196 "src/f32-igemm/gen/2x4-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800197 "src/f32-igemm/gen/2x4-relu-scalar.c",
198 "src/f32-igemm/gen/2x4-scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700199 "src/f32-igemm/gen/4x2-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800200 "src/f32-igemm/gen/4x2-relu-scalar.c",
201 "src/f32-igemm/gen/4x2-scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700202 "src/f32-igemm/gen/4x4-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800203 "src/f32-igemm/gen/4x4-relu-scalar.c",
204 "src/f32-igemm/gen/4x4-scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700205 "src/f32-maxpool/9p8x-minmax-scalar-c1.c",
206 "src/f32-pavgpool/9p8x-minmax-scalar-c1.c",
207 "src/f32-pavgpool/9x-minmax-scalar-c1.c",
208 "src/f32-ppmm/gen/2x4-minmax-scalar.c",
209 "src/f32-ppmm/gen/3x3-minmax-scalar.c",
210 "src/f32-ppmm/gen/4x2-minmax-scalar.c",
211 "src/f32-ppmm/gen/4x4-minmax-scalar.c",
Miao Wange9993472020-02-10 15:00:10 -0800212 "src/f32-prelu/gen/scalar-2x1.c",
213 "src/f32-prelu/gen/scalar-2x4.c",
214 "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x1.c",
Miao Wange9993472020-02-10 15:00:10 -0800215 "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x2-acc2.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800216 "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x2.c",
Miao Wange9993472020-02-10 15:00:10 -0800217 "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x4-acc2.c",
218 "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x4-acc4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800219 "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x4.c",
Miao Wange9993472020-02-10 15:00:10 -0800220 "src/f32-raddstoreexpminusmax/gen/scalar-p5-x1.c",
Miao Wange9993472020-02-10 15:00:10 -0800221 "src/f32-raddstoreexpminusmax/gen/scalar-p5-x2-acc2.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800222 "src/f32-raddstoreexpminusmax/gen/scalar-p5-x2.c",
Miao Wange9993472020-02-10 15:00:10 -0800223 "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4-acc2.c",
224 "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4-acc4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800225 "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700226 "src/f32-relu/gen/scalar-x1.c",
227 "src/f32-relu/gen/scalar-x2.c",
228 "src/f32-relu/gen/scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800229 "src/f32-relu/gen/scalar-x8.c",
Miao Wange9993472020-02-10 15:00:10 -0800230 "src/f32-rmax/scalar.c",
Miao Wange9993472020-02-10 15:00:10 -0800231 "src/f32-sigmoid/gen/scalar-lut64-p2-div-x1.c",
232 "src/f32-sigmoid/gen/scalar-lut64-p2-div-x2.c",
233 "src/f32-sigmoid/gen/scalar-lut64-p2-div-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800234 "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x1.c",
235 "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x2.c",
236 "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x4.c",
Miao Wange9993472020-02-10 15:00:10 -0800237 "src/f32-sigmoid/gen/scalar-p5-div-x1.c",
238 "src/f32-sigmoid/gen/scalar-p5-div-x2.c",
239 "src/f32-sigmoid/gen/scalar-p5-div-x4.c",
Miao Wang400e4042020-04-17 10:15:59 -0700240 "src/f32-spmm/gen/1x1-minmax-scalar-pipelined.c",
241 "src/f32-spmm/gen/1x1-minmax-scalar.c",
242 "src/f32-spmm/gen/2x1-minmax-scalar-pipelined.c",
243 "src/f32-spmm/gen/2x1-minmax-scalar.c",
244 "src/f32-spmm/gen/4x1-minmax-scalar-pipelined.c",
245 "src/f32-spmm/gen/4x1-minmax-scalar.c",
246 "src/f32-spmm/gen/8x1-minmax-scalar-pipelined.c",
247 "src/f32-spmm/gen/8x1-minmax-scalar.c",
248 "src/f32-spmm/gen/8x2-minmax-scalar.c",
249 "src/f32-spmm/gen/8x4-minmax-scalar.c",
250 "src/f32-vbinary/gen/vadd-minmax-scalar-x1.c",
251 "src/f32-vbinary/gen/vadd-minmax-scalar-x2.c",
252 "src/f32-vbinary/gen/vadd-minmax-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800253 "src/f32-vbinary/gen/vadd-minmax-scalar-x8.c",
254 "src/f32-vbinary/gen/vadd-relu-scalar-x1.c",
255 "src/f32-vbinary/gen/vadd-relu-scalar-x2.c",
256 "src/f32-vbinary/gen/vadd-relu-scalar-x4.c",
257 "src/f32-vbinary/gen/vadd-relu-scalar-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700258 "src/f32-vbinary/gen/vadd-scalar-x1.c",
259 "src/f32-vbinary/gen/vadd-scalar-x2.c",
260 "src/f32-vbinary/gen/vadd-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800261 "src/f32-vbinary/gen/vadd-scalar-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -0700262 "src/f32-vbinary/gen/vaddc-minmax-scalar-x1.c",
263 "src/f32-vbinary/gen/vaddc-minmax-scalar-x2.c",
264 "src/f32-vbinary/gen/vaddc-minmax-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800265 "src/f32-vbinary/gen/vaddc-minmax-scalar-x8.c",
266 "src/f32-vbinary/gen/vaddc-relu-scalar-x1.c",
267 "src/f32-vbinary/gen/vaddc-relu-scalar-x2.c",
268 "src/f32-vbinary/gen/vaddc-relu-scalar-x4.c",
269 "src/f32-vbinary/gen/vaddc-relu-scalar-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700270 "src/f32-vbinary/gen/vaddc-scalar-x1.c",
271 "src/f32-vbinary/gen/vaddc-scalar-x2.c",
272 "src/f32-vbinary/gen/vaddc-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800273 "src/f32-vbinary/gen/vaddc-scalar-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -0700274 "src/f32-vbinary/gen/vdiv-minmax-scalar-x1.c",
275 "src/f32-vbinary/gen/vdiv-minmax-scalar-x2.c",
276 "src/f32-vbinary/gen/vdiv-minmax-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800277 "src/f32-vbinary/gen/vdiv-minmax-scalar-x8.c",
278 "src/f32-vbinary/gen/vdiv-relu-scalar-x1.c",
279 "src/f32-vbinary/gen/vdiv-relu-scalar-x2.c",
280 "src/f32-vbinary/gen/vdiv-relu-scalar-x4.c",
281 "src/f32-vbinary/gen/vdiv-relu-scalar-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700282 "src/f32-vbinary/gen/vdiv-scalar-x1.c",
283 "src/f32-vbinary/gen/vdiv-scalar-x2.c",
284 "src/f32-vbinary/gen/vdiv-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800285 "src/f32-vbinary/gen/vdiv-scalar-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -0700286 "src/f32-vbinary/gen/vdivc-minmax-scalar-x1.c",
287 "src/f32-vbinary/gen/vdivc-minmax-scalar-x2.c",
288 "src/f32-vbinary/gen/vdivc-minmax-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800289 "src/f32-vbinary/gen/vdivc-minmax-scalar-x8.c",
290 "src/f32-vbinary/gen/vdivc-relu-scalar-x1.c",
291 "src/f32-vbinary/gen/vdivc-relu-scalar-x2.c",
292 "src/f32-vbinary/gen/vdivc-relu-scalar-x4.c",
293 "src/f32-vbinary/gen/vdivc-relu-scalar-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700294 "src/f32-vbinary/gen/vdivc-scalar-x1.c",
295 "src/f32-vbinary/gen/vdivc-scalar-x2.c",
296 "src/f32-vbinary/gen/vdivc-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800297 "src/f32-vbinary/gen/vdivc-scalar-x8.c",
Miao Wange9993472020-02-10 15:00:10 -0800298 "src/f32-vbinary/gen/vmax-scalar-x1.c",
299 "src/f32-vbinary/gen/vmax-scalar-x2.c",
300 "src/f32-vbinary/gen/vmax-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800301 "src/f32-vbinary/gen/vmax-scalar-x8.c",
Miao Wange9993472020-02-10 15:00:10 -0800302 "src/f32-vbinary/gen/vmaxc-scalar-x1.c",
303 "src/f32-vbinary/gen/vmaxc-scalar-x2.c",
304 "src/f32-vbinary/gen/vmaxc-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800305 "src/f32-vbinary/gen/vmaxc-scalar-x8.c",
Miao Wange9993472020-02-10 15:00:10 -0800306 "src/f32-vbinary/gen/vmin-scalar-x1.c",
307 "src/f32-vbinary/gen/vmin-scalar-x2.c",
308 "src/f32-vbinary/gen/vmin-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800309 "src/f32-vbinary/gen/vmin-scalar-x8.c",
Miao Wange9993472020-02-10 15:00:10 -0800310 "src/f32-vbinary/gen/vminc-scalar-x1.c",
311 "src/f32-vbinary/gen/vminc-scalar-x2.c",
312 "src/f32-vbinary/gen/vminc-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800313 "src/f32-vbinary/gen/vminc-scalar-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -0700314 "src/f32-vbinary/gen/vmul-minmax-scalar-x1.c",
315 "src/f32-vbinary/gen/vmul-minmax-scalar-x2.c",
316 "src/f32-vbinary/gen/vmul-minmax-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800317 "src/f32-vbinary/gen/vmul-minmax-scalar-x8.c",
318 "src/f32-vbinary/gen/vmul-relu-scalar-x1.c",
319 "src/f32-vbinary/gen/vmul-relu-scalar-x2.c",
320 "src/f32-vbinary/gen/vmul-relu-scalar-x4.c",
321 "src/f32-vbinary/gen/vmul-relu-scalar-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700322 "src/f32-vbinary/gen/vmul-scalar-x1.c",
323 "src/f32-vbinary/gen/vmul-scalar-x2.c",
324 "src/f32-vbinary/gen/vmul-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800325 "src/f32-vbinary/gen/vmul-scalar-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -0700326 "src/f32-vbinary/gen/vmulc-minmax-scalar-x1.c",
327 "src/f32-vbinary/gen/vmulc-minmax-scalar-x2.c",
328 "src/f32-vbinary/gen/vmulc-minmax-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800329 "src/f32-vbinary/gen/vmulc-minmax-scalar-x8.c",
330 "src/f32-vbinary/gen/vmulc-relu-scalar-x1.c",
331 "src/f32-vbinary/gen/vmulc-relu-scalar-x2.c",
332 "src/f32-vbinary/gen/vmulc-relu-scalar-x4.c",
333 "src/f32-vbinary/gen/vmulc-relu-scalar-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700334 "src/f32-vbinary/gen/vmulc-scalar-x1.c",
335 "src/f32-vbinary/gen/vmulc-scalar-x2.c",
336 "src/f32-vbinary/gen/vmulc-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800337 "src/f32-vbinary/gen/vmulc-scalar-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -0700338 "src/f32-vbinary/gen/vrdivc-minmax-scalar-x1.c",
339 "src/f32-vbinary/gen/vrdivc-minmax-scalar-x2.c",
340 "src/f32-vbinary/gen/vrdivc-minmax-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800341 "src/f32-vbinary/gen/vrdivc-minmax-scalar-x8.c",
342 "src/f32-vbinary/gen/vrdivc-relu-scalar-x1.c",
343 "src/f32-vbinary/gen/vrdivc-relu-scalar-x2.c",
344 "src/f32-vbinary/gen/vrdivc-relu-scalar-x4.c",
345 "src/f32-vbinary/gen/vrdivc-relu-scalar-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700346 "src/f32-vbinary/gen/vrdivc-scalar-x1.c",
347 "src/f32-vbinary/gen/vrdivc-scalar-x2.c",
348 "src/f32-vbinary/gen/vrdivc-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800349 "src/f32-vbinary/gen/vrdivc-scalar-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -0700350 "src/f32-vbinary/gen/vrsubc-minmax-scalar-x1.c",
351 "src/f32-vbinary/gen/vrsubc-minmax-scalar-x2.c",
352 "src/f32-vbinary/gen/vrsubc-minmax-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800353 "src/f32-vbinary/gen/vrsubc-minmax-scalar-x8.c",
354 "src/f32-vbinary/gen/vrsubc-relu-scalar-x1.c",
355 "src/f32-vbinary/gen/vrsubc-relu-scalar-x2.c",
356 "src/f32-vbinary/gen/vrsubc-relu-scalar-x4.c",
357 "src/f32-vbinary/gen/vrsubc-relu-scalar-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700358 "src/f32-vbinary/gen/vrsubc-scalar-x1.c",
359 "src/f32-vbinary/gen/vrsubc-scalar-x2.c",
360 "src/f32-vbinary/gen/vrsubc-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800361 "src/f32-vbinary/gen/vrsubc-scalar-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700362 "src/f32-vbinary/gen/vsqrdiff-scalar-x1.c",
363 "src/f32-vbinary/gen/vsqrdiff-scalar-x2.c",
364 "src/f32-vbinary/gen/vsqrdiff-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800365 "src/f32-vbinary/gen/vsqrdiff-scalar-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700366 "src/f32-vbinary/gen/vsqrdiffc-scalar-x1.c",
367 "src/f32-vbinary/gen/vsqrdiffc-scalar-x2.c",
368 "src/f32-vbinary/gen/vsqrdiffc-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800369 "src/f32-vbinary/gen/vsqrdiffc-scalar-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -0700370 "src/f32-vbinary/gen/vsub-minmax-scalar-x1.c",
371 "src/f32-vbinary/gen/vsub-minmax-scalar-x2.c",
372 "src/f32-vbinary/gen/vsub-minmax-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800373 "src/f32-vbinary/gen/vsub-minmax-scalar-x8.c",
374 "src/f32-vbinary/gen/vsub-relu-scalar-x1.c",
375 "src/f32-vbinary/gen/vsub-relu-scalar-x2.c",
376 "src/f32-vbinary/gen/vsub-relu-scalar-x4.c",
377 "src/f32-vbinary/gen/vsub-relu-scalar-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700378 "src/f32-vbinary/gen/vsub-scalar-x1.c",
379 "src/f32-vbinary/gen/vsub-scalar-x2.c",
380 "src/f32-vbinary/gen/vsub-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800381 "src/f32-vbinary/gen/vsub-scalar-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -0700382 "src/f32-vbinary/gen/vsubc-minmax-scalar-x1.c",
383 "src/f32-vbinary/gen/vsubc-minmax-scalar-x2.c",
384 "src/f32-vbinary/gen/vsubc-minmax-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800385 "src/f32-vbinary/gen/vsubc-minmax-scalar-x8.c",
386 "src/f32-vbinary/gen/vsubc-relu-scalar-x1.c",
387 "src/f32-vbinary/gen/vsubc-relu-scalar-x2.c",
388 "src/f32-vbinary/gen/vsubc-relu-scalar-x4.c",
389 "src/f32-vbinary/gen/vsubc-relu-scalar-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700390 "src/f32-vbinary/gen/vsubc-scalar-x1.c",
391 "src/f32-vbinary/gen/vsubc-scalar-x2.c",
392 "src/f32-vbinary/gen/vsubc-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800393 "src/f32-vbinary/gen/vsubc-scalar-x8.c",
Miao Wang55abe392021-02-03 14:54:41 -0800394 "src/f32-velu/gen/velu-scalar-rr2-lut16-p3-x1.c",
395 "src/f32-velu/gen/velu-scalar-rr2-lut16-p3-x2.c",
396 "src/f32-velu/gen/velu-scalar-rr2-lut16-p3-x3.c",
397 "src/f32-velu/gen/velu-scalar-rr2-lut16-p3-x4.c",
398 "src/f32-velu/gen/velu-scalar-rr2-lut16-p3-x5.c",
399 "src/f32-velu/gen/velu-scalar-rr2-lut16-p3-x6.c",
400 "src/f32-velu/gen/velu-scalar-rr2-p6-x1.c",
401 "src/f32-velu/gen/velu-scalar-rr2-p6-x2.c",
402 "src/f32-velu/gen/velu-scalar-rr2-p6-x3.c",
403 "src/f32-velu/gen/velu-scalar-rr2-p6-x4.c",
404 "src/f32-velu/gen/velu-scalar-rr2-p6-x5.c",
405 "src/f32-velu/gen/velu-scalar-rr2-p6-x6.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700406 "src/f32-vlrelu/gen/vlrelu-scalar-x1.c",
407 "src/f32-vlrelu/gen/vlrelu-scalar-x2.c",
408 "src/f32-vlrelu/gen/vlrelu-scalar-x4.c",
Miao Wang400e4042020-04-17 10:15:59 -0700409 "src/f32-vmulcaddc/gen/c1-minmax-scalar-2x.c",
410 "src/f32-vmulcaddc/gen/c2-minmax-scalar-2x.c",
411 "src/f32-vmulcaddc/gen/c4-minmax-scalar-2x.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700412 "src/f32-vrnd/gen/vrndd-scalar-libm-x1.c",
413 "src/f32-vrnd/gen/vrndd-scalar-libm-x2.c",
414 "src/f32-vrnd/gen/vrndd-scalar-libm-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800415 "src/f32-vrnd/gen/vrndne-scalar-libm-x1.c",
416 "src/f32-vrnd/gen/vrndne-scalar-libm-x2.c",
417 "src/f32-vrnd/gen/vrndne-scalar-libm-x4.c",
418 "src/f32-vrnd/gen/vrndu-scalar-libm-x1.c",
419 "src/f32-vrnd/gen/vrndu-scalar-libm-x2.c",
420 "src/f32-vrnd/gen/vrndu-scalar-libm-x4.c",
421 "src/f32-vrnd/gen/vrndz-scalar-libm-x1.c",
422 "src/f32-vrnd/gen/vrndz-scalar-libm-x2.c",
423 "src/f32-vrnd/gen/vrndz-scalar-libm-x4.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700424 "src/f32-vsqrt/gen/scalar-sqrt-x1.c",
425 "src/f32-vsqrt/gen/scalar-sqrt-x2.c",
426 "src/f32-vsqrt/gen/scalar-sqrt-x4.c",
427 "src/f32-vunary/gen/vabs-scalar-x1.c",
428 "src/f32-vunary/gen/vabs-scalar-x2.c",
429 "src/f32-vunary/gen/vabs-scalar-x4.c",
430 "src/f32-vunary/gen/vneg-scalar-x1.c",
431 "src/f32-vunary/gen/vneg-scalar-x2.c",
432 "src/f32-vunary/gen/vneg-scalar-x4.c",
433 "src/f32-vunary/gen/vsqr-scalar-x1.c",
434 "src/f32-vunary/gen/vsqr-scalar-x2.c",
435 "src/f32-vunary/gen/vsqr-scalar-x4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800436 "src/math/expm1minus-scalar-rr2-lut4-p4.c",
437 "src/math/expm1minus-scalar-rr2-lut8-p3.c",
438 "src/math/expm1minus-scalar-rr2-lut8-p4.c",
439 "src/math/expm1minus-scalar-rr2-lut16-p3.c",
440 "src/math/expm1minus-scalar-rr2-lut16-p4.c",
441 "src/math/expm1minus-scalar-rr2-p5.c",
442 "src/math/expm1minus-scalar-rr2-p6.c",
443 "src/math/expminus-scalar-rr2-lut64-p2.c",
444 "src/math/expminus-scalar-rr2-lut2048-p1.c",
445 "src/math/expminus-scalar-rr2-p5.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -0700446 "src/math/roundd-scalar-addsub.c",
447 "src/math/roundd-scalar-cvt.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700448 "src/math/roundd-scalar-floor.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800449 "src/math/roundne-scalar-addsub.c",
450 "src/math/roundne-scalar-nearbyint.c",
451 "src/math/roundne-scalar-rint.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -0700452 "src/math/roundu-scalar-addsub.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700453 "src/math/roundu-scalar-ceil.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -0700454 "src/math/roundu-scalar-cvt.c",
455 "src/math/roundz-scalar-addsub.c",
456 "src/math/roundz-scalar-cvt.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700457 "src/math/roundz-scalar-trunc.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800458 "src/math/sigmoid-scalar-rr2-lut64-p2-div.c",
459 "src/math/sigmoid-scalar-rr2-lut2048-p1-div.c",
460 "src/math/sigmoid-scalar-rr2-p5-div.c",
461 "src/qs8-gemm/gen/8x8c4-minmax-scalar.c",
462 "src/qs8-gemm/gen/12x4c4-minmax-scalar.c",
463 "src/qs8-requantization/fp32-scalar-lrintf.c",
464 "src/qs8-requantization/fp32-scalar-magic.c",
465 "src/qs8-requantization/precise-scalar-signed64.c",
466 "src/qs8-requantization/precise-scalar-unsigned32.c",
467 "src/qs8-requantization/precise-scalar-unsigned64.c",
468 "src/qs8-requantization/q31-scalar.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700469 "src/qu8-avgpool/9p8x-minmax-scalar-c1.c",
470 "src/qu8-avgpool/9x-minmax-scalar-c1.c",
471 "src/qu8-dwconv/up1x9-minmax-scalar.c",
472 "src/qu8-gavgpool/7p7x-minmax-scalar-c1.c",
473 "src/qu8-gavgpool/7x-minmax-scalar-c1.c",
474 "src/qu8-gemm/2x2-minmax-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800475 "src/qu8-gemm/gen/8x8c4-minmax-scalar.c",
476 "src/qu8-gemm/gen/12x4c4-minmax-scalar.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700477 "src/qu8-igemm/2x2-minmax-scalar.c",
478 "src/qu8-requantization/fp32-scalar-lrintf.c",
479 "src/qu8-requantization/fp32-scalar-magic.c",
480 "src/qu8-requantization/precise-scalar-signed64.c",
481 "src/qu8-requantization/precise-scalar-unsigned32.c",
482 "src/qu8-requantization/precise-scalar-unsigned64.c",
483 "src/qu8-requantization/q31-scalar.c",
484 "src/qu8-vadd/minmax-scalar.c",
Miao Wang2534c2f2020-03-16 11:58:04 -0700485 "src/u8-clamp/scalar-x4.c",
Miao Wange9993472020-02-10 15:00:10 -0800486 "src/u8-lut32norm/scalar.c",
Miao Wang400e4042020-04-17 10:15:59 -0700487 "src/u8-maxpool/9p8x-minmax-scalar-c1.c",
Miao Wange9993472020-02-10 15:00:10 -0800488 "src/u8-rmax/scalar.c",
Miao Wange9993472020-02-10 15:00:10 -0800489 "src/x8-lut/scalar.c",
490 "src/x8-zip/x2-scalar.c",
491 "src/x8-zip/x3-scalar.c",
492 "src/x8-zip/x4-scalar.c",
493 "src/x8-zip/xm-scalar.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800494 "src/x32-depthtospace2d-chw2hwc/scalar.c",
495 "src/x32-fill/scalar-float.c",
496 "src/x32-fill/scalar-int.c",
497 "src/x32-packx/x2-scalar.c",
498 "src/x32-packx/x3-scalar.c",
499 "src/x32-packx/x4-scalar.c",
500 "src/x32-pad/scalar-float.c",
501 "src/x32-pad/scalar-int.c",
502 "src/x32-unpool/scalar.c",
503 "src/x32-zip/x2-scalar.c",
504 "src/x32-zip/x3-scalar.c",
505 "src/x32-zip/x4-scalar.c",
506 "src/x32-zip/xm-scalar.c",
507 "src/xx-copy/memcpy.c",
Miao Wange9993472020-02-10 15:00:10 -0800508]
509
Miao Wange9993472020-02-10 15:00:10 -0800510// ISA-specific micro-kernels
511NEON_UKERNELS = [
Miao Wang86f5fbe2020-07-24 11:16:10 -0700512 "src/f32-argmaxpool/4x-neon-c4.c",
513 "src/f32-argmaxpool/9p8x-neon-c4.c",
514 "src/f32-argmaxpool/9x-neon-c4.c",
Miao Wang400e4042020-04-17 10:15:59 -0700515 "src/f32-avgpool/9p8x-minmax-neon-c4.c",
516 "src/f32-avgpool/9x-minmax-neon-c4.c",
Miao Wang2534c2f2020-03-16 11:58:04 -0700517 "src/f32-clamp/gen/neon-x4.c",
518 "src/f32-clamp/gen/neon-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700519 "src/f32-conv-hwc/gen/3x3s2p0p1c3x4-neon-2x1.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700520 "src/f32-conv-hwc/gen/3x3s2p0p1c3x4-neon-2x2.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800521 "src/f32-conv-hwc/gen/3x3s2p0p1c3x8-neon-2x1.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700522 "src/f32-conv-hwc/gen/3x3s2p0p1c3x8-neon-2x2.c",
523 "src/f32-conv-hwc/gen/3x3s2p1c3x4-neon-2x1.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700524 "src/f32-conv-hwc/gen/3x3s2p1c3x4-neon-2x2.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800525 "src/f32-conv-hwc/gen/3x3s2p1c3x8-neon-2x1.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700526 "src/f32-conv-hwc/gen/3x3s2p1c3x8-neon-2x2.c",
Miao Wang55abe392021-02-03 14:54:41 -0800527 "src/f32-conv-hwc2chw/3x3s2p1c3x4-neon-2x2.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -0700528 "src/f32-dwconv/gen/up4x4-minmax-neon-acc2.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800529 "src/f32-dwconv/gen/up4x4-minmax-neon.c",
Miao Wang400e4042020-04-17 10:15:59 -0700530 "src/f32-dwconv/gen/up4x9-minmax-neon-acc2.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800531 "src/f32-dwconv/gen/up4x9-minmax-neon.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -0700532 "src/f32-dwconv/gen/up4x25-minmax-neon-acc2.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800533 "src/f32-dwconv/gen/up4x25-minmax-neon.c",
534 "src/f32-dwconv/gen/up8x4-minmax-neon-acc2.c",
535 "src/f32-dwconv/gen/up8x4-minmax-neon.c",
536 "src/f32-dwconv/gen/up8x9-minmax-neon-acc2.c",
537 "src/f32-dwconv/gen/up8x9-minmax-neon.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -0700538 "src/f32-dwconv/gen/up8x25-minmax-neon-acc2.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800539 "src/f32-dwconv/gen/up8x25-minmax-neon.c",
540 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neon-1x4-acc2.c",
541 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neon-1x4-acc3.c",
542 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neon-1x4-acc4.c",
543 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neon-1x4.c",
544 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neon-2x4-acc2.c",
545 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neon-2x4.c",
546 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neon-3x4.c",
547 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neon-4x4.c",
548 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neon-5x4.c",
549 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neon-6x4.c",
550 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-1x4-acc2.c",
551 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-1x4-acc3.c",
552 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-1x4-acc4.c",
553 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-1x4.c",
554 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-2x4-acc2.c",
555 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-2x4.c",
556 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-3x4.c",
557 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-4x4.c",
558 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neon-1x4-acc2.c",
559 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neon-1x4-acc3.c",
560 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neon-1x4-acc4.c",
561 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neon-1x4-acc5.c",
562 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neon-1x4.c",
563 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neon-2x4-acc2.c",
564 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neon-2x4-acc3.c",
565 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neon-2x4.c",
566 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neon-3x4-acc2.c",
567 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neon-3x4.c",
568 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neon-4x4-acc2.c",
569 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neon-4x4.c",
570 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neon-5x4.c",
571 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neon-1x4-acc2.c",
572 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neon-1x4-acc3.c",
573 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neon-1x4-acc4.c",
574 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neon-1x4-acc5.c",
575 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neon-1x4.c",
576 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neon-2x4-acc2.c",
577 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neon-2x4-acc3.c",
578 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neon-2x4.c",
579 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neon-3x4-acc2.c",
580 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neon-3x4.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700581 "src/f32-gavgpool-cw/neon-x4.c",
Miao Wang400e4042020-04-17 10:15:59 -0700582 "src/f32-gavgpool/7p7x-minmax-neon-c4.c",
583 "src/f32-gavgpool/7x-minmax-neon-c4.c",
Miao Wang400e4042020-04-17 10:15:59 -0700584 "src/f32-gemm/gen-inc/1x8inc-minmax-neon-dup-ld64.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800585 "src/f32-gemm/gen-inc/1x8inc-minmax-neon-lane-ld64.c",
586 "src/f32-gemm/gen-inc/1x8s4inc-minmax-neon.c",
Miao Wang400e4042020-04-17 10:15:59 -0700587 "src/f32-gemm/gen-inc/4x8inc-minmax-neon-dup-ld64.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800588 "src/f32-gemm/gen-inc/4x8inc-minmax-neon-dup-ld128.c",
589 "src/f32-gemm/gen-inc/4x8inc-minmax-neon-lane-ld64.c",
590 "src/f32-gemm/gen-inc/4x8inc-minmax-neon-lane-ld128.c",
591 "src/f32-gemm/gen-inc/4x8s4inc-minmax-neon.c",
592 "src/f32-gemm/gen-inc/5x8inc-minmax-neon-lane-ld64.c",
Miao Wang400e4042020-04-17 10:15:59 -0700593 "src/f32-gemm/gen-inc/6x8inc-minmax-neon-dup-ld64.c",
594 "src/f32-gemm/gen-inc/6x8inc-minmax-neon-dup-ld128.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800595 "src/f32-gemm/gen-inc/6x8inc-minmax-neon-lane-ld64.c",
596 "src/f32-gemm/gen-inc/6x8inc-minmax-neon-lane-ld128.c",
Miao Wang400e4042020-04-17 10:15:59 -0700597 "src/f32-gemm/gen-inc/6x8s4inc-minmax-neon.c",
598 "src/f32-gemm/gen-inc/8x8s4inc-minmax-neon.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800599 "src/f32-gemm/gen/1x8-minmax-neon-dup-ld64.c",
600 "src/f32-gemm/gen/1x8-minmax-neon-lane-ld64.c",
601 "src/f32-gemm/gen/1x8s4-minmax-neon.c",
602 "src/f32-gemm/gen/4x2-minmax-neon-lane-ld64.c",
603 "src/f32-gemm/gen/4x8-minmax-neon-dup-ld64.c",
604 "src/f32-gemm/gen/4x8-minmax-neon-dup-ld128.c",
605 "src/f32-gemm/gen/4x8-minmax-neon-lane-ld64.c",
606 "src/f32-gemm/gen/4x8-minmax-neon-lane-ld128.c",
607 "src/f32-gemm/gen/4x8s4-minmax-neon.c",
608 "src/f32-gemm/gen/5x8-minmax-neon-lane-ld64.c",
609 "src/f32-gemm/gen/6x8-minmax-neon-dup-ld64.c",
610 "src/f32-gemm/gen/6x8-minmax-neon-dup-ld128.c",
611 "src/f32-gemm/gen/6x8-minmax-neon-lane-ld64.c",
612 "src/f32-gemm/gen/6x8-minmax-neon-lane-ld128.c",
613 "src/f32-gemm/gen/6x8s4-minmax-neon.c",
614 "src/f32-gemm/gen/8x8s4-minmax-neon.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700615 "src/f32-hswish/gen/hswish-neon-x4.c",
616 "src/f32-hswish/gen/hswish-neon-x8.c",
617 "src/f32-hswish/gen/hswish-neon-x16.c",
Miao Wang2534c2f2020-03-16 11:58:04 -0700618 "src/f32-ibilinear/gen/neon-c4.c",
619 "src/f32-ibilinear/gen/neon-c8.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800620 "src/f32-igemm/gen/1x8-minmax-neon-dup-ld64.c",
Miao Wang400e4042020-04-17 10:15:59 -0700621 "src/f32-igemm/gen/1x8-minmax-neon-lane-ld64.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800622 "src/f32-igemm/gen/1x8s4-minmax-neon.c",
Miao Wang400e4042020-04-17 10:15:59 -0700623 "src/f32-igemm/gen/4x2-minmax-neon-lane-ld64.c",
624 "src/f32-igemm/gen/4x4-minmax-neon-lane-ld64.c",
Miao Wang400e4042020-04-17 10:15:59 -0700625 "src/f32-igemm/gen/4x8-minmax-neon-dup-ld64.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800626 "src/f32-igemm/gen/4x8-minmax-neon-dup-ld128.c",
627 "src/f32-igemm/gen/4x8-minmax-neon-lane-ld64.c",
628 "src/f32-igemm/gen/4x8-minmax-neon-lane-ld128.c",
629 "src/f32-igemm/gen/4x8s4-minmax-neon.c",
Miao Wang400e4042020-04-17 10:15:59 -0700630 "src/f32-igemm/gen/6x8-minmax-neon-dup-ld64.c",
631 "src/f32-igemm/gen/6x8-minmax-neon-dup-ld128.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800632 "src/f32-igemm/gen/6x8-minmax-neon-lane-ld64.c",
633 "src/f32-igemm/gen/6x8-minmax-neon-lane-ld128.c",
Miao Wang400e4042020-04-17 10:15:59 -0700634 "src/f32-igemm/gen/6x8s4-minmax-neon.c",
635 "src/f32-igemm/gen/8x8s4-minmax-neon.c",
636 "src/f32-maxpool/9p8x-minmax-neon-c4.c",
637 "src/f32-pavgpool/9p8x-minmax-neon-c4.c",
638 "src/f32-pavgpool/9x-minmax-neon-c4.c",
639 "src/f32-ppmm/gen/4x8-minmax-neon.c",
640 "src/f32-ppmm/gen/8x8-minmax-neon.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700641 "src/f32-prelu/gen/neon-1x4.c",
642 "src/f32-prelu/gen/neon-1x8.c",
643 "src/f32-prelu/gen/neon-1x16.c",
Miao Wange9993472020-02-10 15:00:10 -0800644 "src/f32-prelu/gen/neon-2x4.c",
645 "src/f32-prelu/gen/neon-2x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700646 "src/f32-prelu/gen/neon-2x16.c",
647 "src/f32-prelu/gen/neon-4x4.c",
648 "src/f32-prelu/gen/neon-4x8.c",
649 "src/f32-prelu/gen/neon-4x16.c",
Miao Wange9993472020-02-10 15:00:10 -0800650 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x4.c",
Miao Wange9993472020-02-10 15:00:10 -0800651 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x8-acc2.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800652 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x8.c",
Miao Wange9993472020-02-10 15:00:10 -0800653 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x12-acc2.c",
654 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x12-acc3.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800655 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x12.c",
Miao Wange9993472020-02-10 15:00:10 -0800656 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x16-acc2.c",
657 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x16-acc4.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800658 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x16.c",
Miao Wange9993472020-02-10 15:00:10 -0800659 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20-acc2.c",
660 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20-acc5.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800661 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20.c",
662 "src/f32-raddstoreexpminusmax/gen/neon-p5-x4.c",
663 "src/f32-raddstoreexpminusmax/gen/neon-p5-x8-acc2.c",
664 "src/f32-raddstoreexpminusmax/gen/neon-p5-x8.c",
665 "src/f32-raddstoreexpminusmax/gen/neon-p5-x12-acc2.c",
666 "src/f32-raddstoreexpminusmax/gen/neon-p5-x12-acc3.c",
667 "src/f32-raddstoreexpminusmax/gen/neon-p5-x12.c",
668 "src/f32-raddstoreexpminusmax/gen/neon-p5-x16-acc2.c",
669 "src/f32-raddstoreexpminusmax/gen/neon-p5-x16-acc4.c",
670 "src/f32-raddstoreexpminusmax/gen/neon-p5-x16.c",
671 "src/f32-raddstoreexpminusmax/gen/neon-p5-x20-acc2.c",
672 "src/f32-raddstoreexpminusmax/gen/neon-p5-x20-acc5.c",
673 "src/f32-raddstoreexpminusmax/gen/neon-p5-x20.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700674 "src/f32-relu/gen/neon-x4.c",
675 "src/f32-relu/gen/neon-x8.c",
Miao Wange9993472020-02-10 15:00:10 -0800676 "src/f32-rmax/neon.c",
Miao Wange9993472020-02-10 15:00:10 -0800677 "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x4.c",
678 "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x8.c",
679 "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x12.c",
680 "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x16.c",
681 "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x20.c",
682 "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x24.c",
683 "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x4.c",
684 "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x8.c",
685 "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x12.c",
686 "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x16.c",
687 "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x20.c",
688 "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x24.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800689 "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x4.c",
690 "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x8.c",
691 "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x12.c",
692 "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x16.c",
693 "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x20.c",
694 "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x24.c",
Miao Wang55abe392021-02-03 14:54:41 -0800695 "src/f32-spmm/gen/4x1-minmax-neon-pipelined.c",
696 "src/f32-spmm/gen/4x1-minmax-neon-x2.c",
697 "src/f32-spmm/gen/4x1-minmax-neon.c",
698 "src/f32-spmm/gen/8x1-minmax-neon-pipelined.c",
699 "src/f32-spmm/gen/8x1-minmax-neon-x2.c",
700 "src/f32-spmm/gen/8x1-minmax-neon.c",
701 "src/f32-spmm/gen/12x1-minmax-neon.c",
702 "src/f32-spmm/gen/16x1-minmax-neon-pipelined.c",
703 "src/f32-spmm/gen/16x1-minmax-neon-x2.c",
704 "src/f32-spmm/gen/16x1-minmax-neon.c",
705 "src/f32-spmm/gen/32x1-minmax-neon-pipelined.c",
706 "src/f32-spmm/gen/32x1-minmax-neon-x2.c",
707 "src/f32-spmm/gen/32x1-minmax-neon.c",
Miao Wang400e4042020-04-17 10:15:59 -0700708 "src/f32-vbinary/gen/vadd-minmax-neon-x4.c",
709 "src/f32-vbinary/gen/vadd-minmax-neon-x8.c",
710 "src/f32-vbinary/gen/vaddc-minmax-neon-x4.c",
711 "src/f32-vbinary/gen/vaddc-minmax-neon-x8.c",
Miao Wange9993472020-02-10 15:00:10 -0800712 "src/f32-vbinary/gen/vmax-neon-x4.c",
713 "src/f32-vbinary/gen/vmax-neon-x8.c",
714 "src/f32-vbinary/gen/vmaxc-neon-x4.c",
715 "src/f32-vbinary/gen/vmaxc-neon-x8.c",
716 "src/f32-vbinary/gen/vmin-neon-x4.c",
717 "src/f32-vbinary/gen/vmin-neon-x8.c",
718 "src/f32-vbinary/gen/vminc-neon-x4.c",
719 "src/f32-vbinary/gen/vminc-neon-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -0700720 "src/f32-vbinary/gen/vmul-minmax-neon-x4.c",
721 "src/f32-vbinary/gen/vmul-minmax-neon-x8.c",
722 "src/f32-vbinary/gen/vmulc-minmax-neon-x4.c",
723 "src/f32-vbinary/gen/vmulc-minmax-neon-x8.c",
724 "src/f32-vbinary/gen/vrsubc-minmax-neon-x4.c",
725 "src/f32-vbinary/gen/vrsubc-minmax-neon-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700726 "src/f32-vbinary/gen/vsqrdiff-neon-x4.c",
727 "src/f32-vbinary/gen/vsqrdiff-neon-x8.c",
728 "src/f32-vbinary/gen/vsqrdiffc-neon-x4.c",
729 "src/f32-vbinary/gen/vsqrdiffc-neon-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -0700730 "src/f32-vbinary/gen/vsub-minmax-neon-x4.c",
731 "src/f32-vbinary/gen/vsub-minmax-neon-x8.c",
732 "src/f32-vbinary/gen/vsubc-minmax-neon-x4.c",
733 "src/f32-vbinary/gen/vsubc-minmax-neon-x8.c",
Miao Wang55abe392021-02-03 14:54:41 -0800734 "src/f32-velu/gen/velu-neon-rr2-lut16-p3-x4.c",
735 "src/f32-velu/gen/velu-neon-rr2-lut16-p3-x8.c",
736 "src/f32-velu/gen/velu-neon-rr2-lut16-p3-x12.c",
737 "src/f32-velu/gen/velu-neon-rr2-lut16-p3-x16.c",
738 "src/f32-velu/gen/velu-neon-rr2-lut16-p3-x20.c",
739 "src/f32-velu/gen/velu-neon-rr2-lut16-p3-x24.c",
740 "src/f32-velu/gen/velu-neon-rr2-p6-x4.c",
741 "src/f32-velu/gen/velu-neon-rr2-p6-x8.c",
742 "src/f32-velu/gen/velu-neon-rr2-p6-x12.c",
743 "src/f32-velu/gen/velu-neon-rr2-p6-x16.c",
744 "src/f32-velu/gen/velu-neon-rr2-p6-x20.c",
745 "src/f32-velu/gen/velu-neon-rr2-p6-x24.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700746 "src/f32-vlrelu/gen/vlrelu-neon-x4.c",
747 "src/f32-vlrelu/gen/vlrelu-neon-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -0700748 "src/f32-vmulcaddc/gen/c4-minmax-neon-2x.c",
749 "src/f32-vmulcaddc/gen/c8-minmax-neon-2x.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700750 "src/f32-vrnd/gen/vrndd-neon-x4.c",
751 "src/f32-vrnd/gen/vrndd-neon-x8.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800752 "src/f32-vrnd/gen/vrndne-neon-x4.c",
753 "src/f32-vrnd/gen/vrndne-neon-x8.c",
754 "src/f32-vrnd/gen/vrndu-neon-x4.c",
755 "src/f32-vrnd/gen/vrndu-neon-x8.c",
756 "src/f32-vrnd/gen/vrndz-neon-x4.c",
757 "src/f32-vrnd/gen/vrndz-neon-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700758 "src/f32-vunary/gen/vabs-neon-x4.c",
759 "src/f32-vunary/gen/vabs-neon-x8.c",
760 "src/f32-vunary/gen/vneg-neon-x4.c",
761 "src/f32-vunary/gen/vneg-neon-x8.c",
762 "src/f32-vunary/gen/vsqr-neon-x4.c",
763 "src/f32-vunary/gen/vsqr-neon-x8.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800764 "src/math/expm1minus-neon-rr2-lut16-p3.c",
765 "src/math/expm1minus-neon-rr2-p6.c",
766 "src/math/roundd-neon-addsub.c",
767 "src/math/roundd-neon-cvt.c",
768 "src/math/roundne-neon-addsub.c",
769 "src/math/roundu-neon-addsub.c",
770 "src/math/roundu-neon-cvt.c",
771 "src/math/roundz-neon-addsub.c",
772 "src/math/roundz-neon-cvt.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800773 "src/math/sigmoid-neon-rr2-lut64-p2-nr2recps.c",
774 "src/math/sigmoid-neon-rr2-lut2048-p1-nr2recps.c",
775 "src/math/sigmoid-neon-rr2-p5-nr2recps.c",
776 "src/math/sqrt-neon-nr1rsqrts.c",
777 "src/math/sqrt-neon-nr2rsqrts.c",
778 "src/math/sqrt-neon-nr3rsqrts.c",
779 "src/qs8-dwconv/gen/up8x9-minmax-neon-mul16.c",
780 "src/qs8-dwconv/gen/up16x9-minmax-neon-mul16.c",
781 "src/qs8-dwconv/gen/up24x9-minmax-neon-mul16.c",
782 "src/qs8-dwconv/gen/up32x9-minmax-neon-mul16.c",
783 "src/qs8-gavgpool/gen/7p7x-minmax-neon-c8-acc2.c",
784 "src/qs8-gavgpool/gen/7p7x-minmax-neon-c16-acc2.c",
785 "src/qs8-gavgpool/gen/7p7x-minmax-neon-c24-acc2.c",
786 "src/qs8-gavgpool/gen/7p7x-minmax-neon-c32-acc2.c",
787 "src/qs8-gavgpool/gen/7x-minmax-neon-c8-acc2.c",
788 "src/qs8-gavgpool/gen/7x-minmax-neon-c16-acc2.c",
789 "src/qs8-gavgpool/gen/7x-minmax-neon-c24-acc2.c",
790 "src/qs8-gavgpool/gen/7x-minmax-neon-c32-acc2.c",
791 "src/qs8-gemm/gen/1x8-minmax-neon-mlal-lane.c",
792 "src/qs8-gemm/gen/1x16-minmax-neon-mlal-lane.c",
793 "src/qs8-gemm/gen/2x8-minmax-neon-mlal-lane.c",
794 "src/qs8-gemm/gen/2x16-minmax-neon-mlal-lane.c",
795 "src/qs8-igemm/gen/1x8-minmax-neon-mlal-lane.c",
796 "src/qs8-igemm/gen/1x16-minmax-neon-mlal-lane.c",
797 "src/qs8-igemm/gen/2x8-minmax-neon-mlal-lane.c",
798 "src/qs8-igemm/gen/2x16-minmax-neon-mlal-lane.c",
799 "src/qs8-requantization/fp32-neon.c",
800 "src/qs8-requantization/precise-neon.c",
801 "src/qs8-requantization/q31-neon.c",
802 "src/qs8-vadd/gen/minmax-neon-ld64-x8.c",
803 "src/qs8-vadd/gen/minmax-neon-ld64-x16.c",
804 "src/qs8-vadd/gen/minmax-neon-ld64-x24.c",
805 "src/qs8-vadd/gen/minmax-neon-ld64-x32.c",
806 "src/qs8-vaddc/gen/minmax-neon-ld64-x8.c",
807 "src/qs8-vaddc/gen/minmax-neon-ld64-x16.c",
808 "src/qs8-vaddc/gen/minmax-neon-ld64-x24.c",
809 "src/qs8-vaddc/gen/minmax-neon-ld64-x32.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700810 "src/qu8-avgpool/9p8x-minmax-neon-c8.c",
811 "src/qu8-avgpool/9x-minmax-neon-c8.c",
812 "src/qu8-dwconv/up8x9-minmax-neon.c",
813 "src/qu8-gavgpool/7p7x-minmax-neon-c8.c",
814 "src/qu8-gavgpool/7x-minmax-neon-c8.c",
815 "src/qu8-gemm/4x8-minmax-neon.c",
816 "src/qu8-gemm/8x8-minmax-neon.c",
817 "src/qu8-igemm/4x8-minmax-neon.c",
818 "src/qu8-igemm/8x8-minmax-neon.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700819 "src/qu8-requantization/fp32-neon.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800820 "src/qu8-requantization/precise-neon.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700821 "src/qu8-requantization/q31-neon.c",
822 "src/qu8-vadd/minmax-neon.c",
Miao Wang2534c2f2020-03-16 11:58:04 -0700823 "src/u8-clamp/neon-x64.c",
Miao Wang400e4042020-04-17 10:15:59 -0700824 "src/u8-maxpool/9p8x-minmax-neon-c16.c",
Miao Wange9993472020-02-10 15:00:10 -0800825 "src/u8-rmax/neon.c",
Miao Wang5eea8312020-12-07 09:12:40 -0800826 "src/x8-zip/x2-neon.c",
827 "src/x8-zip/x3-neon.c",
828 "src/x8-zip/x4-neon.c",
829 "src/x8-zip/xm-neon.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700830 "src/x32-fill/neon.c",
Miao Wange9993472020-02-10 15:00:10 -0800831 "src/x32-packx/x4-neon-st4.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -0700832 "src/x32-pad/neon.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -0700833 "src/x32-unpool/neon.c",
Miao Wange9993472020-02-10 15:00:10 -0800834 "src/x32-zip/x2-neon.c",
835 "src/x32-zip/x3-neon.c",
836 "src/x32-zip/x4-neon.c",
837 "src/x32-zip/xm-neon.c",
Miao Wange9993472020-02-10 15:00:10 -0800838]
839
// Source files whose names carry the "neonfma" tag, listed directory by
// directory: f32-dwconv, f32-gemm (gen-inc, then gen), f32-ibilinear,
// f32-igemm, f32-ppmm, f32-raddstoreexpminusmax, f32-sigmoid, f32-spmm,
// f32-velu, f32-vmulcaddc, f32-vsqrt and math.
// Within a directory, numeric suffixes ascend by value (x4, x8, x12, ...;
// ld64 before ld128), not lexicographically.
// NOTE(review): presumably these are built only for targets that support
// NEON FMA -- confirm against the module that consumes this variable.
NEONFMA_UKERNELS = [
    "src/f32-dwconv/gen/up4x4-minmax-neonfma-acc2.c",
    "src/f32-dwconv/gen/up4x4-minmax-neonfma.c",
    "src/f32-dwconv/gen/up4x9-minmax-neonfma-acc2.c",
    "src/f32-dwconv/gen/up4x9-minmax-neonfma.c",
    "src/f32-dwconv/gen/up4x25-minmax-neonfma-acc2.c",
    "src/f32-dwconv/gen/up4x25-minmax-neonfma.c",
    "src/f32-dwconv/gen/up8x4-minmax-neonfma-acc2.c",
    "src/f32-dwconv/gen/up8x4-minmax-neonfma.c",
    "src/f32-dwconv/gen/up8x9-minmax-neonfma-acc2.c",
    "src/f32-dwconv/gen/up8x9-minmax-neonfma.c",
    "src/f32-dwconv/gen/up8x25-minmax-neonfma-acc2.c",
    "src/f32-dwconv/gen/up8x25-minmax-neonfma.c",
    "src/f32-gemm/gen-inc/1x8inc-minmax-neonfma-dup-ld64.c",
    "src/f32-gemm/gen-inc/1x8s4inc-minmax-neonfma.c",
    "src/f32-gemm/gen-inc/4x8inc-minmax-neonfma-dup-ld64.c",
    "src/f32-gemm/gen-inc/4x8inc-minmax-neonfma-dup-ld128.c",
    "src/f32-gemm/gen-inc/4x8s4inc-minmax-neonfma.c",
    "src/f32-gemm/gen-inc/6x8inc-minmax-neonfma-dup-ld64.c",
    "src/f32-gemm/gen-inc/6x8inc-minmax-neonfma-dup-ld128.c",
    "src/f32-gemm/gen-inc/6x8s4inc-minmax-neonfma.c",
    "src/f32-gemm/gen-inc/8x8s4inc-minmax-neonfma.c",
    "src/f32-gemm/gen/1x8-minmax-neonfma-dup-ld64.c",
    "src/f32-gemm/gen/1x8s4-minmax-neonfma.c",
    "src/f32-gemm/gen/4x8-minmax-neonfma-dup-ld64.c",
    "src/f32-gemm/gen/4x8-minmax-neonfma-dup-ld128.c",
    "src/f32-gemm/gen/4x8s4-minmax-neonfma.c",
    "src/f32-gemm/gen/6x8-minmax-neonfma-dup-ld64.c",
    "src/f32-gemm/gen/6x8-minmax-neonfma-dup-ld128.c",
    "src/f32-gemm/gen/6x8s4-minmax-neonfma.c",
    "src/f32-gemm/gen/8x8s4-minmax-neonfma.c",
    "src/f32-ibilinear/gen/neonfma-c4.c",
    "src/f32-ibilinear/gen/neonfma-c8.c",
    "src/f32-igemm/gen/1x8-minmax-neonfma-dup-ld64.c",
    "src/f32-igemm/gen/1x8s4-minmax-neonfma.c",
    "src/f32-igemm/gen/4x8-minmax-neonfma-dup-ld64.c",
    "src/f32-igemm/gen/4x8-minmax-neonfma-dup-ld128.c",
    "src/f32-igemm/gen/4x8s4-minmax-neonfma.c",
    "src/f32-igemm/gen/6x8-minmax-neonfma-dup-ld64.c",
    "src/f32-igemm/gen/6x8-minmax-neonfma-dup-ld128.c",
    "src/f32-igemm/gen/6x8s4-minmax-neonfma.c",
    "src/f32-igemm/gen/8x8s4-minmax-neonfma.c",
    "src/f32-ppmm/gen/4x8-minmax-neonfma.c",
    "src/f32-ppmm/gen/8x8-minmax-neonfma.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x4.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x8-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x8.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x12-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x12-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x12.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x16-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x16-acc4.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x16.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x20-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x20-acc5.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x20.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x4.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x8-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x8.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x12-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x12-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x12.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x16-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x16-acc4.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x16.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x20-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x20-acc5.c",
    "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x24.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x4.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x8.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x12.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x16.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x20.c",
    "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x24.c",
    "src/f32-spmm/gen/4x1-minmax-neonfma-pipelined.c",
    "src/f32-spmm/gen/4x1-minmax-neonfma-x2.c",
    "src/f32-spmm/gen/4x1-minmax-neonfma.c",
    "src/f32-spmm/gen/8x1-minmax-neonfma-pipelined.c",
    "src/f32-spmm/gen/8x1-minmax-neonfma-x2.c",
    "src/f32-spmm/gen/8x1-minmax-neonfma.c",
    "src/f32-spmm/gen/12x1-minmax-neonfma.c",
    "src/f32-spmm/gen/16x1-minmax-neonfma-pipelined.c",
    "src/f32-spmm/gen/16x1-minmax-neonfma-x2.c",
    "src/f32-spmm/gen/16x1-minmax-neonfma.c",
    "src/f32-spmm/gen/32x1-minmax-neonfma-pipelined.c",
    "src/f32-spmm/gen/32x1-minmax-neonfma-x2.c",
    "src/f32-spmm/gen/32x1-minmax-neonfma.c",
    "src/f32-velu/gen/velu-neonfma-rr1-lut16-p3-x4.c",
    "src/f32-velu/gen/velu-neonfma-rr1-lut16-p3-x8.c",
    "src/f32-velu/gen/velu-neonfma-rr1-lut16-p3-x12.c",
    "src/f32-velu/gen/velu-neonfma-rr1-lut16-p3-x16.c",
    "src/f32-velu/gen/velu-neonfma-rr1-lut16-p3-x20.c",
    "src/f32-velu/gen/velu-neonfma-rr1-lut16-p3-x24.c",
    "src/f32-velu/gen/velu-neonfma-rr1-p6-x4.c",
    "src/f32-velu/gen/velu-neonfma-rr1-p6-x8.c",
    "src/f32-velu/gen/velu-neonfma-rr1-p6-x12.c",
    "src/f32-velu/gen/velu-neonfma-rr1-p6-x16.c",
    "src/f32-velu/gen/velu-neonfma-rr1-p6-x20.c",
    "src/f32-velu/gen/velu-neonfma-rr1-p6-x24.c",
    "src/f32-vmulcaddc/gen/c4-minmax-neonfma-2x.c",
    "src/f32-vmulcaddc/gen/c8-minmax-neonfma-2x.c",
    "src/f32-vsqrt/gen/neonfma-nr1rsqrts1fma1adj-x4.c",
    "src/f32-vsqrt/gen/neonfma-nr1rsqrts1fma1adj-x8.c",
    "src/f32-vsqrt/gen/neonfma-nr1rsqrts1fma1adj-x12.c",
    "src/f32-vsqrt/gen/neonfma-nr1rsqrts1fma1adj-x16.c",
    "src/f32-vsqrt/gen/neonfma-nr1rsqrts1fma1adj-x20.c",
    "src/f32-vsqrt/gen/neonfma-nr1rsqrts1fma1adj-x24.c",
    "src/f32-vsqrt/gen/neonfma-nr1rsqrts1fma1adj-x28.c",
    "src/f32-vsqrt/gen/neonfma-nr1rsqrts1fma1adj-x32.c",
    "src/f32-vsqrt/gen/neonfma-nr1rsqrts1fma1adj-x36.c",
    "src/f32-vsqrt/gen/neonfma-nr1rsqrts1fma1adj-x40.c",
    "src/f32-vsqrt/gen/neonfma-nr2fma1adj-x4.c",
    "src/f32-vsqrt/gen/neonfma-nr2fma1adj-x8.c",
    "src/f32-vsqrt/gen/neonfma-nr2fma1adj-x12.c",
    "src/f32-vsqrt/gen/neonfma-nr2fma1adj-x16.c",
    "src/f32-vsqrt/gen/neonfma-nr2fma1adj-x20.c",
    "src/f32-vsqrt/gen/neonfma-nr2fma1adj-x24.c",
    "src/f32-vsqrt/gen/neonfma-nr2fma1adj-x28.c",
    "src/f32-vsqrt/gen/neonfma-nr2fma1adj-x32.c",
    "src/f32-vsqrt/gen/neonfma-nr2fma1adj-x36.c",
    "src/f32-vsqrt/gen/neonfma-nr2fma1adj-x40.c",
    "src/math/exp-neonfma-rr2-lut64-p2.c",
    "src/math/exp-neonfma-rr2-p5.c",
    "src/math/expm1minus-neonfma-rr1-lut16-p3.c",
    "src/math/expm1minus-neonfma-rr1-p6.c",
    "src/math/expminus-neonfma-rr2-lut64-p2.c",
    "src/math/expminus-neonfma-rr2-lut2048-p1.c",
    "src/math/expminus-neonfma-rr2-p5.c",
    "src/math/sigmoid-neonfma-rr1-lut64-p2-nr1recps1fma.c",
    "src/math/sigmoid-neonfma-rr1-lut64-p2-nr2fma.c",
    "src/math/sigmoid-neonfma-rr1-lut64-p2-nr2recps.c",
    "src/math/sigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma.c",
    "src/math/sigmoid-neonfma-rr1-lut2048-p1-nr2fma.c",
    "src/math/sigmoid-neonfma-rr1-lut2048-p1-nr2recps.c",
    "src/math/sigmoid-neonfma-rr1-p5-nr1recps1fma.c",
    "src/math/sigmoid-neonfma-rr1-p5-nr2fma.c",
    "src/math/sigmoid-neonfma-rr1-p5-nr2recps.c",
    "src/math/sigmoid-neonfma-rr2-lut64-p2-nr1recps1fma.c",
    "src/math/sigmoid-neonfma-rr2-lut64-p2-nr2fma.c",
    "src/math/sigmoid-neonfma-rr2-lut64-p2-nr2recps.c",
    "src/math/sigmoid-neonfma-rr2-lut2048-p1-nr1recps1fma.c",
    "src/math/sigmoid-neonfma-rr2-lut2048-p1-nr2fma.c",
    "src/math/sigmoid-neonfma-rr2-lut2048-p1-nr2recps.c",
    "src/math/sigmoid-neonfma-rr2-p5-nr1recps1fma.c",
    "src/math/sigmoid-neonfma-rr2-p5-nr2fma.c",
    "src/math/sigmoid-neonfma-rr2-p5-nr2recps.c",
    "src/math/sqrt-neonfma-nr1fma.c",
    "src/math/sqrt-neonfma-nr1rsqrts1fma1adj.c",
    "src/math/sqrt-neonfma-nr2fma.c",
    "src/math/sqrt-neonfma-nr2fma1adj.c",
    "src/math/sqrt-neonfma-nr3fma.c",
]
1040
1041AARCH64_NEONFMA_UKERNELS = [
Miao Wang5eea8312020-12-07 09:12:40 -08001042 "src/f32-conv-hwc/gen/3x3s2p0p1c3x4-neonfma-2x1.c",
1043 "src/f32-conv-hwc/gen/3x3s2p0p1c3x4-neonfma-2x2.c",
1044 "src/f32-conv-hwc/gen/3x3s2p0p1c3x8-neonfma-2x1.c",
1045 "src/f32-conv-hwc/gen/3x3s2p0p1c3x8-neonfma-2x2.c",
1046 "src/f32-conv-hwc/gen/3x3s2p1c3x4-neonfma-2x1.c",
1047 "src/f32-conv-hwc/gen/3x3s2p1c3x4-neonfma-2x2.c",
1048 "src/f32-conv-hwc/gen/3x3s2p1c3x8-neonfma-2x1.c",
1049 "src/f32-conv-hwc/gen/3x3s2p1c3x8-neonfma-2x2.c",
1050 "src/f32-conv-hwc2chw/3x3s2p1c3x4-neonfma-2x2.c",
1051 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-1x4-acc2.c",
1052 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-1x4-acc3.c",
1053 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-1x4-acc4.c",
1054 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-1x4.c",
1055 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-2x4-acc2.c",
1056 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-2x4.c",
1057 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-3x4.c",
1058 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-4x4.c",
1059 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-5x4.c",
1060 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-6x4.c",
1061 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-1x4-acc2.c",
1062 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-1x4-acc3.c",
1063 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-1x4-acc4.c",
1064 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-1x4.c",
1065 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-2x4-acc2.c",
1066 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-2x4.c",
1067 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-3x4.c",
1068 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-4x4.c",
1069 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neonfma-1x4-acc2.c",
1070 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neonfma-1x4-acc3.c",
1071 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neonfma-1x4-acc4.c",
1072 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neonfma-1x4-acc5.c",
1073 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neonfma-1x4.c",
1074 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neonfma-2x4-acc2.c",
1075 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neonfma-2x4-acc3.c",
1076 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neonfma-2x4.c",
1077 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neonfma-3x4-acc2.c",
1078 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neonfma-3x4.c",
1079 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neonfma-4x4-acc2.c",
1080 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neonfma-4x4.c",
1081 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-neonfma-5x4.c",
1082 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neonfma-1x4-acc2.c",
1083 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neonfma-1x4-acc3.c",
1084 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neonfma-1x4-acc4.c",
1085 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neonfma-1x4-acc5.c",
1086 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neonfma-1x4.c",
1087 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neonfma-2x4-acc2.c",
1088 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neonfma-2x4-acc3.c",
1089 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neonfma-2x4.c",
1090 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neonfma-3x4-acc2.c",
1091 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-neonfma-3x4.c",
Miao Wang400e4042020-04-17 10:15:59 -07001092 "src/f32-gemm/gen-inc/1x8inc-minmax-neonfma-lane-ld64.c",
Miao Wang400e4042020-04-17 10:15:59 -07001093 "src/f32-gemm/gen-inc/4x8inc-minmax-neonfma-lane-ld64.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001094 "src/f32-gemm/gen-inc/4x8inc-minmax-neonfma-lane-ld128.c",
Miao Wang400e4042020-04-17 10:15:59 -07001095 "src/f32-gemm/gen-inc/5x8inc-minmax-neonfma-lane-ld64.c",
1096 "src/f32-gemm/gen-inc/6x8inc-minmax-neonfma-lane-ld64.c",
1097 "src/f32-gemm/gen-inc/6x8inc-minmax-neonfma-lane-ld128.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001098 "src/f32-gemm/gen/1x8-minmax-neonfma-lane-ld64.c",
1099 "src/f32-gemm/gen/4x2-minmax-neonfma-lane-ld64.c",
1100 "src/f32-gemm/gen/4x8-minmax-neonfma-lane-ld64.c",
1101 "src/f32-gemm/gen/4x8-minmax-neonfma-lane-ld128.c",
1102 "src/f32-gemm/gen/5x8-minmax-neonfma-lane-ld64.c",
1103 "src/f32-gemm/gen/6x8-minmax-neonfma-lane-ld64.c",
1104 "src/f32-gemm/gen/6x8-minmax-neonfma-lane-ld128.c",
Miao Wang400e4042020-04-17 10:15:59 -07001105 "src/f32-igemm/gen/1x8-minmax-neonfma-lane-ld64.c",
1106 "src/f32-igemm/gen/4x2-minmax-neonfma-lane-ld64.c",
1107 "src/f32-igemm/gen/4x4-minmax-neonfma-lane-ld64.c",
Miao Wang400e4042020-04-17 10:15:59 -07001108 "src/f32-igemm/gen/4x8-minmax-neonfma-lane-ld64.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001109 "src/f32-igemm/gen/4x8-minmax-neonfma-lane-ld128.c",
Miao Wang400e4042020-04-17 10:15:59 -07001110 "src/f32-igemm/gen/6x8-minmax-neonfma-lane-ld64.c",
1111 "src/f32-igemm/gen/6x8-minmax-neonfma-lane-ld128.c",
Miao Wange9993472020-02-10 15:00:10 -08001112 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x4.c",
1113 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x8.c",
1114 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x12.c",
1115 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x16.c",
1116 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x20.c",
1117 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x24.c",
1118 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x4.c",
1119 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x8.c",
1120 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x12.c",
1121 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x16.c",
1122 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x20.c",
1123 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x24.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001124 "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x4.c",
1125 "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x8.c",
1126 "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x12.c",
1127 "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x16.c",
1128 "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x20.c",
1129 "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x24.c",
Miao Wang400e4042020-04-17 10:15:59 -07001130 "src/f32-spmm/gen/4x2-minmax-neonfma.c",
1131 "src/f32-spmm/gen/4x4-minmax-neonfma.c",
Miao Wang400e4042020-04-17 10:15:59 -07001132 "src/f32-spmm/gen/8x2-minmax-neonfma.c",
1133 "src/f32-spmm/gen/8x4-minmax-neonfma.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001134 "src/f32-spmm/gen/12x2-minmax-neonfma.c",
1135 "src/f32-spmm/gen/12x4-minmax-neonfma.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001136 "src/f32-spmm/gen/16x2-minmax-neonfma.c",
1137 "src/f32-spmm/gen/16x4-minmax-neonfma.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001138 "src/f32-spmm/gen/32x2-minmax-neonfma.c",
1139 "src/f32-spmm/gen/32x4-minmax-neonfma.c",
1140 "src/f32-vbinary/gen/vdiv-minmax-neon-x4.c",
1141 "src/f32-vbinary/gen/vdiv-minmax-neon-x8.c",
1142 "src/f32-vbinary/gen/vdivc-minmax-neon-x4.c",
1143 "src/f32-vbinary/gen/vdivc-minmax-neon-x8.c",
1144 "src/f32-vbinary/gen/vrdivc-minmax-neon-x4.c",
1145 "src/f32-vbinary/gen/vrdivc-minmax-neon-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001146 "src/f32-vsqrt/gen/neon-sqrt-x4.c",
1147 "src/f32-vsqrt/gen/neon-sqrt-x8.c",
Miao Wange9993472020-02-10 15:00:10 -08001148 "src/math/sigmoid-neonfma-rr1-lut64-p2-div.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001149 "src/math/sigmoid-neonfma-rr1-lut2048-p1-div.c",
Miao Wange9993472020-02-10 15:00:10 -08001150 "src/math/sigmoid-neonfma-rr1-p5-div.c",
Miao Wange9993472020-02-10 15:00:10 -08001151 "src/math/sigmoid-neonfma-rr2-lut64-p2-div.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001152 "src/math/sigmoid-neonfma-rr2-lut2048-p1-div.c",
Miao Wange9993472020-02-10 15:00:10 -08001153 "src/math/sigmoid-neonfma-rr2-p5-div.c",
1154]
1155
Miao Wangc0aa11a2020-06-10 13:41:26 -07001156NEONV8_UKERNELS = [
Miao Wang86f5fbe2020-07-24 11:16:10 -07001157 "src/f32-vrnd/gen/vrndd-neonv8-x4.c",
1158 "src/f32-vrnd/gen/vrndd-neonv8-x8.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001159 "src/f32-vrnd/gen/vrndne-neonv8-x4.c",
1160 "src/f32-vrnd/gen/vrndne-neonv8-x8.c",
1161 "src/f32-vrnd/gen/vrndu-neonv8-x4.c",
1162 "src/f32-vrnd/gen/vrndu-neonv8-x8.c",
1163 "src/f32-vrnd/gen/vrndz-neonv8-x4.c",
1164 "src/f32-vrnd/gen/vrndz-neonv8-x8.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -07001165 "src/math/roundd-neonv8.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001166 "src/math/roundne-neonv8.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -07001167 "src/math/roundu-neonv8.c",
1168 "src/math/roundz-neonv8.c",
1169]
1170
Miao Wange9993472020-02-10 15:00:10 -08001171AARCH64_NEONFP16ARITH_UKERNELS = [
Miao Wangc0aa11a2020-06-10 13:41:26 -07001172 "src/f16-clamp/gen/neonfp16arith-x8.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001173 "src/f16-clamp/gen/neonfp16arith-x16.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001174 "src/f16-dwconv/gen/up8x4-minmax-neonfp16arith-acc2.c",
1175 "src/f16-dwconv/gen/up8x4-minmax-neonfp16arith.c",
1176 "src/f16-dwconv/gen/up8x9-minmax-neonfp16arith-acc2.c",
1177 "src/f16-dwconv/gen/up8x9-minmax-neonfp16arith.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001178 "src/f16-dwconv/gen/up8x25-minmax-neonfp16arith-acc2.c",
1179 "src/f16-dwconv/gen/up8x25-minmax-neonfp16arith.c",
1180 "src/f16-dwconv/gen/up16x4-minmax-neonfp16arith-acc2.c",
1181 "src/f16-dwconv/gen/up16x4-minmax-neonfp16arith.c",
1182 "src/f16-dwconv/gen/up16x9-minmax-neonfp16arith-acc2.c",
1183 "src/f16-dwconv/gen/up16x9-minmax-neonfp16arith.c",
1184 "src/f16-dwconv/gen/up16x25-minmax-neonfp16arith-acc2.c",
1185 "src/f16-dwconv/gen/up16x25-minmax-neonfp16arith.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001186 "src/f16-gavgpool/7p7x-minmax-neonfp16arith-c8.c",
1187 "src/f16-gavgpool/7x-minmax-neonfp16arith-c8.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001188 "src/f16-gemm/gen-inc/1x8inc-minmax-neonfp16arith-ld64.c",
1189 "src/f16-gemm/gen-inc/1x16inc-minmax-neonfp16arith-ld64.c",
1190 "src/f16-gemm/gen-inc/4x8inc-minmax-neonfp16arith-ld64.c",
1191 "src/f16-gemm/gen-inc/4x16inc-minmax-neonfp16arith-ld64.c",
1192 "src/f16-gemm/gen-inc/6x8inc-minmax-neonfp16arith-ld64.c",
1193 "src/f16-gemm/gen-inc/6x16inc-minmax-neonfp16arith-ld64.c",
1194 "src/f16-gemm/gen-inc/8x8inc-minmax-neonfp16arith-ld64.c",
1195 "src/f16-gemm/gen-inc/8x16inc-minmax-neonfp16arith-ld64.c",
1196 "src/f16-gemm/gen/1x8-minmax-neonfp16arith-ld64.c",
1197 "src/f16-gemm/gen/1x16-minmax-neonfp16arith-ld64.c",
1198 "src/f16-gemm/gen/4x8-minmax-neonfp16arith-ld64.c",
1199 "src/f16-gemm/gen/4x16-minmax-neonfp16arith-ld64.c",
1200 "src/f16-gemm/gen/6x8-minmax-neonfp16arith-ld64.c",
1201 "src/f16-gemm/gen/6x16-minmax-neonfp16arith-ld64.c",
1202 "src/f16-gemm/gen/8x8-minmax-neonfp16arith-ld64.c",
1203 "src/f16-gemm/gen/8x16-minmax-neonfp16arith-ld64.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001204 "src/f16-hswish/gen/hswish-neonfp16arith-x8.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001205 "src/f16-hswish/gen/hswish-neonfp16arith-x16.c",
1206 "src/f16-igemm/gen/1x8-minmax-neonfp16arith-ld64.c",
1207 "src/f16-igemm/gen/1x16-minmax-neonfp16arith-ld64.c",
1208 "src/f16-igemm/gen/4x8-minmax-neonfp16arith-ld64.c",
1209 "src/f16-igemm/gen/4x16-minmax-neonfp16arith-ld64.c",
1210 "src/f16-igemm/gen/6x8-minmax-neonfp16arith-ld64.c",
1211 "src/f16-igemm/gen/6x16-minmax-neonfp16arith-ld64.c",
1212 "src/f16-igemm/gen/8x8-minmax-neonfp16arith-ld64.c",
1213 "src/f16-igemm/gen/8x16-minmax-neonfp16arith-ld64.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -07001214 "src/f16-prelu/gen/neonfp16arith-2x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001215 "src/f16-prelu/gen/neonfp16arith-2x16.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001216 "src/f16-relu/gen/neonfp16arith-x8.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001217 "src/f16-relu/gen/neonfp16arith-x16.c",
1218 "src/f16-spmm/gen/8x1-minmax-neonfp16arith-x2.c",
Miao Wang400e4042020-04-17 10:15:59 -07001219 "src/f16-spmm/gen/8x1-minmax-neonfp16arith.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001220 "src/f16-spmm/gen/16x1-minmax-neonfp16arith-x2.c",
Miao Wang400e4042020-04-17 10:15:59 -07001221 "src/f16-spmm/gen/16x1-minmax-neonfp16arith.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001222 "src/f16-spmm/gen/24x1-minmax-neonfp16arith-x2.c",
Miao Wang400e4042020-04-17 10:15:59 -07001223 "src/f16-spmm/gen/24x1-minmax-neonfp16arith.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001224 "src/f16-spmm/gen/32x1-minmax-neonfp16arith-x2.c",
Miao Wang400e4042020-04-17 10:15:59 -07001225 "src/f16-spmm/gen/32x1-minmax-neonfp16arith.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001226 "src/f16-vbinary/gen/vadd-minmax-neonfp16arith-x8.c",
1227 "src/f16-vbinary/gen/vadd-minmax-neonfp16arith-x16.c",
1228 "src/f16-vbinary/gen/vaddc-minmax-neonfp16arith-x8.c",
1229 "src/f16-vbinary/gen/vaddc-minmax-neonfp16arith-x16.c",
1230 "src/f16-vbinary/gen/vdiv-minmax-neonfp16arith-x8.c",
1231 "src/f16-vbinary/gen/vdiv-minmax-neonfp16arith-x16.c",
1232 "src/f16-vbinary/gen/vdivc-minmax-neonfp16arith-x8.c",
1233 "src/f16-vbinary/gen/vdivc-minmax-neonfp16arith-x16.c",
1234 "src/f16-vbinary/gen/vmax-neonfp16arith-x8.c",
1235 "src/f16-vbinary/gen/vmax-neonfp16arith-x16.c",
1236 "src/f16-vbinary/gen/vmaxc-neonfp16arith-x8.c",
1237 "src/f16-vbinary/gen/vmaxc-neonfp16arith-x16.c",
1238 "src/f16-vbinary/gen/vmin-neonfp16arith-x8.c",
1239 "src/f16-vbinary/gen/vmin-neonfp16arith-x16.c",
1240 "src/f16-vbinary/gen/vminc-neonfp16arith-x8.c",
1241 "src/f16-vbinary/gen/vminc-neonfp16arith-x16.c",
1242 "src/f16-vbinary/gen/vmul-minmax-neonfp16arith-x8.c",
1243 "src/f16-vbinary/gen/vmul-minmax-neonfp16arith-x16.c",
1244 "src/f16-vbinary/gen/vmulc-minmax-neonfp16arith-x8.c",
1245 "src/f16-vbinary/gen/vmulc-minmax-neonfp16arith-x16.c",
1246 "src/f16-vbinary/gen/vrdivc-minmax-neonfp16arith-x8.c",
1247 "src/f16-vbinary/gen/vrdivc-minmax-neonfp16arith-x16.c",
1248 "src/f16-vbinary/gen/vrsubc-minmax-neonfp16arith-x8.c",
1249 "src/f16-vbinary/gen/vrsubc-minmax-neonfp16arith-x16.c",
1250 "src/f16-vbinary/gen/vsub-minmax-neonfp16arith-x8.c",
1251 "src/f16-vbinary/gen/vsub-minmax-neonfp16arith-x16.c",
1252 "src/f16-vbinary/gen/vsubc-minmax-neonfp16arith-x8.c",
1253 "src/f16-vbinary/gen/vsubc-minmax-neonfp16arith-x16.c",
1254 "src/f16-vmulcaddc/gen/c8-minmax-neonfp16arith-2x.c",
1255 "src/f16-vmulcaddc/gen/c16-minmax-neonfp16arith-2x.c",
1256]
1257
1258NEONDOT_UKERNELS = [
1259 "src/qs8-gemm/gen/1x8c4-minmax-neondot.c",
1260 "src/qs8-gemm/gen/1x16c4-minmax-neondot.c",
1261 "src/qs8-gemm/gen/4x8c4-minmax-neondot.c",
1262 "src/qs8-gemm/gen/4x16c4-minmax-neondot.c",
1263 "src/qs8-gemm/gen/6x8c4-minmax-neondot.c",
1264 "src/qs8-gemm/gen/6x16c4-minmax-neondot.c",
1265 "src/qs8-gemm/gen/8x8c4-minmax-neondot.c",
1266 "src/qs8-gemm/gen/8x16c4-minmax-neondot.c",
1267 "src/qs8-gemm/gen/12x8c4-minmax-neondot.c",
1268 "src/qs8-igemm/gen/1x8c4-minmax-neondot.c",
1269 "src/qs8-igemm/gen/1x16c4-minmax-neondot.c",
1270 "src/qs8-igemm/gen/4x8c4-minmax-neondot.c",
1271 "src/qs8-igemm/gen/4x16c4-minmax-neondot.c",
1272 "src/qs8-igemm/gen/6x8c4-minmax-neondot.c",
1273 "src/qs8-igemm/gen/6x16c4-minmax-neondot.c",
1274 "src/qs8-igemm/gen/8x8c4-minmax-neondot.c",
1275 "src/qs8-igemm/gen/8x16c4-minmax-neondot.c",
1276 "src/qs8-igemm/gen/12x8c4-minmax-neondot.c",
Miao Wange9993472020-02-10 15:00:10 -08001277]
1278
1279SSE_UKERNELS = [
Miao Wang400e4042020-04-17 10:15:59 -07001280 "src/f32-avgpool/9p8x-minmax-sse-c4.c",
1281 "src/f32-avgpool/9x-minmax-sse-c4.c",
Miao Wang2534c2f2020-03-16 11:58:04 -07001282 "src/f32-clamp/gen/sse-x4.c",
1283 "src/f32-clamp/gen/sse-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001284 "src/f32-conv-hwc2chw/3x3s2p1c3x4-sse-1x1.c",
1285 "src/f32-conv-hwc2chw/3x3s2p1c3x4-sse-2x2.c",
Miao Wang400e4042020-04-17 10:15:59 -07001286 "src/f32-dwconv/gen/up4x4-minmax-sse-acc2.c",
1287 "src/f32-dwconv/gen/up4x4-minmax-sse.c",
1288 "src/f32-dwconv/gen/up4x9-minmax-sse-acc2.c",
1289 "src/f32-dwconv/gen/up4x9-minmax-sse.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001290 "src/f32-dwconv/gen/up4x25-minmax-sse-acc2.c",
1291 "src/f32-dwconv/gen/up4x25-minmax-sse.c",
Miao Wang400e4042020-04-17 10:15:59 -07001292 "src/f32-dwconv/gen/up8x4-minmax-sse-acc2.c",
1293 "src/f32-dwconv/gen/up8x4-minmax-sse.c",
1294 "src/f32-dwconv/gen/up8x9-minmax-sse-acc2.c",
1295 "src/f32-dwconv/gen/up8x9-minmax-sse.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001296 "src/f32-dwconv/gen/up8x25-minmax-sse-acc2.c",
1297 "src/f32-dwconv/gen/up8x25-minmax-sse.c",
1298 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-1x4-acc2.c",
1299 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-1x4-acc3.c",
1300 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-1x4-acc4.c",
1301 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-1x4.c",
1302 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-2x4-acc2.c",
1303 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-2x4.c",
1304 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-3x4.c",
1305 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-4x4.c",
1306 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-5x4.c",
1307 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-6x4.c",
1308 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-sse-1x4-acc2.c",
1309 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-sse-1x4-acc3.c",
1310 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-sse-1x4-acc4.c",
1311 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-sse-1x4.c",
1312 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-sse-2x4-acc2.c",
1313 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-sse-2x4.c",
1314 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-sse-3x4.c",
1315 "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-sse-4x4.c",
1316 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-sse-1x4-acc2.c",
1317 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-sse-1x4-acc3.c",
1318 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-sse-1x4-acc4.c",
1319 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-sse-1x4-acc5.c",
1320 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-sse-1x4.c",
1321 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-sse-2x4-acc2.c",
1322 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-sse-2x4-acc3.c",
1323 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-sse-2x4.c",
1324 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-sse-3x4-acc2.c",
1325 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-sse-3x4.c",
1326 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-sse-4x4-acc2.c",
1327 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-sse-4x4.c",
1328 "src/f32-dwconv2d-chw/gen/5x5p2-minmax-sse-5x4.c",
1329 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-sse-1x4-acc2.c",
1330 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-sse-1x4-acc3.c",
1331 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-sse-1x4-acc4.c",
1332 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-sse-1x4-acc5.c",
1333 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-sse-1x4.c",
1334 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-sse-2x4-acc2.c",
1335 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-sse-2x4-acc3.c",
1336 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-sse-2x4.c",
1337 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-sse-3x4-acc2.c",
1338 "src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-sse-3x4.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001339 "src/f32-gavgpool-cw/sse-x4.c",
Miao Wang400e4042020-04-17 10:15:59 -07001340 "src/f32-gavgpool/7p7x-minmax-sse-c4.c",
1341 "src/f32-gavgpool/7x-minmax-sse-c4.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001342 "src/f32-gemm/gen-inc/1x8inc-minmax-sse-dup.c",
1343 "src/f32-gemm/gen-inc/1x8inc-minmax-sse-load1.c",
1344 "src/f32-gemm/gen-inc/1x8s4inc-minmax-sse.c",
Miao Wang55abe392021-02-03 14:54:41 -08001345 "src/f32-gemm/gen-inc/3x8inc-minmax-sse-dup.c",
1346 "src/f32-gemm/gen-inc/3x8inc-minmax-sse-load1.c",
1347 "src/f32-gemm/gen-inc/3x8s4inc-minmax-sse.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001348 "src/f32-gemm/gen-inc/4x8inc-minmax-sse-dup.c",
1349 "src/f32-gemm/gen-inc/4x8inc-minmax-sse-load1.c",
1350 "src/f32-gemm/gen-inc/4x8s4inc-minmax-sse.c",
Miao Wang55abe392021-02-03 14:54:41 -08001351 "src/f32-gemm/gen-inc/5x8inc-minmax-sse-dup.c",
1352 "src/f32-gemm/gen-inc/5x8inc-minmax-sse-load1.c",
1353 "src/f32-gemm/gen-inc/5x8s4inc-minmax-sse.c",
Miao Wang400e4042020-04-17 10:15:59 -07001354 "src/f32-gemm/gen/1x8-minmax-sse-dup.c",
1355 "src/f32-gemm/gen/1x8-minmax-sse-load1.c",
1356 "src/f32-gemm/gen/1x8s4-minmax-sse.c",
Miao Wang55abe392021-02-03 14:54:41 -08001357 "src/f32-gemm/gen/3x8-minmax-sse-dup.c",
1358 "src/f32-gemm/gen/3x8-minmax-sse-load1.c",
1359 "src/f32-gemm/gen/3x8s4-minmax-sse.c",
Miao Wang400e4042020-04-17 10:15:59 -07001360 "src/f32-gemm/gen/4x2c4-minmax-sse.c",
1361 "src/f32-gemm/gen/4x8-minmax-sse-dup.c",
1362 "src/f32-gemm/gen/4x8-minmax-sse-load1.c",
1363 "src/f32-gemm/gen/4x8s4-minmax-sse.c",
Miao Wang55abe392021-02-03 14:54:41 -08001364 "src/f32-gemm/gen/5x8-minmax-sse-dup.c",
1365 "src/f32-gemm/gen/5x8-minmax-sse-load1.c",
1366 "src/f32-gemm/gen/5x8s4-minmax-sse.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001367 "src/f32-hswish/gen/hswish-sse-x4.c",
1368 "src/f32-hswish/gen/hswish-sse-x8.c",
Miao Wang2534c2f2020-03-16 11:58:04 -07001369 "src/f32-ibilinear/gen/sse-c4.c",
1370 "src/f32-ibilinear/gen/sse-c8.c",
Miao Wang400e4042020-04-17 10:15:59 -07001371 "src/f32-igemm/gen/1x8-minmax-sse-dup.c",
1372 "src/f32-igemm/gen/1x8-minmax-sse-load1.c",
1373 "src/f32-igemm/gen/1x8s4-minmax-sse.c",
Miao Wang55abe392021-02-03 14:54:41 -08001374 "src/f32-igemm/gen/3x8-minmax-sse-dup.c",
1375 "src/f32-igemm/gen/3x8-minmax-sse-load1.c",
1376 "src/f32-igemm/gen/3x8s4-minmax-sse.c",
Miao Wang400e4042020-04-17 10:15:59 -07001377 "src/f32-igemm/gen/4x2c4-minmax-sse.c",
1378 "src/f32-igemm/gen/4x8-minmax-sse-dup.c",
1379 "src/f32-igemm/gen/4x8-minmax-sse-load1.c",
1380 "src/f32-igemm/gen/4x8s4-minmax-sse.c",
Miao Wang55abe392021-02-03 14:54:41 -08001381 "src/f32-igemm/gen/5x8-minmax-sse-dup.c",
1382 "src/f32-igemm/gen/5x8-minmax-sse-load1.c",
1383 "src/f32-igemm/gen/5x8s4-minmax-sse.c",
Miao Wang400e4042020-04-17 10:15:59 -07001384 "src/f32-maxpool/9p8x-minmax-sse-c4.c",
1385 "src/f32-pavgpool/9p8x-minmax-sse-c4.c",
1386 "src/f32-pavgpool/9x-minmax-sse-c4.c",
1387 "src/f32-ppmm/gen/4x8-minmax-sse.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001388 "src/f32-prelu/gen/sse-2x4.c",
1389 "src/f32-prelu/gen/sse-2x8.c",
1390 "src/f32-relu/gen/sse-x4.c",
1391 "src/f32-relu/gen/sse-x8.c",
Miao Wange9993472020-02-10 15:00:10 -08001392 "src/f32-rmax/sse.c",
Miao Wang400e4042020-04-17 10:15:59 -07001393 "src/f32-spmm/gen/4x1-minmax-sse.c",
1394 "src/f32-spmm/gen/8x1-minmax-sse.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001395 "src/f32-spmm/gen/16x1-minmax-sse.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001396 "src/f32-spmm/gen/32x1-minmax-sse.c",
Miao Wang400e4042020-04-17 10:15:59 -07001397 "src/f32-vbinary/gen/vadd-minmax-sse-x4.c",
1398 "src/f32-vbinary/gen/vadd-minmax-sse-x8.c",
1399 "src/f32-vbinary/gen/vaddc-minmax-sse-x4.c",
1400 "src/f32-vbinary/gen/vaddc-minmax-sse-x8.c",
1401 "src/f32-vbinary/gen/vdiv-minmax-sse-x4.c",
1402 "src/f32-vbinary/gen/vdiv-minmax-sse-x8.c",
1403 "src/f32-vbinary/gen/vdivc-minmax-sse-x4.c",
1404 "src/f32-vbinary/gen/vdivc-minmax-sse-x8.c",
Miao Wange9993472020-02-10 15:00:10 -08001405 "src/f32-vbinary/gen/vmax-sse-x4.c",
1406 "src/f32-vbinary/gen/vmax-sse-x8.c",
1407 "src/f32-vbinary/gen/vmaxc-sse-x4.c",
1408 "src/f32-vbinary/gen/vmaxc-sse-x8.c",
1409 "src/f32-vbinary/gen/vmin-sse-x4.c",
1410 "src/f32-vbinary/gen/vmin-sse-x8.c",
1411 "src/f32-vbinary/gen/vminc-sse-x4.c",
1412 "src/f32-vbinary/gen/vminc-sse-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -07001413 "src/f32-vbinary/gen/vmul-minmax-sse-x4.c",
1414 "src/f32-vbinary/gen/vmul-minmax-sse-x8.c",
1415 "src/f32-vbinary/gen/vmulc-minmax-sse-x4.c",
1416 "src/f32-vbinary/gen/vmulc-minmax-sse-x8.c",
1417 "src/f32-vbinary/gen/vrdivc-minmax-sse-x4.c",
1418 "src/f32-vbinary/gen/vrdivc-minmax-sse-x8.c",
1419 "src/f32-vbinary/gen/vrsubc-minmax-sse-x4.c",
1420 "src/f32-vbinary/gen/vrsubc-minmax-sse-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001421 "src/f32-vbinary/gen/vsqrdiff-sse-x4.c",
1422 "src/f32-vbinary/gen/vsqrdiff-sse-x8.c",
1423 "src/f32-vbinary/gen/vsqrdiffc-sse-x4.c",
1424 "src/f32-vbinary/gen/vsqrdiffc-sse-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -07001425 "src/f32-vbinary/gen/vsub-minmax-sse-x4.c",
1426 "src/f32-vbinary/gen/vsub-minmax-sse-x8.c",
1427 "src/f32-vbinary/gen/vsubc-minmax-sse-x4.c",
1428 "src/f32-vbinary/gen/vsubc-minmax-sse-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001429 "src/f32-vlrelu/gen/vlrelu-sse-x4.c",
1430 "src/f32-vlrelu/gen/vlrelu-sse-x8.c",
Miao Wang400e4042020-04-17 10:15:59 -07001431 "src/f32-vmulcaddc/gen/c4-minmax-sse-2x.c",
1432 "src/f32-vmulcaddc/gen/c8-minmax-sse-2x.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001433 "src/f32-vsqrt/gen/sse-sqrt-x4.c",
1434 "src/f32-vsqrt/gen/sse-sqrt-x8.c",
1435 "src/f32-vunary/gen/vabs-sse-x4.c",
1436 "src/f32-vunary/gen/vabs-sse-x8.c",
1437 "src/f32-vunary/gen/vneg-sse-x4.c",
1438 "src/f32-vunary/gen/vneg-sse-x8.c",
1439 "src/f32-vunary/gen/vsqr-sse-x4.c",
1440 "src/f32-vunary/gen/vsqr-sse-x8.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -07001441 "src/math/roundd-sse-addsub.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001442 "src/math/roundne-sse-addsub.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -07001443 "src/math/roundu-sse-addsub.c",
1444 "src/math/roundz-sse-addsub.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001445 "src/math/sqrt-sse-hh1mac.c",
1446 "src/math/sqrt-sse-nr1mac.c",
1447 "src/math/sqrt-sse-nr2mac.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001448 "src/x32-fill/sse.c",
1449 "src/x32-packx/x4-sse.c",
1450 "src/x32-pad/sse.c",
Miao Wange9993472020-02-10 15:00:10 -08001451]
1452
1453SSE2_UKERNELS = [
Miao Wange9993472020-02-10 15:00:10 -08001454 "src/f32-argmaxpool/4x-sse2-c4.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001455 "src/f32-argmaxpool/9p8x-sse2-c4.c",
Miao Wange9993472020-02-10 15:00:10 -08001456 "src/f32-argmaxpool/9x-sse2-c4.c",
Miao Wang55abe392021-02-03 14:54:41 -08001457 "src/f32-gemm/gen-inc/1x8inc-minmax-sse2-dup.c",
1458 "src/f32-gemm/gen-inc/3x8inc-minmax-sse2-dup.c",
1459 "src/f32-gemm/gen-inc/4x8inc-minmax-sse2-dup.c",
1460 "src/f32-gemm/gen-inc/5x8inc-minmax-sse2-dup.c",
1461 "src/f32-gemm/gen/1x8-minmax-sse2-dup.c",
1462 "src/f32-gemm/gen/3x8-minmax-sse2-dup.c",
1463 "src/f32-gemm/gen/4x8-minmax-sse2-dup.c",
1464 "src/f32-gemm/gen/5x8-minmax-sse2-dup.c",
1465 "src/f32-igemm/gen/1x8-minmax-sse2-dup.c",
1466 "src/f32-igemm/gen/3x8-minmax-sse2-dup.c",
1467 "src/f32-igemm/gen/4x8-minmax-sse2-dup.c",
1468 "src/f32-igemm/gen/5x8-minmax-sse2-dup.c",
Miao Wange9993472020-02-10 15:00:10 -08001469 "src/f32-prelu/gen/sse2-2x4.c",
1470 "src/f32-prelu/gen/sse2-2x8.c",
1471 "src/f32-raddstoreexpminusmax/gen/sse2-p5-x4.c",
Miao Wange9993472020-02-10 15:00:10 -08001472 "src/f32-raddstoreexpminusmax/gen/sse2-p5-x8-acc2.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001473 "src/f32-raddstoreexpminusmax/gen/sse2-p5-x8.c",
Miao Wange9993472020-02-10 15:00:10 -08001474 "src/f32-raddstoreexpminusmax/gen/sse2-p5-x12-acc2.c",
1475 "src/f32-raddstoreexpminusmax/gen/sse2-p5-x12-acc3.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001476 "src/f32-raddstoreexpminusmax/gen/sse2-p5-x12.c",
Miao Wange9993472020-02-10 15:00:10 -08001477 "src/f32-raddstoreexpminusmax/gen/sse2-p5-x16-acc2.c",
1478 "src/f32-raddstoreexpminusmax/gen/sse2-p5-x16-acc4.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001479 "src/f32-raddstoreexpminusmax/gen/sse2-p5-x16.c",
Miao Wange9993472020-02-10 15:00:10 -08001480 "src/f32-raddstoreexpminusmax/gen/sse2-p5-x20-acc2.c",
1481 "src/f32-raddstoreexpminusmax/gen/sse2-p5-x20-acc5.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001482 "src/f32-raddstoreexpminusmax/gen/sse2-p5-x20.c",
1483 "src/f32-sigmoid/gen/sse2-lut64-p2-div-x4.c",
1484 "src/f32-sigmoid/gen/sse2-lut64-p2-div-x8.c",
1485 "src/f32-sigmoid/gen/sse2-lut64-p2-div-x12.c",
1486 "src/f32-sigmoid/gen/sse2-lut64-p2-div-x16.c",
1487 "src/f32-sigmoid/gen/sse2-lut64-p2-div-x20.c",
1488 "src/f32-sigmoid/gen/sse2-lut64-p2-div-x24.c",
Miao Wange9993472020-02-10 15:00:10 -08001489 "src/f32-sigmoid/gen/sse2-p5-div-x4.c",
1490 "src/f32-sigmoid/gen/sse2-p5-div-x8.c",
1491 "src/f32-sigmoid/gen/sse2-p5-div-x12.c",
1492 "src/f32-sigmoid/gen/sse2-p5-div-x16.c",
1493 "src/f32-sigmoid/gen/sse2-p5-div-x20.c",
1494 "src/f32-sigmoid/gen/sse2-p5-div-x24.c",
Miao Wang55abe392021-02-03 14:54:41 -08001495 "src/f32-velu/gen/velu-sse2-rr2-lut16-p3-x4.c",
1496 "src/f32-velu/gen/velu-sse2-rr2-lut16-p3-x8.c",
1497 "src/f32-velu/gen/velu-sse2-rr2-lut16-p3-x12.c",
1498 "src/f32-velu/gen/velu-sse2-rr2-lut16-p3-x16.c",
1499 "src/f32-velu/gen/velu-sse2-rr2-lut16-p3-x20.c",
1500 "src/f32-velu/gen/velu-sse2-rr2-lut16-p3-x24.c",
1501 "src/f32-velu/gen/velu-sse2-rr2-p6-x4.c",
1502 "src/f32-velu/gen/velu-sse2-rr2-p6-x8.c",
1503 "src/f32-velu/gen/velu-sse2-rr2-p6-x12.c",
1504 "src/f32-velu/gen/velu-sse2-rr2-p6-x16.c",
1505 "src/f32-velu/gen/velu-sse2-rr2-p6-x20.c",
1506 "src/f32-velu/gen/velu-sse2-rr2-p6-x24.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001507 "src/f32-vlrelu/gen/vlrelu-sse2-x4.c",
1508 "src/f32-vlrelu/gen/vlrelu-sse2-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001509 "src/f32-vrnd/gen/vrndd-sse2-x4.c",
1510 "src/f32-vrnd/gen/vrndd-sse2-x8.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001511 "src/f32-vrnd/gen/vrndne-sse2-x4.c",
1512 "src/f32-vrnd/gen/vrndne-sse2-x8.c",
1513 "src/f32-vrnd/gen/vrndu-sse2-x4.c",
1514 "src/f32-vrnd/gen/vrndu-sse2-x8.c",
1515 "src/f32-vrnd/gen/vrndz-sse2-x4.c",
1516 "src/f32-vrnd/gen/vrndz-sse2-x8.c",
1517 "src/math/exp-sse2-rr2-lut64-p2.c",
1518 "src/math/exp-sse2-rr2-p5.c",
1519 "src/math/expm1minus-sse2-rr2-lut16-p3.c",
1520 "src/math/expm1minus-sse2-rr2-p6.c",
1521 "src/math/expminus-sse2-rr2-p5.c",
1522 "src/math/roundd-sse2-cvt.c",
1523 "src/math/roundne-sse2-cvt.c",
1524 "src/math/roundu-sse2-cvt.c",
1525 "src/math/roundz-sse2-cvt.c",
1526 "src/math/sigmoid-sse2-rr2-lut64-p2-div.c",
1527 "src/math/sigmoid-sse2-rr2-lut64-p2-nr1.c",
1528 "src/math/sigmoid-sse2-rr2-lut64-p2-nr2.c",
1529 "src/math/sigmoid-sse2-rr2-p5-div.c",
1530 "src/math/sigmoid-sse2-rr2-p5-nr1.c",
1531 "src/math/sigmoid-sse2-rr2-p5-nr2.c",
1532 "src/qs8-dwconv/gen/up8x9-minmax-sse2-mul16.c",
1533 "src/qs8-dwconv/gen/up16x9-minmax-sse2-mul16.c",
1534 "src/qs8-dwconv/gen/up24x9-minmax-sse2-mul16.c",
1535 "src/qs8-gavgpool/gen/7p7x-minmax-sse2-c8-acc2.c",
1536 "src/qs8-gavgpool/gen/7p7x-minmax-sse2-c16-acc2.c",
1537 "src/qs8-gavgpool/gen/7p7x-minmax-sse2-c24-acc2.c",
1538 "src/qs8-gavgpool/gen/7x-minmax-sse2-c8-acc2.c",
1539 "src/qs8-gavgpool/gen/7x-minmax-sse2-c16-acc2.c",
1540 "src/qs8-gavgpool/gen/7x-minmax-sse2-c24-acc2.c",
1541 "src/qs8-gemm/gen/1x4c2-minmax-sse2-ld64.c",
1542 "src/qs8-gemm/gen/1x4c2-minmax-sse2-ld128.c",
1543 "src/qs8-gemm/gen/1x4c2-xw-minmax-sse2.c",
1544 "src/qs8-gemm/gen/1x4c8-minmax-sse2-ld64.c",
1545 "src/qs8-gemm/gen/1x4c8-minmax-sse2-ld128.c",
1546 "src/qs8-gemm/gen/1x4c8-xw-minmax-sse2.c",
1547 "src/qs8-gemm/gen/2x4c8-minmax-sse2-ld64.c",
1548 "src/qs8-gemm/gen/2x4c8-minmax-sse2-ld128.c",
1549 "src/qs8-gemm/gen/2x4c8-xw-minmax-sse2.c",
1550 "src/qs8-gemm/gen/3x4c8-minmax-sse2-ld64.c",
1551 "src/qs8-gemm/gen/3x4c8-minmax-sse2-ld128.c",
1552 "src/qs8-gemm/gen/3x4c8-xw-minmax-sse2.c",
1553 "src/qs8-gemm/gen/4x4c2-minmax-sse2-ld64.c",
1554 "src/qs8-gemm/gen/4x4c2-minmax-sse2-ld128.c",
1555 "src/qs8-gemm/gen/4x4c2-xw-minmax-sse2.c",
1556 "src/qs8-igemm/gen/1x4c2-minmax-sse2-ld64.c",
1557 "src/qs8-igemm/gen/1x4c2-minmax-sse2-ld128.c",
1558 "src/qs8-igemm/gen/1x4c8-minmax-sse2-ld64.c",
1559 "src/qs8-igemm/gen/1x4c8-minmax-sse2-ld128.c",
1560 "src/qs8-igemm/gen/2x4c8-minmax-sse2-ld64.c",
1561 "src/qs8-igemm/gen/2x4c8-minmax-sse2-ld128.c",
1562 "src/qs8-igemm/gen/3x4c8-minmax-sse2-ld64.c",
1563 "src/qs8-igemm/gen/3x4c8-minmax-sse2-ld128.c",
1564 "src/qs8-igemm/gen/4x4c2-minmax-sse2-ld64.c",
1565 "src/qs8-igemm/gen/4x4c2-minmax-sse2-ld128.c",
1566 "src/qs8-requantization/fp32-sse2.c",
1567 "src/qs8-requantization/precise-sse2.c",
1568 "src/qs8-requantization/q31-sse2.c",
1569 "src/qs8-vadd/gen/minmax-sse2-mul16-ld64-x8.c",
1570 "src/qs8-vadd/gen/minmax-sse2-mul16-ld64-x16.c",
1571 "src/qs8-vadd/gen/minmax-sse2-mul16-ld64-x24.c",
1572 "src/qs8-vadd/gen/minmax-sse2-mul16-ld64-x32.c",
1573 "src/qs8-vaddc/gen/minmax-sse2-mul16-ld64-x8.c",
1574 "src/qs8-vaddc/gen/minmax-sse2-mul16-ld64-x16.c",
1575 "src/qs8-vaddc/gen/minmax-sse2-mul16-ld64-x24.c",
1576 "src/qs8-vaddc/gen/minmax-sse2-mul16-ld64-x32.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001577 "src/qu8-avgpool/9p8x-minmax-sse2-c8.c",
1578 "src/qu8-avgpool/9x-minmax-sse2-c8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001579 "src/qu8-dwconv/up8x9-minmax-sse2.c",
1580 "src/qu8-gavgpool/7p7x-minmax-sse2-c8.c",
1581 "src/qu8-gavgpool/7x-minmax-sse2-c8.c",
1582 "src/qu8-gemm/2x4c8-minmax-sse2.c",
1583 "src/qu8-gemm/4x4c2-minmax-sse2.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001584 "src/qu8-igemm/4x4c2-minmax-sse2.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001585 "src/qu8-requantization/fp32-sse2.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001586 "src/qu8-requantization/precise-sse2.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001587 "src/qu8-requantization/q31-sse2.c",
1588 "src/qu8-vadd/minmax-sse2.c",
Miao Wang2534c2f2020-03-16 11:58:04 -07001589 "src/u8-clamp/sse2-x64.c",
Miao Wang400e4042020-04-17 10:15:59 -07001590 "src/u8-maxpool/9p8x-minmax-sse2-c16.c",
Miao Wange9993472020-02-10 15:00:10 -08001591 "src/u8-rmax/sse2.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001592 "src/x8-zip/x2-sse2.c",
1593 "src/x8-zip/x3-sse2.c",
1594 "src/x8-zip/x4-sse2.c",
1595 "src/x8-zip/xm-sse2.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -07001596 "src/x32-unpool/sse2.c",
Miao Wange9993472020-02-10 15:00:10 -08001597 "src/x32-zip/x2-sse2.c",
1598 "src/x32-zip/x3-sse2.c",
1599 "src/x32-zip/x4-sse2.c",
1600 "src/x32-zip/xm-sse2.c",
Miao Wang2534c2f2020-03-16 11:58:04 -07001601]
1602
1603SSSE3_UKERNELS = [
Miao Wang5eea8312020-12-07 09:12:40 -08001604 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-ssse3-1x4-acc2.c",
1605 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-ssse3-1x4-acc3.c",
1606 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-ssse3-1x4-acc4.c",
1607 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-ssse3-1x4.c",
1608 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-ssse3-2x4-acc2.c",
1609 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-ssse3-2x4.c",
1610 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-ssse3-3x4.c",
1611 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-ssse3-4x4.c",
1612 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-ssse3-5x4.c",
1613 "src/f32-dwconv2d-chw/gen/3x3p1-minmax-ssse3-6x4.c",
1614 "src/qs8-dwconv/gen/up8x9-minmax-ssse3-mul16.c",
1615 "src/qs8-dwconv/gen/up16x9-minmax-ssse3-mul16.c",
1616 "src/qs8-dwconv/gen/up24x9-minmax-ssse3-mul16.c",
1617 "src/qs8-gavgpool/gen/7p7x-minmax-ssse3-c8-acc2.c",
1618 "src/qs8-gavgpool/gen/7p7x-minmax-ssse3-c16-acc2.c",
1619 "src/qs8-gavgpool/gen/7p7x-minmax-ssse3-c24-acc2.c",
1620 "src/qs8-gavgpool/gen/7x-minmax-ssse3-c8-acc2.c",
1621 "src/qs8-gavgpool/gen/7x-minmax-ssse3-c16-acc2.c",
1622 "src/qs8-gavgpool/gen/7x-minmax-ssse3-c24-acc2.c",
1623 "src/qs8-gemm/gen/1x4c2-minmax-ssse3-ld64.c",
1624 "src/qs8-gemm/gen/1x4c2-minmax-ssse3-ld128.c",
1625 "src/qs8-gemm/gen/1x4c2-xw-minmax-ssse3.c",
1626 "src/qs8-gemm/gen/1x4c8-minmax-ssse3-ld64.c",
1627 "src/qs8-gemm/gen/1x4c8-minmax-ssse3-ld128.c",
1628 "src/qs8-gemm/gen/1x4c8-xw-minmax-ssse3.c",
1629 "src/qs8-gemm/gen/2x4c8-minmax-ssse3-ld64.c",
1630 "src/qs8-gemm/gen/2x4c8-minmax-ssse3-ld128.c",
1631 "src/qs8-gemm/gen/2x4c8-xw-minmax-ssse3.c",
1632 "src/qs8-gemm/gen/3x4c8-minmax-ssse3-ld64.c",
1633 "src/qs8-gemm/gen/3x4c8-minmax-ssse3-ld128.c",
1634 "src/qs8-gemm/gen/3x4c8-xw-minmax-ssse3.c",
1635 "src/qs8-gemm/gen/4x4c2-minmax-ssse3-ld64.c",
1636 "src/qs8-gemm/gen/4x4c2-minmax-ssse3-ld128.c",
1637 "src/qs8-gemm/gen/4x4c2-xw-minmax-ssse3.c",
1638 "src/qs8-igemm/gen/1x4c2-minmax-ssse3-ld64.c",
1639 "src/qs8-igemm/gen/1x4c2-minmax-ssse3-ld128.c",
1640 "src/qs8-igemm/gen/1x4c8-minmax-ssse3-ld64.c",
1641 "src/qs8-igemm/gen/1x4c8-minmax-ssse3-ld128.c",
1642 "src/qs8-igemm/gen/2x4c8-minmax-ssse3-ld64.c",
1643 "src/qs8-igemm/gen/2x4c8-minmax-ssse3-ld128.c",
1644 "src/qs8-igemm/gen/3x4c8-minmax-ssse3-ld64.c",
1645 "src/qs8-igemm/gen/3x4c8-minmax-ssse3-ld128.c",
1646 "src/qs8-igemm/gen/4x4c2-minmax-ssse3-ld64.c",
1647 "src/qs8-igemm/gen/4x4c2-minmax-ssse3-ld128.c",
1648 "src/qs8-requantization/precise-ssse3.c",
1649 "src/qs8-requantization/q31-ssse3.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001650 "src/qu8-requantization/precise-ssse3.c",
1651 "src/qu8-requantization/q31-ssse3.c",
Miao Wange9993472020-02-10 15:00:10 -08001652]
1653
1654SSE41_UKERNELS = [
1655 "src/f32-prelu/gen/sse41-2x4.c",
1656 "src/f32-prelu/gen/sse41-2x8.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001657 "src/f32-sigmoid/gen/sse41-lut64-p2-div-x4.c",
1658 "src/f32-sigmoid/gen/sse41-lut64-p2-div-x8.c",
1659 "src/f32-sigmoid/gen/sse41-lut64-p2-div-x12.c",
1660 "src/f32-sigmoid/gen/sse41-lut64-p2-div-x16.c",
1661 "src/f32-sigmoid/gen/sse41-lut64-p2-div-x20.c",
1662 "src/f32-sigmoid/gen/sse41-lut64-p2-div-x24.c",
Miao Wange9993472020-02-10 15:00:10 -08001663 "src/f32-sigmoid/gen/sse41-p5-div-x4.c",
1664 "src/f32-sigmoid/gen/sse41-p5-div-x8.c",
1665 "src/f32-sigmoid/gen/sse41-p5-div-x12.c",
1666 "src/f32-sigmoid/gen/sse41-p5-div-x16.c",
1667 "src/f32-sigmoid/gen/sse41-p5-div-x20.c",
1668 "src/f32-sigmoid/gen/sse41-p5-div-x24.c",
Miao Wang55abe392021-02-03 14:54:41 -08001669 "src/f32-velu/gen/velu-sse41-rr2-lut16-p3-x4.c",
1670 "src/f32-velu/gen/velu-sse41-rr2-lut16-p3-x8.c",
1671 "src/f32-velu/gen/velu-sse41-rr2-lut16-p3-x12.c",
1672 "src/f32-velu/gen/velu-sse41-rr2-lut16-p3-x16.c",
1673 "src/f32-velu/gen/velu-sse41-rr2-lut16-p3-x20.c",
1674 "src/f32-velu/gen/velu-sse41-rr2-lut16-p3-x24.c",
1675 "src/f32-velu/gen/velu-sse41-rr2-p6-x4.c",
1676 "src/f32-velu/gen/velu-sse41-rr2-p6-x8.c",
1677 "src/f32-velu/gen/velu-sse41-rr2-p6-x12.c",
1678 "src/f32-velu/gen/velu-sse41-rr2-p6-x16.c",
1679 "src/f32-velu/gen/velu-sse41-rr2-p6-x20.c",
1680 "src/f32-velu/gen/velu-sse41-rr2-p6-x24.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001681 "src/f32-vlrelu/gen/vlrelu-sse41-x4.c",
1682 "src/f32-vlrelu/gen/vlrelu-sse41-x8.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001683 "src/f32-vrnd/gen/vrndd-sse41-x4.c",
1684 "src/f32-vrnd/gen/vrndd-sse41-x8.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001685 "src/f32-vrnd/gen/vrndne-sse41-x4.c",
1686 "src/f32-vrnd/gen/vrndne-sse41-x8.c",
1687 "src/f32-vrnd/gen/vrndu-sse41-x4.c",
1688 "src/f32-vrnd/gen/vrndu-sse41-x8.c",
1689 "src/f32-vrnd/gen/vrndz-sse41-x4.c",
1690 "src/f32-vrnd/gen/vrndz-sse41-x8.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -07001691 "src/math/roundd-sse41.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001692 "src/math/roundne-sse41.c",
Miao Wangc0aa11a2020-06-10 13:41:26 -07001693 "src/math/roundu-sse41.c",
1694 "src/math/roundz-sse41.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001695 "src/qs8-dwconv/gen/up8x9-minmax-sse41-mul16.c",
1696 "src/qs8-dwconv/gen/up16x9-minmax-sse41-mul16.c",
1697 "src/qs8-dwconv/gen/up24x9-minmax-sse41-mul16.c",
1698 "src/qs8-gavgpool/gen/7p7x-minmax-sse41-c8-acc2.c",
1699 "src/qs8-gavgpool/gen/7p7x-minmax-sse41-c16-acc2.c",
1700 "src/qs8-gavgpool/gen/7p7x-minmax-sse41-c24-acc2.c",
1701 "src/qs8-gavgpool/gen/7x-minmax-sse41-c8-acc2.c",
1702 "src/qs8-gavgpool/gen/7x-minmax-sse41-c16-acc2.c",
1703 "src/qs8-gavgpool/gen/7x-minmax-sse41-c24-acc2.c",
1704 "src/qs8-gemm/gen/1x4c2-minmax-sse41-ld64.c",
1705 "src/qs8-gemm/gen/1x4c2-minmax-sse41-ld128.c",
1706 "src/qs8-gemm/gen/1x4c2-xw-minmax-sse41.c",
1707 "src/qs8-gemm/gen/1x4c8-minmax-sse41-ld64.c",
1708 "src/qs8-gemm/gen/1x4c8-minmax-sse41-ld128.c",
1709 "src/qs8-gemm/gen/1x4c8-xw-minmax-sse41.c",
1710 "src/qs8-gemm/gen/2x4c8-minmax-sse41-ld64.c",
1711 "src/qs8-gemm/gen/2x4c8-minmax-sse41-ld128.c",
1712 "src/qs8-gemm/gen/2x4c8-xw-minmax-sse41.c",
1713 "src/qs8-gemm/gen/3x4c8-minmax-sse41-ld64.c",
1714 "src/qs8-gemm/gen/3x4c8-minmax-sse41-ld128.c",
1715 "src/qs8-gemm/gen/3x4c8-xw-minmax-sse41.c",
1716 "src/qs8-gemm/gen/4x4c2-minmax-sse41-ld64.c",
1717 "src/qs8-gemm/gen/4x4c2-minmax-sse41-ld128.c",
1718 "src/qs8-gemm/gen/4x4c2-xw-minmax-sse41.c",
1719 "src/qs8-igemm/gen/1x4c2-minmax-sse41-ld64.c",
1720 "src/qs8-igemm/gen/1x4c2-minmax-sse41-ld128.c",
1721 "src/qs8-igemm/gen/1x4c8-minmax-sse41-ld64.c",
1722 "src/qs8-igemm/gen/1x4c8-minmax-sse41-ld128.c",
1723 "src/qs8-igemm/gen/2x4c8-minmax-sse41-ld64.c",
1724 "src/qs8-igemm/gen/2x4c8-minmax-sse41-ld128.c",
1725 "src/qs8-igemm/gen/3x4c8-minmax-sse41-ld64.c",
1726 "src/qs8-igemm/gen/3x4c8-minmax-sse41-ld128.c",
1727 "src/qs8-igemm/gen/4x4c2-minmax-sse41-ld64.c",
1728 "src/qs8-igemm/gen/4x4c2-minmax-sse41-ld128.c",
1729 "src/qs8-requantization/fp32-sse4.c",
1730 "src/qs8-requantization/precise-sse4.c",
1731 "src/qs8-requantization/q31-sse4.c",
1732 "src/qs8-vadd/gen/minmax-sse41-mul16-ld64-x8.c",
1733 "src/qs8-vadd/gen/minmax-sse41-mul16-ld64-x16.c",
1734 "src/qs8-vadd/gen/minmax-sse41-mul16-ld64-x24.c",
1735 "src/qs8-vadd/gen/minmax-sse41-mul16-ld64-x32.c",
1736 "src/qs8-vadd/gen/minmax-sse41-mul32-ld32-x8.c",
1737 "src/qs8-vadd/gen/minmax-sse41-mul32-ld32-x16.c",
1738 "src/qs8-vadd/gen/minmax-sse41-mul32-ld32-x24.c",
1739 "src/qs8-vadd/gen/minmax-sse41-mul32-ld32-x32.c",
1740 "src/qs8-vaddc/gen/minmax-sse41-mul16-ld64-x8.c",
1741 "src/qs8-vaddc/gen/minmax-sse41-mul16-ld64-x16.c",
1742 "src/qs8-vaddc/gen/minmax-sse41-mul16-ld64-x24.c",
1743 "src/qs8-vaddc/gen/minmax-sse41-mul16-ld64-x32.c",
1744 "src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x8.c",
1745 "src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x16.c",
1746 "src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x24.c",
1747 "src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x32.c",
1748 "src/qu8-requantization/precise-sse4.c",
1749 "src/qu8-requantization/q31-sse4.c",
Miao Wange9993472020-02-10 15:00:10 -08001750]
1751
1752AVX_UKERNELS = [
Miao Wang2534c2f2020-03-16 11:58:04 -07001753 "src/f32-clamp/gen/avx-x8.c",
1754 "src/f32-clamp/gen/avx-x16.c",
Miao Wang400e4042020-04-17 10:15:59 -07001755 "src/f32-dwconv/gen/up8x4-minmax-avx-acc2.c",
1756 "src/f32-dwconv/gen/up8x4-minmax-avx.c",
Miao Wang400e4042020-04-17 10:15:59 -07001757 "src/f32-dwconv/gen/up8x9-minmax-avx-acc2.c",
1758 "src/f32-dwconv/gen/up8x9-minmax-avx.c",
Miao Wang400e4042020-04-17 10:15:59 -07001759 "src/f32-dwconv/gen/up8x25-minmax-avx-acc2.c",
1760 "src/f32-dwconv/gen/up8x25-minmax-avx.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001761 "src/f32-dwconv/gen/up16x4-minmax-avx-acc2.c",
1762 "src/f32-dwconv/gen/up16x4-minmax-avx.c",
1763 "src/f32-dwconv/gen/up16x9-minmax-avx-acc2.c",
1764 "src/f32-dwconv/gen/up16x9-minmax-avx.c",
1765 "src/f32-dwconv/gen/up16x25-minmax-avx-acc2.c",
1766 "src/f32-dwconv/gen/up16x25-minmax-avx.c",
Miao Wang400e4042020-04-17 10:15:59 -07001767 "src/f32-gemm/gen-inc/1x8inc-minmax-avx-broadcast.c",
Miao Wang400e4042020-04-17 10:15:59 -07001768 "src/f32-gemm/gen-inc/1x16inc-minmax-avx-broadcast.c",
1769 "src/f32-gemm/gen-inc/3x16inc-minmax-avx-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001770 "src/f32-gemm/gen-inc/4x8inc-minmax-avx-broadcast.c",
Miao Wang400e4042020-04-17 10:15:59 -07001771 "src/f32-gemm/gen-inc/4x16inc-minmax-avx-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001772 "src/f32-gemm/gen-inc/5x8inc-minmax-avx-broadcast.c",
Miao Wang400e4042020-04-17 10:15:59 -07001773 "src/f32-gemm/gen-inc/5x16inc-minmax-avx-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001774 "src/f32-gemm/gen-inc/6x8inc-minmax-avx-broadcast.c",
1775 "src/f32-gemm/gen-inc/7x8inc-minmax-avx-broadcast.c",
1776 "src/f32-gemm/gen/1x8-minmax-avx-broadcast.c",
1777 "src/f32-gemm/gen/1x16-minmax-avx-broadcast.c",
1778 "src/f32-gemm/gen/3x16-minmax-avx-broadcast.c",
1779 "src/f32-gemm/gen/4x8-minmax-avx-broadcast.c",
1780 "src/f32-gemm/gen/4x16-minmax-avx-broadcast.c",
1781 "src/f32-gemm/gen/5x8-minmax-avx-broadcast.c",
1782 "src/f32-gemm/gen/5x16-minmax-avx-broadcast.c",
1783 "src/f32-gemm/gen/6x8-minmax-avx-broadcast.c",
1784 "src/f32-gemm/gen/7x8-minmax-avx-broadcast.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001785 "src/f32-hswish/gen/hswish-avx-x8.c",
1786 "src/f32-hswish/gen/hswish-avx-x16.c",
Miao Wang400e4042020-04-17 10:15:59 -07001787 "src/f32-igemm/gen/1x8-minmax-avx-broadcast.c",
Miao Wang400e4042020-04-17 10:15:59 -07001788 "src/f32-igemm/gen/1x16-minmax-avx-broadcast.c",
1789 "src/f32-igemm/gen/3x16-minmax-avx-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001790 "src/f32-igemm/gen/4x8-minmax-avx-broadcast.c",
Miao Wang400e4042020-04-17 10:15:59 -07001791 "src/f32-igemm/gen/4x16-minmax-avx-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001792 "src/f32-igemm/gen/5x8-minmax-avx-broadcast.c",
Miao Wang400e4042020-04-17 10:15:59 -07001793 "src/f32-igemm/gen/5x16-minmax-avx-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001794 "src/f32-igemm/gen/6x8-minmax-avx-broadcast.c",
1795 "src/f32-igemm/gen/7x8-minmax-avx-broadcast.c",
Miao Wang2534c2f2020-03-16 11:58:04 -07001796 "src/f32-prelu/gen/avx-2x8.c",
1797 "src/f32-prelu/gen/avx-2x16.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001798 "src/f32-relu/gen/avx-x8.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001799 "src/f32-relu/gen/avx-x16.c",
Miao Wange9993472020-02-10 15:00:10 -08001800 "src/f32-rmax/avx.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001801 "src/f32-sigmoid/gen/avx-rr2-p5-div-x8.c",
1802 "src/f32-sigmoid/gen/avx-rr2-p5-div-x16.c",
1803 "src/f32-sigmoid/gen/avx-rr2-p5-div-x24.c",
1804 "src/f32-sigmoid/gen/avx-rr2-p5-div-x32.c",
1805 "src/f32-sigmoid/gen/avx-rr2-p5-div-x40.c",
1806 "src/f32-sigmoid/gen/avx-rr2-p5-div-x48.c",
1807 "src/f32-sigmoid/gen/avx-rr2-p5-div-x56.c",
1808 "src/f32-sigmoid/gen/avx-rr2-p5-div-x64.c",
1809 "src/f32-sigmoid/gen/avx-rr2-p5-div-x72.c",
1810 "src/f32-sigmoid/gen/avx-rr2-p5-div-x80.c",
1811 "src/f32-sigmoid/gen/avx-rr2-p5-nr2-x8.c",
1812 "src/f32-sigmoid/gen/avx-rr2-p5-nr2-x16.c",
1813 "src/f32-sigmoid/gen/avx-rr2-p5-nr2-x24.c",
1814 "src/f32-sigmoid/gen/avx-rr2-p5-nr2-x32.c",
1815 "src/f32-sigmoid/gen/avx-rr2-p5-nr2-x40.c",
1816 "src/f32-sigmoid/gen/avx-rr2-p5-nr2-x48.c",
1817 "src/f32-sigmoid/gen/avx-rr2-p5-nr2-x56.c",
1818 "src/f32-sigmoid/gen/avx-rr2-p5-nr2-x64.c",
1819 "src/f32-sigmoid/gen/avx-rr2-p5-nr2-x72.c",
1820 "src/f32-sigmoid/gen/avx-rr2-p5-nr2-x80.c",
Miao Wang400e4042020-04-17 10:15:59 -07001821 "src/f32-vbinary/gen/vadd-minmax-avx-x8.c",
1822 "src/f32-vbinary/gen/vadd-minmax-avx-x16.c",
1823 "src/f32-vbinary/gen/vaddc-minmax-avx-x8.c",
1824 "src/f32-vbinary/gen/vaddc-minmax-avx-x16.c",
1825 "src/f32-vbinary/gen/vdiv-minmax-avx-x8.c",
1826 "src/f32-vbinary/gen/vdiv-minmax-avx-x16.c",
1827 "src/f32-vbinary/gen/vdivc-minmax-avx-x8.c",
1828 "src/f32-vbinary/gen/vdivc-minmax-avx-x16.c",
Miao Wange9993472020-02-10 15:00:10 -08001829 "src/f32-vbinary/gen/vmax-avx-x8.c",
1830 "src/f32-vbinary/gen/vmax-avx-x16.c",
1831 "src/f32-vbinary/gen/vmaxc-avx-x8.c",
1832 "src/f32-vbinary/gen/vmaxc-avx-x16.c",
1833 "src/f32-vbinary/gen/vmin-avx-x8.c",
1834 "src/f32-vbinary/gen/vmin-avx-x16.c",
1835 "src/f32-vbinary/gen/vminc-avx-x8.c",
1836 "src/f32-vbinary/gen/vminc-avx-x16.c",
Miao Wang400e4042020-04-17 10:15:59 -07001837 "src/f32-vbinary/gen/vmul-minmax-avx-x8.c",
1838 "src/f32-vbinary/gen/vmul-minmax-avx-x16.c",
1839 "src/f32-vbinary/gen/vmulc-minmax-avx-x8.c",
1840 "src/f32-vbinary/gen/vmulc-minmax-avx-x16.c",
1841 "src/f32-vbinary/gen/vrdivc-minmax-avx-x8.c",
1842 "src/f32-vbinary/gen/vrdivc-minmax-avx-x16.c",
1843 "src/f32-vbinary/gen/vrsubc-minmax-avx-x8.c",
1844 "src/f32-vbinary/gen/vrsubc-minmax-avx-x16.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001845 "src/f32-vbinary/gen/vsqrdiff-avx-x8.c",
1846 "src/f32-vbinary/gen/vsqrdiff-avx-x16.c",
1847 "src/f32-vbinary/gen/vsqrdiffc-avx-x8.c",
1848 "src/f32-vbinary/gen/vsqrdiffc-avx-x16.c",
Miao Wang400e4042020-04-17 10:15:59 -07001849 "src/f32-vbinary/gen/vsub-minmax-avx-x8.c",
1850 "src/f32-vbinary/gen/vsub-minmax-avx-x16.c",
1851 "src/f32-vbinary/gen/vsubc-minmax-avx-x8.c",
1852 "src/f32-vbinary/gen/vsubc-minmax-avx-x16.c",
Miao Wang55abe392021-02-03 14:54:41 -08001853 "src/f32-velu/gen/velu-avx-rr2-lut4-p4-perm-x8.c",
1854 "src/f32-velu/gen/velu-avx-rr2-lut4-p4-perm-x16.c",
1855 "src/f32-velu/gen/velu-avx-rr2-lut4-p4-perm-x24.c",
1856 "src/f32-velu/gen/velu-avx-rr2-lut4-p4-perm-x32.c",
1857 "src/f32-velu/gen/velu-avx-rr2-lut4-p4-perm-x40.c",
1858 "src/f32-velu/gen/velu-avx-rr2-lut4-p4-perm-x48.c",
1859 "src/f32-velu/gen/velu-avx-rr2-lut16-p3-x8.c",
1860 "src/f32-velu/gen/velu-avx-rr2-lut16-p3-x16.c",
1861 "src/f32-velu/gen/velu-avx-rr2-lut16-p3-x24.c",
1862 "src/f32-velu/gen/velu-avx-rr2-lut16-p3-x32.c",
1863 "src/f32-velu/gen/velu-avx-rr2-lut16-p3-x40.c",
1864 "src/f32-velu/gen/velu-avx-rr2-lut16-p3-x48.c",
1865 "src/f32-velu/gen/velu-avx-rr2-p6-x8.c",
1866 "src/f32-velu/gen/velu-avx-rr2-p6-x16.c",
1867 "src/f32-velu/gen/velu-avx-rr2-p6-x24.c",
1868 "src/f32-velu/gen/velu-avx-rr2-p6-x32.c",
1869 "src/f32-velu/gen/velu-avx-rr2-p6-x40.c",
1870 "src/f32-velu/gen/velu-avx-rr2-p6-x48.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001871 "src/f32-vlrelu/gen/vlrelu-avx-x8.c",
1872 "src/f32-vlrelu/gen/vlrelu-avx-x16.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001873 "src/f32-vrnd/gen/vrndd-avx-x8.c",
1874 "src/f32-vrnd/gen/vrndd-avx-x16.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001875 "src/f32-vrnd/gen/vrndne-avx-x8.c",
1876 "src/f32-vrnd/gen/vrndne-avx-x16.c",
1877 "src/f32-vrnd/gen/vrndu-avx-x8.c",
1878 "src/f32-vrnd/gen/vrndu-avx-x16.c",
1879 "src/f32-vrnd/gen/vrndz-avx-x8.c",
1880 "src/f32-vrnd/gen/vrndz-avx-x16.c",
1881 "src/f32-vscale/avx-x32.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001882 "src/f32-vsqrt/gen/avx-sqrt-x8.c",
1883 "src/f32-vsqrt/gen/avx-sqrt-x16.c",
1884 "src/f32-vunary/gen/vabs-avx-x8.c",
1885 "src/f32-vunary/gen/vabs-avx-x16.c",
1886 "src/f32-vunary/gen/vneg-avx-x8.c",
1887 "src/f32-vunary/gen/vneg-avx-x16.c",
1888 "src/f32-vunary/gen/vsqr-avx-x8.c",
1889 "src/f32-vunary/gen/vsqr-avx-x16.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001890 "src/math/exp-avx-rr2-p5.c",
1891 "src/math/expm1minus-avx-rr2-lut4-p4-perm.c",
1892 "src/math/expm1minus-avx-rr2-lut16-p3.c",
1893 "src/math/expm1minus-avx-rr2-p6.c",
1894 "src/math/sigmoid-avx-rr2-lut64-p2-div.c",
1895 "src/math/sigmoid-avx-rr2-p5-div.c",
1896 "src/math/sigmoid-avx-rr2-p5-nr1.c",
1897 "src/math/sigmoid-avx-rr2-p5-nr2.c",
1898]
1899
1900XOP_UKERNELS = [
1901 "src/qs8-gemm/gen/1x4c2-minmax-xop-ld64.c",
1902 "src/qs8-gemm/gen/1x4c2-minmax-xop-ld128.c",
1903 "src/qs8-gemm/gen/1x4c2-xw-minmax-xop.c",
1904 "src/qs8-gemm/gen/1x4c8-minmax-xop-ld64.c",
1905 "src/qs8-gemm/gen/1x4c8-minmax-xop-ld128.c",
1906 "src/qs8-gemm/gen/1x4c8-xw-minmax-xop.c",
1907 "src/qs8-gemm/gen/2x4c8-minmax-xop-ld64.c",
1908 "src/qs8-gemm/gen/2x4c8-minmax-xop-ld128.c",
1909 "src/qs8-gemm/gen/2x4c8-xw-minmax-xop.c",
1910 "src/qs8-gemm/gen/3x4c8-minmax-xop-ld64.c",
1911 "src/qs8-gemm/gen/3x4c8-minmax-xop-ld128.c",
1912 "src/qs8-gemm/gen/3x4c8-xw-minmax-xop.c",
1913 "src/qs8-gemm/gen/4x4c2-minmax-xop-ld64.c",
1914 "src/qs8-gemm/gen/4x4c2-minmax-xop-ld128.c",
1915 "src/qs8-gemm/gen/4x4c2-xw-minmax-xop.c",
1916 "src/qs8-igemm/gen/1x4c2-minmax-xop-ld64.c",
1917 "src/qs8-igemm/gen/1x4c2-minmax-xop-ld128.c",
1918 "src/qs8-igemm/gen/1x4c8-minmax-xop-ld64.c",
1919 "src/qs8-igemm/gen/1x4c8-minmax-xop-ld128.c",
1920 "src/qs8-igemm/gen/2x4c8-minmax-xop-ld64.c",
1921 "src/qs8-igemm/gen/2x4c8-minmax-xop-ld128.c",
1922 "src/qs8-igemm/gen/3x4c8-minmax-xop-ld64.c",
1923 "src/qs8-igemm/gen/3x4c8-minmax-xop-ld128.c",
1924 "src/qs8-igemm/gen/4x4c2-minmax-xop-ld64.c",
1925 "src/qs8-igemm/gen/4x4c2-minmax-xop-ld128.c",
1926 "src/qs8-vadd/gen/minmax-xop-mul32-ld32-x8.c",
1927 "src/qs8-vadd/gen/minmax-xop-mul32-ld32-x16.c",
1928 "src/qs8-vadd/gen/minmax-xop-mul32-ld32-x24.c",
1929 "src/qs8-vadd/gen/minmax-xop-mul32-ld32-x32.c",
1930 "src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x8.c",
1931 "src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x16.c",
1932 "src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x24.c",
1933 "src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x32.c",
Miao Wange9993472020-02-10 15:00:10 -08001934]
1935
1936FMA3_UKERNELS = [
Miao Wang400e4042020-04-17 10:15:59 -07001937 "src/f32-dwconv/gen/up8x4-minmax-fma3-acc2.c",
1938 "src/f32-dwconv/gen/up8x4-minmax-fma3.c",
Miao Wang400e4042020-04-17 10:15:59 -07001939 "src/f32-dwconv/gen/up8x9-minmax-fma3-acc2.c",
1940 "src/f32-dwconv/gen/up8x9-minmax-fma3.c",
Miao Wang400e4042020-04-17 10:15:59 -07001941 "src/f32-dwconv/gen/up8x25-minmax-fma3-acc2.c",
1942 "src/f32-dwconv/gen/up8x25-minmax-fma3.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001943 "src/f32-dwconv/gen/up16x4-minmax-fma3-acc2.c",
1944 "src/f32-dwconv/gen/up16x4-minmax-fma3.c",
1945 "src/f32-dwconv/gen/up16x9-minmax-fma3-acc2.c",
1946 "src/f32-dwconv/gen/up16x9-minmax-fma3.c",
1947 "src/f32-dwconv/gen/up16x25-minmax-fma3-acc2.c",
1948 "src/f32-dwconv/gen/up16x25-minmax-fma3.c",
Miao Wang400e4042020-04-17 10:15:59 -07001949 "src/f32-gemm/gen-inc/1x8inc-minmax-fma3-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001950 "src/f32-gemm/gen-inc/1x16inc-minmax-fma3-broadcast.c",
1951 "src/f32-gemm/gen-inc/1x16s4inc-minmax-fma3-broadcast.c",
1952 "src/f32-gemm/gen-inc/3x16inc-minmax-fma3-broadcast.c",
1953 "src/f32-gemm/gen-inc/3x16s4inc-minmax-fma3-broadcast.c",
Miao Wang400e4042020-04-17 10:15:59 -07001954 "src/f32-gemm/gen-inc/4x8inc-minmax-fma3-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001955 "src/f32-gemm/gen-inc/4x16inc-minmax-fma3-broadcast.c",
1956 "src/f32-gemm/gen-inc/4x16s4inc-minmax-fma3-broadcast.c",
Miao Wang400e4042020-04-17 10:15:59 -07001957 "src/f32-gemm/gen-inc/5x8inc-minmax-fma3-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001958 "src/f32-gemm/gen-inc/5x16inc-minmax-fma3-broadcast.c",
1959 "src/f32-gemm/gen-inc/5x16s4inc-minmax-fma3-broadcast.c",
Miao Wang400e4042020-04-17 10:15:59 -07001960 "src/f32-gemm/gen-inc/6x8inc-minmax-fma3-broadcast.c",
1961 "src/f32-gemm/gen-inc/7x8inc-minmax-fma3-broadcast.c",
1962 "src/f32-gemm/gen-inc/8x8inc-minmax-fma3-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001963 "src/f32-gemm/gen/1x8-minmax-fma3-broadcast.c",
1964 "src/f32-gemm/gen/1x16-minmax-fma3-broadcast.c",
1965 "src/f32-gemm/gen/1x16s4-minmax-fma3-broadcast.c",
1966 "src/f32-gemm/gen/3x16-minmax-fma3-broadcast.c",
1967 "src/f32-gemm/gen/3x16s4-minmax-fma3-broadcast.c",
1968 "src/f32-gemm/gen/4x8-minmax-fma3-broadcast.c",
1969 "src/f32-gemm/gen/4x16-minmax-fma3-broadcast.c",
1970 "src/f32-gemm/gen/4x16s4-minmax-fma3-broadcast.c",
1971 "src/f32-gemm/gen/5x8-minmax-fma3-broadcast.c",
1972 "src/f32-gemm/gen/5x16-minmax-fma3-broadcast.c",
1973 "src/f32-gemm/gen/5x16s4-minmax-fma3-broadcast.c",
1974 "src/f32-gemm/gen/6x8-minmax-fma3-broadcast.c",
1975 "src/f32-gemm/gen/7x8-minmax-fma3-broadcast.c",
1976 "src/f32-gemm/gen/8x8-minmax-fma3-broadcast.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001977 "src/f32-hswish/gen/hswish-fma3-x8.c",
1978 "src/f32-hswish/gen/hswish-fma3-x16.c",
Miao Wang400e4042020-04-17 10:15:59 -07001979 "src/f32-igemm/gen/1x8-minmax-fma3-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001980 "src/f32-igemm/gen/1x16-minmax-fma3-broadcast.c",
1981 "src/f32-igemm/gen/1x16s4-minmax-fma3-broadcast.c",
1982 "src/f32-igemm/gen/3x16-minmax-fma3-broadcast.c",
1983 "src/f32-igemm/gen/3x16s4-minmax-fma3-broadcast.c",
Miao Wang400e4042020-04-17 10:15:59 -07001984 "src/f32-igemm/gen/4x8-minmax-fma3-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001985 "src/f32-igemm/gen/4x16-minmax-fma3-broadcast.c",
1986 "src/f32-igemm/gen/4x16s4-minmax-fma3-broadcast.c",
Miao Wang400e4042020-04-17 10:15:59 -07001987 "src/f32-igemm/gen/5x8-minmax-fma3-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08001988 "src/f32-igemm/gen/5x16-minmax-fma3-broadcast.c",
1989 "src/f32-igemm/gen/5x16s4-minmax-fma3-broadcast.c",
Miao Wang400e4042020-04-17 10:15:59 -07001990 "src/f32-igemm/gen/6x8-minmax-fma3-broadcast.c",
1991 "src/f32-igemm/gen/7x8-minmax-fma3-broadcast.c",
1992 "src/f32-igemm/gen/8x8-minmax-fma3-broadcast.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07001993 "src/f32-vsqrt/gen/fma3-nr1fma1adj-x8.c",
1994 "src/f32-vsqrt/gen/fma3-nr1fma1adj-x16.c",
1995 "src/f32-vsqrt/gen/fma3-nr1fma1adj-x24.c",
1996 "src/f32-vsqrt/gen/fma3-nr1fma1adj-x32.c",
1997 "src/f32-vsqrt/gen/fma3-nr1fma1adj-x40.c",
1998 "src/f32-vsqrt/gen/fma3-nr1fma1adj-x48.c",
1999 "src/f32-vsqrt/gen/fma3-nr1fma1adj-x56.c",
2000 "src/f32-vsqrt/gen/fma3-nr1fma1adj-x64.c",
2001 "src/math/sqrt-fma3-nr1fma.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07002002 "src/math/sqrt-fma3-nr1fma1adj.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002003 "src/math/sqrt-fma3-nr2fma.c",
Miao Wange9993472020-02-10 15:00:10 -08002004]
2005
2006AVX2_UKERNELS = [
Miao Wange9993472020-02-10 15:00:10 -08002007 "src/f32-raddexpminusmax/gen/avx2-p5-x64-acc2.c",
2008 "src/f32-raddexpminusmax/gen/avx2-p5-x64-acc4.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002009 "src/f32-raddexpminusmax/gen/avx2-p5-x64.c",
Miao Wange9993472020-02-10 15:00:10 -08002010 "src/f32-raddexpminusmax/gen/avx2-p5-x72-acc3.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002011 "src/f32-raddexpminusmax/gen/avx2-p5-x72.c",
Miao Wange9993472020-02-10 15:00:10 -08002012 "src/f32-raddexpminusmax/gen/avx2-p5-x80-acc2.c",
2013 "src/f32-raddexpminusmax/gen/avx2-p5-x80-acc5.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002014 "src/f32-raddexpminusmax/gen/avx2-p5-x80.c",
Miao Wange9993472020-02-10 15:00:10 -08002015 "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc2.c",
2016 "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc3.c",
2017 "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc6.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002018 "src/f32-raddexpminusmax/gen/avx2-p5-x96.c",
Miao Wange9993472020-02-10 15:00:10 -08002019 "src/f32-raddextexp/gen/avx2-p5-x64-acc2.c",
2020 "src/f32-raddextexp/gen/avx2-p5-x64-acc4.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002021 "src/f32-raddextexp/gen/avx2-p5-x64.c",
Miao Wange9993472020-02-10 15:00:10 -08002022 "src/f32-raddextexp/gen/avx2-p5-x72-acc3.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002023 "src/f32-raddextexp/gen/avx2-p5-x72.c",
Miao Wange9993472020-02-10 15:00:10 -08002024 "src/f32-raddextexp/gen/avx2-p5-x80-acc2.c",
2025 "src/f32-raddextexp/gen/avx2-p5-x80-acc5.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002026 "src/f32-raddextexp/gen/avx2-p5-x80.c",
Miao Wange9993472020-02-10 15:00:10 -08002027 "src/f32-raddextexp/gen/avx2-p5-x96-acc2.c",
2028 "src/f32-raddextexp/gen/avx2-p5-x96-acc3.c",
2029 "src/f32-raddextexp/gen/avx2-p5-x96-acc6.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002030 "src/f32-raddextexp/gen/avx2-p5-x96.c",
Miao Wange9993472020-02-10 15:00:10 -08002031 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64-acc2.c",
2032 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64-acc4.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002033 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64.c",
Miao Wange9993472020-02-10 15:00:10 -08002034 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x72-acc3.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002035 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x72.c",
Miao Wange9993472020-02-10 15:00:10 -08002036 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80-acc2.c",
2037 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80-acc5.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002038 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80.c",
Miao Wange9993472020-02-10 15:00:10 -08002039 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc2.c",
2040 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc3.c",
2041 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc6.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002042 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96.c",
Miao Wange9993472020-02-10 15:00:10 -08002043 "src/f32-sigmoid/gen/avx2-rr1-p5-div-x8.c",
2044 "src/f32-sigmoid/gen/avx2-rr1-p5-div-x16.c",
2045 "src/f32-sigmoid/gen/avx2-rr1-p5-div-x24.c",
2046 "src/f32-sigmoid/gen/avx2-rr1-p5-div-x32.c",
2047 "src/f32-sigmoid/gen/avx2-rr1-p5-div-x40.c",
2048 "src/f32-sigmoid/gen/avx2-rr1-p5-div-x48.c",
2049 "src/f32-sigmoid/gen/avx2-rr1-p5-div-x56.c",
2050 "src/f32-sigmoid/gen/avx2-rr1-p5-div-x64.c",
2051 "src/f32-sigmoid/gen/avx2-rr1-p5-div-x72.c",
2052 "src/f32-sigmoid/gen/avx2-rr1-p5-div-x80.c",
2053 "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x8.c",
2054 "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x16.c",
2055 "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x24.c",
2056 "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x32.c",
2057 "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x40.c",
2058 "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x48.c",
2059 "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x56.c",
2060 "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x64.c",
2061 "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x72.c",
2062 "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x80.c",
2063 "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x8.c",
2064 "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x16.c",
2065 "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x24.c",
2066 "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x32.c",
2067 "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x40.c",
2068 "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x48.c",
2069 "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x56.c",
2070 "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x64.c",
2071 "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x72.c",
2072 "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x80.c",
Miao Wang55abe392021-02-03 14:54:41 -08002073 "src/f32-velu/gen/velu-avx2-rr1-lut4-p4-perm-x8.c",
2074 "src/f32-velu/gen/velu-avx2-rr1-lut4-p4-perm-x16.c",
2075 "src/f32-velu/gen/velu-avx2-rr1-lut4-p4-perm-x24.c",
2076 "src/f32-velu/gen/velu-avx2-rr1-lut4-p4-perm-x32.c",
2077 "src/f32-velu/gen/velu-avx2-rr1-lut4-p4-perm-x40.c",
2078 "src/f32-velu/gen/velu-avx2-rr1-lut4-p4-perm-x48.c",
2079 "src/f32-velu/gen/velu-avx2-rr1-lut4-p4-perm-x56.c",
2080 "src/f32-velu/gen/velu-avx2-rr1-lut4-p4-perm-x64.c",
2081 "src/f32-velu/gen/velu-avx2-rr1-lut4-p4-perm-x72.c",
2082 "src/f32-velu/gen/velu-avx2-rr1-lut4-p4-perm-x80.c",
2083 "src/f32-velu/gen/velu-avx2-rr1-lut8-p4-perm-x8.c",
2084 "src/f32-velu/gen/velu-avx2-rr1-lut8-p4-perm-x16.c",
2085 "src/f32-velu/gen/velu-avx2-rr1-lut8-p4-perm-x24.c",
2086 "src/f32-velu/gen/velu-avx2-rr1-lut8-p4-perm-x32.c",
2087 "src/f32-velu/gen/velu-avx2-rr1-lut8-p4-perm-x40.c",
2088 "src/f32-velu/gen/velu-avx2-rr1-lut8-p4-perm-x48.c",
2089 "src/f32-velu/gen/velu-avx2-rr1-lut8-p4-perm-x56.c",
2090 "src/f32-velu/gen/velu-avx2-rr1-lut8-p4-perm-x64.c",
2091 "src/f32-velu/gen/velu-avx2-rr1-lut8-p4-perm-x72.c",
2092 "src/f32-velu/gen/velu-avx2-rr1-lut8-p4-perm-x80.c",
2093 "src/f32-velu/gen/velu-avx2-rr1-lut16-p3-gather-x8.c",
2094 "src/f32-velu/gen/velu-avx2-rr1-lut16-p3-gather-x16.c",
2095 "src/f32-velu/gen/velu-avx2-rr1-lut16-p3-gather-x24.c",
2096 "src/f32-velu/gen/velu-avx2-rr1-lut16-p3-gather-x32.c",
2097 "src/f32-velu/gen/velu-avx2-rr1-lut16-p3-gather-x40.c",
2098 "src/f32-velu/gen/velu-avx2-rr1-lut16-p3-gather-x48.c",
2099 "src/f32-velu/gen/velu-avx2-rr1-lut16-p3-gather-x56.c",
2100 "src/f32-velu/gen/velu-avx2-rr1-lut16-p3-gather-x64.c",
2101 "src/f32-velu/gen/velu-avx2-rr1-lut16-p3-gather-x72.c",
2102 "src/f32-velu/gen/velu-avx2-rr1-lut16-p3-gather-x80.c",
2103 "src/f32-velu/gen/velu-avx2-rr1-p6-x8.c",
2104 "src/f32-velu/gen/velu-avx2-rr1-p6-x16.c",
2105 "src/f32-velu/gen/velu-avx2-rr1-p6-x24.c",
2106 "src/f32-velu/gen/velu-avx2-rr1-p6-x32.c",
2107 "src/f32-velu/gen/velu-avx2-rr1-p6-x40.c",
2108 "src/f32-velu/gen/velu-avx2-rr1-p6-x48.c",
2109 "src/f32-velu/gen/velu-avx2-rr1-p6-x56.c",
2110 "src/f32-velu/gen/velu-avx2-rr1-p6-x64.c",
2111 "src/f32-velu/gen/velu-avx2-rr1-p6-x72.c",
2112 "src/f32-velu/gen/velu-avx2-rr1-p6-x80.c",
Miao Wange9993472020-02-10 15:00:10 -08002113 "src/f32-vscaleexpminusmax/gen/avx2-p5-x8.c",
2114 "src/f32-vscaleexpminusmax/gen/avx2-p5-x16.c",
2115 "src/f32-vscaleexpminusmax/gen/avx2-p5-x24.c",
2116 "src/f32-vscaleexpminusmax/gen/avx2-p5-x32.c",
2117 "src/f32-vscaleexpminusmax/gen/avx2-p5-x40.c",
2118 "src/f32-vscaleexpminusmax/gen/avx2-p5-x48.c",
2119 "src/f32-vscaleexpminusmax/gen/avx2-p5-x56.c",
2120 "src/f32-vscaleexpminusmax/gen/avx2-p5-x64.c",
2121 "src/f32-vscaleexpminusmax/gen/avx2-p5-x72.c",
2122 "src/f32-vscaleexpminusmax/gen/avx2-p5-x80.c",
2123 "src/f32-vscaleexpminusmax/gen/avx2-p5-x88.c",
2124 "src/f32-vscaleexpminusmax/gen/avx2-p5-x96.c",
2125 "src/f32-vscaleextexp/gen/avx2-p5-x8.c",
2126 "src/f32-vscaleextexp/gen/avx2-p5-x16.c",
2127 "src/f32-vscaleextexp/gen/avx2-p5-x24.c",
2128 "src/f32-vscaleextexp/gen/avx2-p5-x32.c",
2129 "src/f32-vscaleextexp/gen/avx2-p5-x40.c",
2130 "src/f32-vscaleextexp/gen/avx2-p5-x48.c",
2131 "src/f32-vscaleextexp/gen/avx2-p5-x56.c",
2132 "src/f32-vscaleextexp/gen/avx2-p5-x64.c",
2133 "src/f32-vscaleextexp/gen/avx2-p5-x72.c",
2134 "src/f32-vscaleextexp/gen/avx2-p5-x80.c",
2135 "src/f32-vscaleextexp/gen/avx2-p5-x88.c",
2136 "src/f32-vscaleextexp/gen/avx2-p5-x96.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002137 "src/math/exp-avx2-rr2-lut8-p3-perm.c",
2138 "src/math/exp-avx2-rr2-lut8-p4-perm.c",
2139 "src/math/exp-avx2-rr2-p5.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002140 "src/math/expm1minus-avx2-rr1-lut4-p4-perm.c",
2141 "src/math/expm1minus-avx2-rr1-lut8-p4-perm.c",
2142 "src/math/expm1minus-avx2-rr1-lut16-p3-gather.c",
2143 "src/math/expm1minus-avx2-rr1-p6.c",
Miao Wang55abe392021-02-03 14:54:41 -08002144 "src/math/expminus-avx2-rr2-p5.c",
Miao Wange9993472020-02-10 15:00:10 -08002145 "src/math/extexp-avx2-p5.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002146 "src/math/sigmoid-avx2-rr1-lut64-p2-gather-div.c",
2147 "src/math/sigmoid-avx2-rr1-lut64-p2-gather-nr1fma.c",
2148 "src/math/sigmoid-avx2-rr1-lut64-p2-gather-nr2fma.c",
2149 "src/math/sigmoid-avx2-rr1-lut64-p2-gather-nr2fma1adj.c",
Miao Wange9993472020-02-10 15:00:10 -08002150 "src/math/sigmoid-avx2-rr1-p5-div.c",
Miao Wange9993472020-02-10 15:00:10 -08002151 "src/math/sigmoid-avx2-rr1-p5-nr1fma.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002152 "src/math/sigmoid-avx2-rr1-p5-nr2fma.c",
2153 "src/math/sigmoid-avx2-rr2-lut64-p2-gather-div.c",
2154 "src/math/sigmoid-avx2-rr2-lut64-p2-gather-nr1fma.c",
2155 "src/math/sigmoid-avx2-rr2-lut64-p2-gather-nr2fma.c",
2156 "src/math/sigmoid-avx2-rr2-lut64-p2-gather-nr2fma1adj.c",
2157 "src/math/sigmoid-avx2-rr2-p5-div.c",
2158 "src/math/sigmoid-avx2-rr2-p5-nr1fma.c",
2159 "src/math/sigmoid-avx2-rr2-p5-nr2fma.c",
2160 "src/qs8-dwconv/gen/up8x9-minmax-avx2-mul32.c",
2161 "src/qs8-dwconv/gen/up16x9-minmax-avx2-mul16.c",
2162 "src/qs8-dwconv/gen/up16x9-minmax-avx2-mul32.c",
2163 "src/qs8-dwconv/gen/up24x9-minmax-avx2-mul32.c",
2164 "src/qs8-dwconv/gen/up32x9-minmax-avx2-mul16.c",
2165 "src/qs8-dwconv/gen/up32x9-minmax-avx2-mul32.c",
2166 "src/qs8-gemm/gen/1x8c8-minmax-avx2.c",
2167 "src/qs8-gemm/gen/1x8c8-xw-minmax-avx2.c",
2168 "src/qs8-gemm/gen/2x8c8-minmax-avx2.c",
2169 "src/qs8-gemm/gen/2x8c8-xw-minmax-avx2.c",
2170 "src/qs8-gemm/gen/3x8c8-minmax-avx2.c",
2171 "src/qs8-gemm/gen/3x8c8-xw-minmax-avx2.c",
2172 "src/qs8-igemm/gen/1x8c8-minmax-avx2.c",
2173 "src/qs8-igemm/gen/2x8c8-minmax-avx2.c",
2174 "src/qs8-igemm/gen/3x8c8-minmax-avx2.c",
2175 "src/qs8-vadd/gen/minmax-avx2-mul32-ld64-x8.c",
2176 "src/qs8-vadd/gen/minmax-avx2-mul32-ld64-x16.c",
2177 "src/qs8-vadd/gen/minmax-avx2-mul32-ld64-x24.c",
2178 "src/qs8-vadd/gen/minmax-avx2-mul32-ld64-x32.c",
2179 "src/qs8-vaddc/gen/minmax-avx2-mul32-ld64-x8.c",
2180 "src/qs8-vaddc/gen/minmax-avx2-mul32-ld64-x16.c",
2181 "src/qs8-vaddc/gen/minmax-avx2-mul32-ld64-x24.c",
2182 "src/qs8-vaddc/gen/minmax-avx2-mul32-ld64-x32.c",
Miao Wange9993472020-02-10 15:00:10 -08002183]
2184
// Source files of the AVX512F micro-kernels. Every path in this list carries
// the "avx512f" tag in its file name. The list covers the f32 kernel families
// (clamp, dwconv, gemm, hswish, igemm, prelu, radd*/vscale* exp helpers, relu,
// rmax, sigmoid, vbinary, velu, vlrelu, vrnd, vscale, vsqrt, vunary) followed
// by the AVX512F variants under src/math.
2185AVX512F_UKERNELS = [
Miao Wang2534c2f2020-03-16 11:58:04 -07002186 "src/f32-clamp/gen/avx512f-x16.c",
2187 "src/f32-clamp/gen/avx512f-x32.c",
Miao Wang400e4042020-04-17 10:15:59 -07002188 "src/f32-dwconv/gen/up16x4-minmax-avx512f-acc2.c",
2189 "src/f32-dwconv/gen/up16x4-minmax-avx512f.c",
Miao Wang400e4042020-04-17 10:15:59 -07002190 "src/f32-dwconv/gen/up16x9-minmax-avx512f-acc2.c",
2191 "src/f32-dwconv/gen/up16x9-minmax-avx512f.c",
Miao Wang400e4042020-04-17 10:15:59 -07002192 "src/f32-dwconv/gen/up16x25-minmax-avx512f-acc2.c",
2193 "src/f32-dwconv/gen/up16x25-minmax-avx512f.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002194 "src/f32-dwconv/gen/up32x4-minmax-avx512f-acc2.c",
2195 "src/f32-dwconv/gen/up32x4-minmax-avx512f.c",
2196 "src/f32-dwconv/gen/up32x9-minmax-avx512f-acc2.c",
2197 "src/f32-dwconv/gen/up32x9-minmax-avx512f.c",
2198 "src/f32-dwconv/gen/up32x25-minmax-avx512f-acc2.c",
2199 "src/f32-dwconv/gen/up32x25-minmax-avx512f.c",
Miao Wang400e4042020-04-17 10:15:59 -07002200 "src/f32-gemm/gen-inc/1x16inc-minmax-avx512f-broadcast.c",
2201 "src/f32-gemm/gen-inc/4x16inc-minmax-avx512f-broadcast.c",
2202 "src/f32-gemm/gen-inc/5x16inc-minmax-avx512f-broadcast.c",
2203 "src/f32-gemm/gen-inc/6x16inc-minmax-avx512f-broadcast.c",
2204 "src/f32-gemm/gen-inc/7x16inc-minmax-avx512f-broadcast.c",
2205 "src/f32-gemm/gen-inc/8x16inc-minmax-avx512f-broadcast.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002206 "src/f32-gemm/gen/1x16-minmax-avx512f-broadcast.c",
2207 "src/f32-gemm/gen/4x16-minmax-avx512f-broadcast.c",
2208 "src/f32-gemm/gen/5x16-minmax-avx512f-broadcast.c",
2209 "src/f32-gemm/gen/6x16-minmax-avx512f-broadcast.c",
2210 "src/f32-gemm/gen/7x16-minmax-avx512f-broadcast.c",
2211 "src/f32-gemm/gen/8x16-minmax-avx512f-broadcast.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07002212 "src/f32-hswish/gen/hswish-avx512f-x16.c",
2213 "src/f32-hswish/gen/hswish-avx512f-x32.c",
Miao Wang400e4042020-04-17 10:15:59 -07002214 "src/f32-igemm/gen/1x16-minmax-avx512f-broadcast.c",
2215 "src/f32-igemm/gen/4x16-minmax-avx512f-broadcast.c",
2216 "src/f32-igemm/gen/5x16-minmax-avx512f-broadcast.c",
2217 "src/f32-igemm/gen/6x16-minmax-avx512f-broadcast.c",
2218 "src/f32-igemm/gen/7x16-minmax-avx512f-broadcast.c",
2219 "src/f32-igemm/gen/8x16-minmax-avx512f-broadcast.c",
Miao Wang2534c2f2020-03-16 11:58:04 -07002220 "src/f32-prelu/gen/avx512f-2x16.c",
2221 "src/f32-prelu/gen/avx512f-2x32.c",
Miao Wange9993472020-02-10 15:00:10 -08002222 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128-acc2.c",
2223 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128-acc4.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002224 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128.c",
Miao Wange9993472020-02-10 15:00:10 -08002225 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x144-acc3.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002226 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x144.c",
Miao Wange9993472020-02-10 15:00:10 -08002227 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160-acc2.c",
2228 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160-acc5.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002229 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160.c",
Miao Wange9993472020-02-10 15:00:10 -08002230 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc2.c",
2231 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc3.c",
2232 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc6.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002233 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192.c",
Miao Wange9993472020-02-10 15:00:10 -08002234 "src/f32-raddextexp/gen/avx512f-p5-scalef-x128-acc2.c",
2235 "src/f32-raddextexp/gen/avx512f-p5-scalef-x128-acc4.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002236 "src/f32-raddextexp/gen/avx512f-p5-scalef-x128.c",
Miao Wange9993472020-02-10 15:00:10 -08002237 "src/f32-raddextexp/gen/avx512f-p5-scalef-x144-acc3.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002238 "src/f32-raddextexp/gen/avx512f-p5-scalef-x144.c",
Miao Wange9993472020-02-10 15:00:10 -08002239 "src/f32-raddextexp/gen/avx512f-p5-scalef-x160-acc2.c",
2240 "src/f32-raddextexp/gen/avx512f-p5-scalef-x160-acc5.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002241 "src/f32-raddextexp/gen/avx512f-p5-scalef-x160.c",
Miao Wange9993472020-02-10 15:00:10 -08002242 "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc2.c",
2243 "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc3.c",
2244 "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc6.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002245 "src/f32-raddextexp/gen/avx512f-p5-scalef-x192.c",
Miao Wange9993472020-02-10 15:00:10 -08002246 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128-acc2.c",
2247 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128-acc4.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002248 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128.c",
Miao Wange9993472020-02-10 15:00:10 -08002249 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x144-acc3.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002250 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x144.c",
Miao Wange9993472020-02-10 15:00:10 -08002251 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160-acc2.c",
2252 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160-acc5.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002253 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160.c",
Miao Wange9993472020-02-10 15:00:10 -08002254 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc2.c",
2255 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc3.c",
2256 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc6.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002257 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07002258 "src/f32-relu/gen/avx512f-x16.c",
2259 "src/f32-relu/gen/avx512f-x32.c",
Miao Wange9993472020-02-10 15:00:10 -08002260 "src/f32-rmax/avx512f.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002261 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-div-x16.c",
2262 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-div-x32.c",
2263 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-div-x48.c",
2264 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-div-x64.c",
2265 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-div-x80.c",
2266 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-div-x96.c",
2267 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-div-x112.c",
2268 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-div-x128.c",
2269 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x16.c",
2270 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x32.c",
2271 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x48.c",
2272 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x64.c",
2273 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x80.c",
2274 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x96.c",
2275 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c",
2276 "src/f32-sigmoid/gen/avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c",
2277 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-div-x16.c",
2278 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-div-x32.c",
2279 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-div-x48.c",
2280 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-div-x64.c",
2281 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-div-x80.c",
2282 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-div-x96.c",
2283 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-div-x112.c",
2284 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-div-x128.c",
2285 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-nr1fma-x16.c",
2286 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-nr1fma-x32.c",
2287 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-nr1fma-x48.c",
2288 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-nr1fma-x64.c",
2289 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-nr1fma-x80.c",
2290 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-nr1fma-x96.c",
2291 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-nr1fma-x112.c",
2292 "src/f32-sigmoid/gen/avx512f-rr1-p5-scalef-nr1fma-x128.c",
2293 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x16.c",
2294 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x32.c",
2295 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x48.c",
2296 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x64.c",
2297 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x80.c",
2298 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x96.c",
2299 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x112.c",
2300 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c",
2301 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x16.c",
2302 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x32.c",
2303 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x48.c",
2304 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x64.c",
2305 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x80.c",
2306 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x96.c",
2307 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c",
2308 "src/f32-sigmoid/gen/avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c",
Miao Wang400e4042020-04-17 10:15:59 -07002309 "src/f32-vbinary/gen/vadd-minmax-avx512f-x16.c",
2310 "src/f32-vbinary/gen/vadd-minmax-avx512f-x32.c",
2311 "src/f32-vbinary/gen/vaddc-minmax-avx512f-x16.c",
2312 "src/f32-vbinary/gen/vaddc-minmax-avx512f-x32.c",
2313 "src/f32-vbinary/gen/vdiv-minmax-avx512f-x16.c",
2314 "src/f32-vbinary/gen/vdiv-minmax-avx512f-x32.c",
2315 "src/f32-vbinary/gen/vdivc-minmax-avx512f-x16.c",
2316 "src/f32-vbinary/gen/vdivc-minmax-avx512f-x32.c",
Miao Wange9993472020-02-10 15:00:10 -08002317 "src/f32-vbinary/gen/vmax-avx512f-x16.c",
2318 "src/f32-vbinary/gen/vmax-avx512f-x32.c",
2319 "src/f32-vbinary/gen/vmaxc-avx512f-x16.c",
2320 "src/f32-vbinary/gen/vmaxc-avx512f-x32.c",
2321 "src/f32-vbinary/gen/vmin-avx512f-x16.c",
2322 "src/f32-vbinary/gen/vmin-avx512f-x32.c",
2323 "src/f32-vbinary/gen/vminc-avx512f-x16.c",
2324 "src/f32-vbinary/gen/vminc-avx512f-x32.c",
Miao Wang400e4042020-04-17 10:15:59 -07002325 "src/f32-vbinary/gen/vmul-minmax-avx512f-x16.c",
2326 "src/f32-vbinary/gen/vmul-minmax-avx512f-x32.c",
2327 "src/f32-vbinary/gen/vmulc-minmax-avx512f-x16.c",
2328 "src/f32-vbinary/gen/vmulc-minmax-avx512f-x32.c",
2329 "src/f32-vbinary/gen/vrdivc-minmax-avx512f-x16.c",
2330 "src/f32-vbinary/gen/vrdivc-minmax-avx512f-x32.c",
2331 "src/f32-vbinary/gen/vrsubc-minmax-avx512f-x16.c",
2332 "src/f32-vbinary/gen/vrsubc-minmax-avx512f-x32.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07002333 "src/f32-vbinary/gen/vsqrdiff-avx512f-x16.c",
2334 "src/f32-vbinary/gen/vsqrdiff-avx512f-x32.c",
2335 "src/f32-vbinary/gen/vsqrdiffc-avx512f-x16.c",
2336 "src/f32-vbinary/gen/vsqrdiffc-avx512f-x32.c",
Miao Wang400e4042020-04-17 10:15:59 -07002337 "src/f32-vbinary/gen/vsub-minmax-avx512f-x16.c",
2338 "src/f32-vbinary/gen/vsub-minmax-avx512f-x32.c",
2339 "src/f32-vbinary/gen/vsubc-minmax-avx512f-x16.c",
2340 "src/f32-vbinary/gen/vsubc-minmax-avx512f-x32.c",
Miao Wang55abe392021-02-03 14:54:41 -08002341 "src/f32-velu/gen/velu-avx512f-rr1-lut16-p3-perm-x16.c",
2342 "src/f32-velu/gen/velu-avx512f-rr1-lut16-p3-perm-x32.c",
2343 "src/f32-velu/gen/velu-avx512f-rr1-lut16-p3-perm-x48.c",
2344 "src/f32-velu/gen/velu-avx512f-rr1-lut16-p3-perm-x64.c",
2345 "src/f32-velu/gen/velu-avx512f-rr1-lut16-p3-perm-x80.c",
2346 "src/f32-velu/gen/velu-avx512f-rr1-lut16-p3-perm-x96.c",
2347 "src/f32-velu/gen/velu-avx512f-rr1-lut16-p3-perm-x112.c",
2348 "src/f32-velu/gen/velu-avx512f-rr1-lut16-p3-perm-x128.c",
2349 "src/f32-velu/gen/velu-avx512f-rr1-p6-x16.c",
2350 "src/f32-velu/gen/velu-avx512f-rr1-p6-x32.c",
2351 "src/f32-velu/gen/velu-avx512f-rr1-p6-x48.c",
2352 "src/f32-velu/gen/velu-avx512f-rr1-p6-x64.c",
2353 "src/f32-velu/gen/velu-avx512f-rr1-p6-x80.c",
2354 "src/f32-velu/gen/velu-avx512f-rr1-p6-x96.c",
2355 "src/f32-velu/gen/velu-avx512f-rr1-p6-x112.c",
2356 "src/f32-velu/gen/velu-avx512f-rr1-p6-x128.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07002357 "src/f32-vlrelu/gen/vlrelu-avx512f-x16.c",
2358 "src/f32-vlrelu/gen/vlrelu-avx512f-x32.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002359 "src/f32-vrnd/gen/vrndd-avx512f-x16.c",
2360 "src/f32-vrnd/gen/vrndd-avx512f-x32.c",
2361 "src/f32-vrnd/gen/vrndne-avx512f-x16.c",
2362 "src/f32-vrnd/gen/vrndne-avx512f-x32.c",
2363 "src/f32-vrnd/gen/vrndu-avx512f-x16.c",
2364 "src/f32-vrnd/gen/vrndu-avx512f-x32.c",
2365 "src/f32-vrnd/gen/vrndz-avx512f-x16.c",
2366 "src/f32-vrnd/gen/vrndz-avx512f-x32.c",
2367 "src/f32-vscale/avx512f-x64.c",
Miao Wange9993472020-02-10 15:00:10 -08002368 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x16.c",
2369 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x32.c",
2370 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x48.c",
2371 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x64.c",
2372 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x80.c",
2373 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x96.c",
2374 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x112.c",
2375 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x128.c",
2376 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x144.c",
2377 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x160.c",
2378 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x176.c",
2379 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x192.c",
2380 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x16.c",
2381 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x32.c",
2382 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x48.c",
2383 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x64.c",
2384 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x80.c",
2385 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x96.c",
2386 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x112.c",
2387 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x128.c",
2388 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x144.c",
2389 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x160.c",
2390 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x176.c",
2391 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x192.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07002392 "src/f32-vsqrt/gen/avx512f-nr1fma1adj-x16.c",
2393 "src/f32-vsqrt/gen/avx512f-nr1fma1adj-x32.c",
2394 "src/f32-vsqrt/gen/avx512f-nr1fma1adj-x48.c",
2395 "src/f32-vsqrt/gen/avx512f-nr1fma1adj-x64.c",
2396 "src/f32-vsqrt/gen/avx512f-nr1fma1adj-x80.c",
2397 "src/f32-vsqrt/gen/avx512f-nr1fma1adj-x96.c",
2398 "src/f32-vsqrt/gen/avx512f-nr1fma1adj-x112.c",
2399 "src/f32-vsqrt/gen/avx512f-nr1fma1adj-x128.c",
2400 "src/f32-vunary/gen/vabs-avx512f-x16.c",
2401 "src/f32-vunary/gen/vabs-avx512f-x32.c",
2402 "src/f32-vunary/gen/vneg-avx512f-x16.c",
2403 "src/f32-vunary/gen/vneg-avx512f-x32.c",
2404 "src/f32-vunary/gen/vsqr-avx512f-x16.c",
2405 "src/f32-vunary/gen/vsqr-avx512f-x32.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002406 "src/math/exp-avx512f-rr2-lut16-p3-perm-scalef.c",
2407 "src/math/exp-avx512f-rr2-lut16-p3-perm.c",
2408 "src/math/exp-avx512f-rr2-lut32-p2-perm2-scalef.c",
2409 "src/math/exp-avx512f-rr2-lut32-p2-perm2.c",
2410 "src/math/exp-avx512f-rr2-p5-scalef.c",
2411 "src/math/exp-avx512f-rr2-p5.c",
2412 "src/math/expm1minus-avx512f-rr1-lut16-p3-perm.c",
2413 "src/math/expm1minus-avx512f-rr1-p6.c",
Miao Wange9993472020-02-10 15:00:10 -08002414 "src/math/extexp-avx512f-p5.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002415 "src/math/sigmoid-avx512f-rr1-lut16-p3-perm-scalef-div.c",
2416 "src/math/sigmoid-avx512f-rr1-lut16-p3-perm-scalef-nr1fma.c",
2417 "src/math/sigmoid-avx512f-rr1-lut16-p3-perm-scalef-nr1fma1adj.c",
2418 "src/math/sigmoid-avx512f-rr1-lut32-p2-perm2-scalef-div.c",
2419 "src/math/sigmoid-avx512f-rr1-lut32-p2-perm2-scalef-nr1fma.c",
2420 "src/math/sigmoid-avx512f-rr1-lut32-p2-perm2-scalef-nr1fma1adj.c",
2421 "src/math/sigmoid-avx512f-rr1-lut64-p2-gather-scalef-div.c",
2422 "src/math/sigmoid-avx512f-rr1-lut64-p2-gather-scalef-nr1fma.c",
2423 "src/math/sigmoid-avx512f-rr1-lut64-p2-gather-scalef-nr1fma1adj.c",
2424 "src/math/sigmoid-avx512f-rr1-p5-scalef-div.c",
2425 "src/math/sigmoid-avx512f-rr1-p5-scalef-nr1fma.c",
2426 "src/math/sigmoid-avx512f-rr1-p5-scalef-nr1fma1adj.c",
2427 "src/math/sigmoid-avx512f-rr2-lut16-p3-perm-scalef-div.c",
2428 "src/math/sigmoid-avx512f-rr2-lut16-p3-perm-scalef-nr1fma.c",
2429 "src/math/sigmoid-avx512f-rr2-lut16-p3-perm-scalef-nr1fma1adj.c",
2430 "src/math/sigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div.c",
2431 "src/math/sigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma.c",
2432 "src/math/sigmoid-avx512f-rr2-lut32-p2-perm2-scalef-nr1fma1adj.c",
2433 "src/math/sigmoid-avx512f-rr2-lut64-p2-gather-scalef-div.c",
2434 "src/math/sigmoid-avx512f-rr2-lut64-p2-gather-scalef-nr1fma.c",
2435 "src/math/sigmoid-avx512f-rr2-lut64-p2-gather-scalef-nr1fma1adj.c",
2436 "src/math/sigmoid-avx512f-rr2-p5-scalef-div.c",
2437 "src/math/sigmoid-avx512f-rr2-p5-scalef-nr1fma.c",
2438 "src/math/sigmoid-avx512f-rr2-p5-scalef-nr1fma1adj.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07002439 "src/math/sqrt-avx512f-nr1fma.c",
Miao Wang86f5fbe2020-07-24 11:16:10 -07002440 "src/math/sqrt-avx512f-nr1fma1adj.c",
Miao Wang5eea8312020-12-07 09:12:40 -08002441 "src/math/sqrt-avx512f-nr2fma.c",
2442]
2443
// Source files of the AVX512SKX micro-kernels. All entries are qs8 kernels
// (dwconv, gemm, igemm) whose file names carry the "avx512skx" tag.
2444AVX512SKX_UKERNELS = [
2445 "src/qs8-dwconv/gen/up16x9-minmax-avx512skx-mul32.c",
2446 "src/qs8-dwconv/gen/up32x9-minmax-avx512skx-mul32.c",
2447 "src/qs8-gemm/gen/1x16c8-minmax-avx512skx.c",
2448 "src/qs8-gemm/gen/2x16c8-minmax-avx512skx.c",
2449 "src/qs8-gemm/gen/3x16c8-minmax-avx512skx.c",
2450 "src/qs8-gemm/gen/4x16c8-minmax-avx512skx.c",
2451 "src/qs8-igemm/gen/1x16c8-minmax-avx512skx.c",
2452 "src/qs8-igemm/gen/2x16c8-minmax-avx512skx.c",
2453 "src/qs8-igemm/gen/3x16c8-minmax-avx512skx.c",
2454 "src/qs8-igemm/gen/4x16c8-minmax-avx512skx.c",
Miao Wange9993472020-02-10 15:00:10 -08002455]
2456
// Hand-written 32-bit ARM assembly (.S) micro-kernels: f32 gemm and igemm
// variants. Used as the `arm` sources of the "xnnpack_asm_ukernels" module.
2457AARCH32_ASM_UKERNELS = [
Miao Wang86f5fbe2020-07-24 11:16:10 -07002458 "src/f32-gemm/4x4-aarch32-vfp-ld64.S",
2459 "src/f32-gemm/4x4-minmax-aarch32-vfp-ld64.S",
Miao Wang400e4042020-04-17 10:15:59 -07002460 "src/f32-gemm/4x8-minmax-aarch32-neon-cortex-a53.S",
2461 "src/f32-gemm/4x8-minmax-aarch32-neon-cortex-a55.S",
Miao Wang86f5fbe2020-07-24 11:16:10 -07002462 "src/f32-gemm/gen/4x8-minmax-aarch32-neon-cortex-a7.S",
Miao Wang400e4042020-04-17 10:15:59 -07002463 "src/f32-gemm/gen/4x8-minmax-aarch32-neon-cortex-a75.S",
Miao Wang86f5fbe2020-07-24 11:16:10 -07002464 "src/f32-gemm/gen/4x8-minmax-aarch32-neon-ld64.S",
Miao Wang400e4042020-04-17 10:15:59 -07002465 "src/f32-gemm/gen/4x8-minmax-aarch32-neon-pld-cortex-a75.S",
Miao Wang400e4042020-04-17 10:15:59 -07002466 "src/f32-igemm/4x8-minmax-aarch32-neon-cortex-a53.S",
2467 "src/f32-igemm/4x8-minmax-aarch32-neon-cortex-a55.S",
Miao Wang86f5fbe2020-07-24 11:16:10 -07002468 "src/f32-igemm/gen/4x8-minmax-aarch32-neon-cortex-a7.S",
2469 "src/f32-igemm/gen/4x8-minmax-aarch32-neon-cortex-a75.S",
2470 "src/f32-igemm/gen/4x8-minmax-aarch32-neon-ld64.S",
2471 "src/f32-igemm/gen/4x8-minmax-aarch32-neon-pld-cortex-a75.S",
Miao Wange9993472020-02-10 15:00:10 -08002472]
2473
// Hand-written 64-bit ARM assembly (.S) micro-kernels: f16 gemm
// (neonfp16arith), f32 dwconv/gemm/igemm (neonfma) and qs8 gemm (neondot).
// Used as the `arm64` sources of the "xnnpack_asm_ukernels" module.
2474AARCH64_ASM_UKERNELS = [
Miao Wangc0aa11a2020-06-10 13:41:26 -07002475 "src/f16-gemm/gen-inc/1x8inc-minmax-aarch64-neonfp16arith-ld64.S",
Miao Wang5eea8312020-12-07 09:12:40 -08002476 "src/f16-gemm/gen-inc/1x16inc-minmax-aarch64-neonfp16arith-ld32.S",
Miao Wangc0aa11a2020-06-10 13:41:26 -07002477 "src/f16-gemm/gen-inc/4x8inc-minmax-aarch64-neonfp16arith-ld64.S",
Miao Wang5eea8312020-12-07 09:12:40 -08002478 "src/f16-gemm/gen-inc/4x16inc-minmax-aarch64-neonfp16arith-ld32.S",
Miao Wangc0aa11a2020-06-10 13:41:26 -07002479 "src/f16-gemm/gen-inc/6x8inc-minmax-aarch64-neonfp16arith-ld64.S",
Miao Wang5eea8312020-12-07 09:12:40 -08002480 "src/f16-gemm/gen-inc/6x16inc-minmax-aarch64-neonfp16arith-ld32.S",
Miao Wangc0aa11a2020-06-10 13:41:26 -07002481 "src/f16-gemm/gen-inc/8x8inc-minmax-aarch64-neonfp16arith-ld64.S",
Miao Wang5eea8312020-12-07 09:12:40 -08002482 "src/f16-gemm/gen/1x8-minmax-aarch64-neonfp16arith-ld64.S",
2483 "src/f16-gemm/gen/1x16-minmax-aarch64-neonfp16arith-ld32.S",
2484 "src/f16-gemm/gen/4x8-minmax-aarch64-neonfp16arith-ld64.S",
2485 "src/f16-gemm/gen/4x16-minmax-aarch64-neonfp16arith-ld32.S",
2486 "src/f16-gemm/gen/6x8-minmax-aarch64-neonfp16arith-ld64.S",
2487 "src/f16-gemm/gen/6x16-minmax-aarch64-neonfp16arith-ld32.S",
2488 "src/f16-gemm/gen/8x8-minmax-aarch64-neonfp16arith-ld64.S",
Miao Wang400e4042020-04-17 10:15:59 -07002489 "src/f32-dwconv/up4x9-minmax-aarch64-neonfma-cortex-a55.S",
2490 "src/f32-dwconv/up4x9-minmax-aarch64-neonfma.S",
Miao Wang400e4042020-04-17 10:15:59 -07002491 "src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-cortex-a53.S",
2492 "src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-cortex-a57.S",
2493 "src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-cortex-a75.S",
Miao Wang5eea8312020-12-07 09:12:40 -08002494 "src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-ld64.S",
2495 "src/f32-gemm/gen-inc/1x12inc-minmax-aarch64-neonfma-cortex-a53.S",
Miao Wang400e4042020-04-17 10:15:59 -07002496 "src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-cortex-a53.S",
2497 "src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-cortex-a55.S",
2498 "src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-cortex-a57.S",
2499 "src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-cortex-a75.S",
Miao Wang400e4042020-04-17 10:15:59 -07002500 "src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-ld64.S",
Miao Wang5eea8312020-12-07 09:12:40 -08002501 "src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-ld128.S",
2502 "src/f32-gemm/gen-inc/4x12inc-minmax-aarch64-neonfma-cortex-a53.S",
Miao Wang400e4042020-04-17 10:15:59 -07002503 "src/f32-gemm/gen-inc/5x8inc-minmax-aarch64-neonfma-cortex-a57.S",
2504 "src/f32-gemm/gen-inc/5x8inc-minmax-aarch64-neonfma-cortex-a75.S",
2505 "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a53.S",
2506 "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a55.S",
Miao Wang400e4042020-04-17 10:15:59 -07002507 "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a57.S",
Miao Wang5eea8312020-12-07 09:12:40 -08002508 "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a73.S",
Miao Wang400e4042020-04-17 10:15:59 -07002509 "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a75.S",
Miao Wang400e4042020-04-17 10:15:59 -07002510 "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-ld64.S",
Miao Wang5eea8312020-12-07 09:12:40 -08002511 "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-ld128.S",
2512 "src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-cortex-a53.S",
2513 "src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-cortex-a57.S",
2514 "src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-cortex-a75.S",
2515 "src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-ld64.S",
2516 "src/f32-gemm/gen/1x12-minmax-aarch64-neonfma-cortex-a53.S",
2517 "src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-cortex-a53.S",
2518 "src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-cortex-a55.S",
2519 "src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-cortex-a57.S",
2520 "src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-cortex-a75.S",
2521 "src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-ld64.S",
2522 "src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-ld128.S",
2523 "src/f32-gemm/gen/4x12-minmax-aarch64-neonfma-cortex-a53.S",
2524 "src/f32-gemm/gen/5x8-minmax-aarch64-neonfma-cortex-a57.S",
2525 "src/f32-gemm/gen/5x8-minmax-aarch64-neonfma-cortex-a75.S",
2526 "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a53.S",
2527 "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a55.S",
2528 "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a57.S",
2529 "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a73.S",
2530 "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a75.S",
2531 "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-ld64.S",
2532 "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-ld128.S",
Miao Wang400e4042020-04-17 10:15:59 -07002533 "src/f32-igemm/1x8-minmax-aarch64-neonfma-cortex-a53.S",
Miao Wang5eea8312020-12-07 09:12:40 -08002534 "src/f32-igemm/1x12-minmax-aarch64-neonfma-cortex-a53.S",
Miao Wang400e4042020-04-17 10:15:59 -07002535 "src/f32-igemm/4x8-minmax-aarch64-neonfma-cortex-a53.S",
2536 "src/f32-igemm/4x8-minmax-aarch64-neonfma-cortex-a55.S",
Miao Wang5eea8312020-12-07 09:12:40 -08002537 "src/f32-igemm/4x12-minmax-aarch64-neonfma-cortex-a53.S",
2538 "src/f32-igemm/6x8-minmax-aarch64-neonfma-cortex-a53.S",
2539 "src/f32-igemm/6x8-minmax-aarch64-neonfma-cortex-a55.S",
2540 "src/f32-igemm/6x8-minmax-aarch64-neonfma-cortex-a73.S",
2541 "src/f32-igemm/gen/1x8-minmax-aarch64-neonfma-cortex-a57.S",
2542 "src/f32-igemm/gen/1x8-minmax-aarch64-neonfma-cortex-a75.S",
Miao Wang400e4042020-04-17 10:15:59 -07002543 "src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-cortex-a57.S",
2544 "src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-cortex-a75.S",
2545 "src/f32-igemm/gen/5x8-minmax-aarch64-neonfma-cortex-a57.S",
2546 "src/f32-igemm/gen/5x8-minmax-aarch64-neonfma-cortex-a75.S",
Miao Wang400e4042020-04-17 10:15:59 -07002547 "src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-cortex-a57.S",
2548 "src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-cortex-a75.S",
Miao Wang5eea8312020-12-07 09:12:40 -08002549 "src/qs8-gemm/1x16c4-aarch64-neondot-ld64.S",
2550 "src/qs8-gemm/4x16c4-aarch64-neondot-cortex-a55.S",
2551 "src/qs8-gemm/4x16c4-aarch64-neondot-ld64.S",
Miao Wange9993472020-02-10 15:00:10 -08002552]
2553
// Defaults inherited by the XNNPACK cc_library_static modules in this file
// (each names it in `defaults`): puts `include` and `src` on the include path,
// compiles with -std=c99 and XNN_LOG_LEVEL=2, disables a handful of warnings,
// and selects the static libc++ STL.
2554cc_defaults {
2555 name: "xnnpack_internal_default",
2556 vendor_available: true,
2557 sdk_version: "current",
2558 local_include_dirs: [
2559 "include",
2560 "src",
2561 ],
2562 cflags: [
2563 "-std=c99",
2564 "-DXNN_LOG_LEVEL=2",
2565 "-Wno-unused-parameter",
2566 "-Wno-missing-field-initializers",
2567 "-Wno-pointer-arith",
 // NOTE(review): this flag also hides calls to functions that have no
 // declaration in scope; presumably required by some source in the tree -
 // confirm before removing.
Miao Wangef75bc92020-07-25 09:15:12 -07002568 "-Wno-implicit-function-declaration",
Miao Wange9993472020-02-10 15:00:10 -08002569 ],
2570 stl: "libc++_static",
2571}
2572
// Static library built from TABLE_SRCS (a list defined elsewhere in this
// file). Most micro-kernel libraries below list it in `static_libs`.
2573cc_library_static {
2574 name: "xnnpack_tables",
2575 defaults: ["xnnpack_internal_default"],
2576 srcs: TABLE_SRCS,
2577}
2578
// Static library built from LOGGING_SRCS (a list defined elsewhere in this
// file). Needs the fp16 headers and links against libclog and libpthreadpool.
2579cc_library_static {
Miao Wang86f5fbe2020-07-24 11:16:10 -07002580 name: "xnnpack_logging_utils",
2581 defaults: ["xnnpack_internal_default"],
2582 srcs: LOGGING_SRCS,
2583 header_libs: [
2584 "fp16_headers",
2585 ],
2586 static_libs: [
2587 "libclog",
2588 "libpthreadpool",
2589 ],
2590}
2591
// Static library containing only src/memory-planner.c, compiled with
// XNN_ENABLE_MEMOPT defined to 1. Depends on libpthreadpool and
// xnnpack_logging_utils.
2592cc_library_static {
2593 name: "xnnpack_memory_planner",
2594 defaults: ["xnnpack_internal_default"],
2595 srcs: [
2596 "src/memory-planner.c",
2597 ],
2598 cflags: [
2599 "-DXNN_ENABLE_MEMOPT=1",
2600 ],
2601 header_libs: [
2602 "fp16_headers",
2603 ],
2604 static_libs: [
2605 "libpthreadpool",
2606 "xnnpack_logging_utils",
2607 ],
2608}
2609
// Static library containing only src/im2col.c; it declares no header or
// library dependencies beyond the shared defaults.
2610cc_library_static {
Miao Wange9993472020-02-10 15:00:10 -08002611 name: "xnnpack_im2col",
2612 defaults: ["xnnpack_internal_default"],
2613 srcs: [
2614 "src/im2col.c",
2615 ],
2616}
2617
// Static library containing only src/indirection.c. Needs the fp16 and fxdiv
// headers and libpthreadpool. It is pulled into "xnnpack_operators" through
// that module's `whole_static_libs`.
2618cc_library_static {
2619 name: "xnnpack_indirection",
2620 defaults: ["xnnpack_internal_default"],
2621 srcs: [
2622 "src/indirection.c",
2623 ],
2624 header_libs: [
2625 "fp16_headers",
2626 "fxdiv_headers",
2627 ],
2628 static_libs: [
2629 "libpthreadpool",
2630 ],
2631}
2632
// Static library containing only src/packing.c. Needs the fp16 and fxdiv
// headers and libpthreadpool; "xnnpack_operators" links against it.
2633cc_library_static {
Miao Wang86f5fbe2020-07-24 11:16:10 -07002634 name: "xnnpack_packing",
2635 defaults: ["xnnpack_internal_default"],
2636 srcs: [
2637 "src/packing.c",
2638 ],
2639 header_libs: [
2640 "fp16_headers",
2641 "fxdiv_headers",
2642 ],
2643 static_libs: [
2644 "libpthreadpool",
2645 ],
2646}
2647
// Static library containing only src/operator-run.c. Adds -Wno-vla on top of
// the shared defaults (presumably because that file uses variable-length
// arrays - confirm against the source). Links libclog and libpthreadpool.
2648cc_library_static {
Miao Wange9993472020-02-10 15:00:10 -08002649 name: "xnnpack_operator_run",
2650 defaults: ["xnnpack_internal_default"],
2651 srcs: [
2652 "src/operator-run.c",
2653 ],
2654 cflags: [
2655 "-Wno-vla",
2656 ],
2657 header_libs: [
2658 "fp16_headers",
2659 "fxdiv_headers",
2660 ],
2661 static_libs: [
2662 "libclog",
2663 "libpthreadpool",
2664 ],
2665}
2666
// Static library with the operator implementations: OPERATOR_SRCS (defined at
// the top of this file) plus src/memory.c and src/operator-delete.c.
// Links libclog, libpthreadpool, xnnpack_logging_utils and xnnpack_packing,
// and lists xnnpack_indirection under `whole_static_libs`, so that library's
// objects are bundled into this archive.
2667cc_library_static {
2668 name: "xnnpack_operators",
2669 defaults: ["xnnpack_internal_default"],
2670 srcs: OPERATOR_SRCS + [
2671 "src/memory.c",
2672 "src/operator-delete.c",
2673 ],
2674 header_libs: [
2675 "fp16_headers",
2676 "fxdiv_headers",
2677 ],
2678 static_libs: [
2679 "libclog",
2680 "libpthreadpool",
Miao Wang86f5fbe2020-07-24 11:16:10 -07002681 "xnnpack_logging_utils",
2682 "xnnpack_packing",
Miao Wange9993472020-02-10 15:00:10 -08002683 ],
2684 whole_static_libs: [
2685 "xnnpack_indirection",
2686 ],
2687}
2688
// Static library built from SCALAR_UKERNELS (a list defined elsewhere in this
// file). It has no `arch` section, so no per-architecture sources or flags
// are set here. Links libpthreadpool and xnnpack_tables.
2689cc_library_static {
2690 name: "xnnpack_scalar_ukernels",
2691 defaults: ["xnnpack_internal_default"],
2692 srcs: SCALAR_UKERNELS,
2693 header_libs: [
2694 "fp16_headers",
2695 "fxdiv_headers",
2696 ],
2697 static_libs: [
2698 "libpthreadpool",
2699 "xnnpack_tables",
2700 ],
2701}
2702
// Static library built from NEON_UKERNELS (defined elsewhere in this file).
// Enabled for arm and arm64 only; the 32-bit arm build adds -marm and
// -mfpu=neon, while arm64 needs no extra flags. Disabled on x86 and x86_64.
2703cc_library_static {
Miao Wange9993472020-02-10 15:00:10 -08002704 name: "xnnpack_neon_ukernels",
2705 defaults: ["xnnpack_internal_default"],
2706 arch: {
2707 arm: {
2708 srcs: NEON_UKERNELS,
2709 cflags: [
2710 "-marm",
2711 "-mfpu=neon",
2712 ],
2713 },
2714 arm64: {
2715 srcs: NEON_UKERNELS,
2716 },
2717 x86: { enabled: false, },
2718 x86_64: { enabled: false, },
2719 },
2720 header_libs: [
2721 "fp16_headers",
2722 ],
2723 static_libs: [
2724 "libpthreadpool",
2725 "xnnpack_tables",
2726 ],
2727}
2728
// Static library built from NEONDOT_UKERNELS (defined elsewhere in this file).
// Both ARM variants compile with -march=armv8.2-a+dotprod; the 32-bit arm
// build additionally passes -marm and -mfpu=neon-fp-armv8.
// Disabled on x86 and x86_64.
2729cc_library_static {
Miao Wang5eea8312020-12-07 09:12:40 -08002730 name: "xnnpack_neondot_ukernels",
2731 defaults: ["xnnpack_internal_default"],
2732 arch: {
2733 arm: {
2734 srcs: NEONDOT_UKERNELS,
2735 cflags: [
2736 "-marm",
2737 "-march=armv8.2-a+dotprod",
2738 "-mfpu=neon-fp-armv8",
2739 ],
2740 },
2741 arm64: {
2742 srcs: NEONDOT_UKERNELS,
2743 cflags: [
2744 "-march=armv8.2-a+dotprod",
2745 ],
2746 },
2747 x86: { enabled: false, },
2748 x86_64: { enabled: false, },
2749 },
2750 header_libs: [
2751 "fp16_headers",
2752 ],
2753 static_libs: [
2754 "libpthreadpool",
2755 "xnnpack_tables",
2756 ],
2757}
2758
// Static library with the NEON FMA micro-kernels. The 32-bit arm build
// compiles NEONFMA_UKERNELS with -marm -march=armv7-a -mfpu=neon-vfpv4; the
// arm64 build compiles NEONFMA_UKERNELS plus AARCH64_NEONFMA_UKERNELS with no
// extra flags. Both lists are defined elsewhere in this file.
// Disabled on x86 and x86_64.
2759cc_library_static {
Miao Wange9993472020-02-10 15:00:10 -08002760 name: "xnnpack_neonfma_ukernels",
2761 defaults: ["xnnpack_internal_default"],
2762 arch: {
2763 arm: {
2764 srcs: NEONFMA_UKERNELS,
2765 cflags: [
2766 "-marm",
Miao Wangc0aa11a2020-06-10 13:41:26 -07002767 "-march=armv7-a",
Miao Wange9993472020-02-10 15:00:10 -08002768 "-mfpu=neon-vfpv4",
2769 ],
2770 },
2771 arm64: {
2772 srcs: NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS,
2773 },
2774 x86: { enabled: false, },
2775 x86_64: { enabled: false, },
2776 },
2777 header_libs: [
2778 "fp16_headers",
2779 ],
2780 static_libs: [
2781 "libpthreadpool",
2782 "xnnpack_tables",
2783 ],
2784}
2785
// Static library built from NEONV8_UKERNELS (defined elsewhere in this file).
// The 32-bit arm build passes -marm -march=armv8-a -mfpu=neon-fp-armv8; the
// arm64 build adds no flags. Disabled on x86 and x86_64.
2786cc_library_static {
Miao Wangc0aa11a2020-06-10 13:41:26 -07002787 name: "xnnpack_neonv8_ukernels",
2788 defaults: ["xnnpack_internal_default"],
2789 arch: {
2790 arm: {
2791 srcs: NEONV8_UKERNELS,
2792 cflags: [
2793 "-marm",
2794 "-march=armv8-a",
2795 "-mfpu=neon-fp-armv8",
2796 ],
2797 },
2798 arm64: {
2799 srcs: NEONV8_UKERNELS,
2800 },
2801 x86: { enabled: false, },
2802 x86_64: { enabled: false, },
2803 },
2804 header_libs: [
2805 "fp16_headers",
2806 ],
2807 static_libs: [
2808 "libpthreadpool",
2809 "xnnpack_tables",
2810 ],
2811}
2812
// Static library built from AARCH64_NEONFP16ARITH_UKERNELS (defined elsewhere
// in this file). Only the arm64 variant is enabled, compiled with
// -march=armv8.2-a+fp16; arm, x86 and x86_64 are disabled.
2813cc_library_static {
Miao Wange9993472020-02-10 15:00:10 -08002814 name: "xnnpack_neonfp16arith_ukernels",
2815 defaults: ["xnnpack_internal_default"],
2816 arch: {
2817 arm: { enabled: false, },
2818 arm64: {
2819 srcs: AARCH64_NEONFP16ARITH_UKERNELS,
2820 cflags: [
2821 "-march=armv8.2-a+fp16",
2822 ],
2823 },
2824 x86: { enabled: false, },
2825 x86_64: { enabled: false, },
2826 },
2827 header_libs: [
2828 "fp16_headers",
2829 ],
2830 static_libs: [
2831 "libpthreadpool",
2832 "xnnpack_tables",
2833 ],
2834}
2835
// Static library of the hand-written assembly micro-kernels.
// arm assembles AARCH32_ASM_UKERNELS; arm64 assembles AARCH64_ASM_UKERNELS and
// passes -march=armv8.2-a+fp16+dotprod to the assembler via clang_asflags.
// Disabled on x86 and x86_64.
cc_library_static {
    name: "xnnpack_asm_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: {
            srcs: AARCH32_ASM_UKERNELS,
        },
        arm64: {
            srcs: AARCH64_ASM_UKERNELS,
            clang_asflags: [
                "-march=armv8.2-a+fp16+dotprod",
            ],
        },
        x86: { enabled: false, },
        x86_64: { enabled: false, },
    },
}
2853
// Static library of the SSE and SSE2 micro-kernels.
// x86 and x86_64 both compile SSE_UKERNELS + SSE2_UKERNELS with -msse2.
// Disabled on arm and arm64.
cc_library_static {
    name: "xnnpack_sse2_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: SSE_UKERNELS + SSE2_UKERNELS,
            cflags: [
                "-msse2",
            ],
        },
        x86_64: {
            srcs: SSE_UKERNELS + SSE2_UKERNELS,
            cflags: [
                "-msse2",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}
2881
// Static library of the SSSE3 micro-kernels.
// x86 and x86_64 both compile SSSE3_UKERNELS with -mssse3.
// Disabled on arm and arm64.
cc_library_static {
    name: "xnnpack_ssse3_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: SSSE3_UKERNELS,
            cflags: [
                "-mssse3",
            ],
        },
        x86_64: {
            srcs: SSSE3_UKERNELS,
            cflags: [
                "-mssse3",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}
2909
// Static library of the SSE4.1 micro-kernels.
// x86 and x86_64 both compile SSE41_UKERNELS with -msse4.1.
// Disabled on arm and arm64.
cc_library_static {
    name: "xnnpack_sse41_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: SSE41_UKERNELS,
            cflags: [
                "-msse4.1",
            ],
        },
        x86_64: {
            srcs: SSE41_UKERNELS,
            cflags: [
                "-msse4.1",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}
2937
// Static library of the AVX micro-kernels.
// x86 and x86_64 both compile AVX_UKERNELS with -mavx.
// Disabled on arm and arm64.
cc_library_static {
    name: "xnnpack_avx_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: AVX_UKERNELS,
            cflags: [
                "-mavx",
            ],
        },
        x86_64: {
            srcs: AVX_UKERNELS,
            cflags: [
                "-mavx",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}
2965
// Static library of the XOP micro-kernels.
// x86 and x86_64 both compile XOP_UKERNELS with -mxop.
// Disabled on arm and arm64.
cc_library_static {
    name: "xnnpack_xop_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: XOP_UKERNELS,
            cflags: [
                "-mxop",
            ],
        },
        x86_64: {
            srcs: XOP_UKERNELS,
            cflags: [
                "-mxop",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}
2993
// Static library of the FMA3 micro-kernels.
// x86 and x86_64 both compile FMA3_UKERNELS with -mfma.
// Disabled on arm and arm64.
cc_library_static {
    name: "xnnpack_fma3_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: FMA3_UKERNELS,
            cflags: [
                "-mfma",
            ],
        },
        x86_64: {
            srcs: FMA3_UKERNELS,
            cflags: [
                "-mfma",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}
3021
// Static library of the AVX2 micro-kernels.
// x86 and x86_64 both compile AVX2_UKERNELS with -mfma and -mavx2.
// Disabled on arm and arm64.
cc_library_static {
    name: "xnnpack_avx2_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: AVX2_UKERNELS,
            cflags: [
                "-mfma",
                "-mavx2",
            ],
        },
        x86_64: {
            srcs: AVX2_UKERNELS,
            cflags: [
                "-mfma",
                "-mavx2",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}
3051
// Static library of the AVX512-SKX micro-kernels.
// x86 and x86_64 both compile AVX512SKX_UKERNELS with the F, CD, BW, DQ and VL
// AVX-512 feature flags. Disabled on arm and arm64.
cc_library_static {
    name: "xnnpack_avx512skx_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: AVX512SKX_UKERNELS,
            cflags: [
                "-mavx512f",
                "-mavx512cd",
                "-mavx512bw",
                "-mavx512dq",
                "-mavx512vl",
            ],
        },
        x86_64: {
            srcs: AVX512SKX_UKERNELS,
            cflags: [
                "-mavx512f",
                "-mavx512cd",
                "-mavx512bw",
                "-mavx512dq",
                "-mavx512vl",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}
3087
// Static library of the AVX512F micro-kernels.
// x86 and x86_64 both compile AVX512F_UKERNELS with -mavx512f.
// Disabled on arm and arm64.
cc_library_static {
    name: "xnnpack_avx512f_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: { enabled: false, },
        arm64: { enabled: false, },
        x86: {
            srcs: AVX512F_UKERNELS,
            cflags: [
                "-mavx512f",
            ],
        },
        x86_64: {
            srcs: AVX512F_UKERNELS,
            cflags: [
                "-mavx512f",
            ],
        },
    },
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libpthreadpool",
        "xnnpack_tables",
    ],
}
3115
// Aggregate micro-kernel library.
// Each architecture whole-archives its own set of per-ISA micro-kernel
// libraries; the scalar kernels and lookup tables are whole-archived for every
// architecture. The arm64 list is the arm list plus
// xnnpack_neonfp16arith_ukernels; the x86 and x86_64 lists are identical.
cc_library_static {
    name: "xnnpack_ukernels",
    defaults: ["xnnpack_internal_default"],
    arch: {
        arm: {
            whole_static_libs: [
                "xnnpack_neon_ukernels",
                "xnnpack_neonfma_ukernels",
                "xnnpack_neonv8_ukernels",
                "xnnpack_neondot_ukernels",
                "xnnpack_asm_ukernels",
            ],
        },
        arm64: {
            whole_static_libs: [
                "xnnpack_neon_ukernels",
                "xnnpack_neonfma_ukernels",
                "xnnpack_neonv8_ukernels",
                "xnnpack_neonfp16arith_ukernels",
                "xnnpack_neondot_ukernels",
                "xnnpack_asm_ukernels",
            ],
        },
        x86: {
            whole_static_libs: [
                "xnnpack_sse2_ukernels",
                "xnnpack_ssse3_ukernels",
                "xnnpack_sse41_ukernels",
                "xnnpack_avx_ukernels",
                "xnnpack_xop_ukernels",
                "xnnpack_fma3_ukernels",
                "xnnpack_avx2_ukernels",
                "xnnpack_avx512f_ukernels",
                "xnnpack_avx512skx_ukernels",
            ],
        },
        x86_64: {
            whole_static_libs: [
                "xnnpack_sse2_ukernels",
                "xnnpack_ssse3_ukernels",
                "xnnpack_sse41_ukernels",
                "xnnpack_avx_ukernels",
                "xnnpack_xop_ukernels",
                "xnnpack_fma3_ukernels",
                "xnnpack_avx2_ukernels",
                "xnnpack_avx512f_ukernels",
                "xnnpack_avx512skx_ukernels",
            ],
        },
    },
    whole_static_libs: [
        "xnnpack_scalar_ukernels",
        "xnnpack_tables",
    ],
}
3171
// Top-level XNNPACK static library.
// Compiles the core sources (init, runtime, subgraph, tensor) together with
// SUBGRAPH_SRCS, exports the "include" directory to dependents, and
// whole-archives the listed helper, micro-kernel and operator libraries.
cc_library_static {
    name: "libXNNPACK",
    defaults: ["xnnpack_internal_default"],
    export_include_dirs: ["include"],
    srcs: [
        "src/init.c",
        "src/runtime.c",
        "src/subgraph.c",
        "src/tensor.c",
    ] + SUBGRAPH_SRCS,
    header_libs: [
        "fp16_headers",
    ],
    whole_static_libs: [
        "libclog",
        "libcpuinfo",
        "libpthreadpool",
        "xnnpack_ukernels",
        "xnnpack_operator_run",
        "xnnpack_operators",
        "xnnpack_logging_utils",
        "xnnpack_memory_planner",
        "xnnpack_packing",
    ],
}
3197
// Tests and benchmarks

// Shared defaults for the test, benchmark and model modules.
// Sets the STL, the local include directories, the warning suppressions and
// the libraries that every such module links against.
cc_defaults {
    name: "xnnpack_tests_default",
    vendor_available: true,
    stl: "libc++_static",
    local_include_dirs: [
        "bench",
        "models",
        "test",
        "src",
    ],
    cflags: [
        "-Wno-unused-function",
        "-Wno-unused-parameter",
        "-Wno-unused-private-field",
    ],
    header_libs: [
        "fp16_headers",
    ],
    static_libs: [
        "libXNNPACK",
        "libpthreadpool",
        "libgmock",
    ],
    shared_libs: [
        "liblog",
    ],
}
3226
// Static library built from models/fp32-mobilenet-v1.cc with the shared test
// defaults.
cc_library_static {
    name: "xnnpack_mobilenet_v1_fp32",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "models/fp32-mobilenet-v1.cc",
    ],
}
3234
// Static library built from models/qs8-mobilenet-v1.cc with the shared test
// defaults.
cc_library_static {
    name: "xnnpack_qs8_mobilenet_v1",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "models/qs8-mobilenet-v1.cc",
    ],
}
3242
// Static library built from models/fp16-mobilenet-v1.cc with the shared test
// defaults.
cc_library_static {
    name: "xnnpack_mobilenet_v1_fp16",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "models/fp16-mobilenet-v1.cc",
    ],
}
3250
// Static library built from models/qs8-mobilenet-v2.cc with the shared test
// defaults.
cc_library_static {
    name: "xnnpack_qs8_mobilenet_v2",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "models/qs8-mobilenet-v2.cc",
    ],
}
3258
// Static library built from models/fp32-mobilenet-v2.cc with the shared test
// defaults.
cc_library_static {
    name: "xnnpack_mobilenet_v2_fp32",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "models/fp32-mobilenet-v2.cc",
    ],
}
3266
// Static library built from models/fp16-mobilenet-v2.cc with the shared test
// defaults.
cc_library_static {
    name: "xnnpack_mobilenet_v2_fp16",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "models/fp16-mobilenet-v2.cc",
    ],
}
3274
3275
// Static library built from models/fp32-mobilenet-v3-large.cc with the shared
// test defaults.
cc_library_static {
    name: "xnnpack_mobilenet_v3_large_fp32",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "models/fp32-mobilenet-v3-large.cc",
    ],
}
3283
// Static library built from models/fp16-mobilenet-v3-large.cc with the shared
// test defaults.
cc_library_static {
    name: "xnnpack_mobilenet_v3_large_fp16",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "models/fp16-mobilenet-v3-large.cc",
    ],
}
3291
// Static library built from models/fp32-mobilenet-v3-small.cc with the shared
// test defaults.
cc_library_static {
    name: "xnnpack_mobilenet_v3_small_fp32",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "models/fp32-mobilenet-v3-small.cc",
    ],
}
3299
// Static library built from models/fp16-mobilenet-v3-small.cc with the shared
// test defaults.
cc_library_static {
    name: "xnnpack_mobilenet_v3_small_fp16",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "models/fp16-mobilenet-v3-small.cc",
    ],
}
3307
// End-to-end benchmark binary.
// Builds bench/end2end.cc and bench/utils.cc and links them against
// libcpuinfo, libgoogle-benchmark and every MobileNet model library
// (v1, v2, v3-large, v3-small in their fp32/fp16/qs8 variants listed below).
cc_benchmark {
    name: "xnnpack_end2end_bench",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "bench/end2end.cc",
        "bench/utils.cc",
    ],
    cflags: [
        "-Wno-unused-result",
    ],
    static_libs: [
        "libcpuinfo",
        "libgoogle-benchmark",
        "xnnpack_qs8_mobilenet_v1",
        "xnnpack_mobilenet_v1_fp32",
        "xnnpack_mobilenet_v1_fp16",
        "xnnpack_qs8_mobilenet_v2",
        "xnnpack_mobilenet_v2_fp32",
        "xnnpack_mobilenet_v2_fp16",
        "xnnpack_mobilenet_v3_large_fp32",
        "xnnpack_mobilenet_v3_large_fp16",
        "xnnpack_mobilenet_v3_small_fp32",
        "xnnpack_mobilenet_v3_small_fp16",
    ],
}
3333
// Builds test/abs-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_abs_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/abs-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3344
// Builds test/add-nd.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_add_nd_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/add-nd.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3355
// Builds test/argmax-pooling-nhwc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_argmax_pooling_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/argmax-pooling-nhwc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3366
// Builds test/average-pooling-nhwc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_average_pooling_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/average-pooling-nhwc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3377
// Builds test/bankers-rounding-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_bankers_rounding_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/bankers-rounding-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3388
// Builds test/ceiling-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_ceiling_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/ceiling-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3399
// Builds test/channel-shuffle-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_channel_shuffle_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/channel-shuffle-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3410
// Builds test/clamp-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_clamp_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/clamp-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3421
// Builds test/constant-pad-nd.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_constant_pad_nd_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/constant-pad-nd.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3432
// Builds test/convolution-nhwc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_convolution_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/convolution-nhwc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3443
// Builds test/convolution-nchw.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_convolution_nchw_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/convolution-nchw.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3454
// Builds test/copy-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_copy_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/copy-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3465
// Builds test/deconvolution-nhwc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_deconvolution_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/deconvolution-nhwc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3476
// Builds test/divide-nd.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_divide_nd_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/divide-nd.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3487
// Builds test/fully-connected-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_fully_connected_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/fully-connected-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3498
// Builds test/floor-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_floor_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/floor-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3509
// Builds test/global-average-pooling-nwc.cc as a test in the "general-tests"
// suite.
cc_test {
    name: "xnnpack_global_average_pooling_nwc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/global-average-pooling-nwc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3520
// Builds test/global-average-pooling-ncw.cc as a test in the "general-tests"
// suite.
cc_test {
    name: "xnnpack_global_average_pooling_ncw_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/global-average-pooling-ncw.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3531
// Builds test/hardswish-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_hardswish_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/hardswish-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3542
// Builds test/leaky-relu-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_leaky_relu_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/leaky-relu-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3553
// Builds test/max-pooling-nhwc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_max_pooling_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/max-pooling-nhwc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3564
// Builds test/maximum-nd.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_maximum_nd_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/maximum-nd.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3575
// Builds test/minimum-nd.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_minimum_nd_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/minimum-nd.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3586
// Builds test/multiply-nd.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_multiply_nd_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/multiply-nd.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3597
// Builds test/negate-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_negate_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/negate-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3608
// Builds test/prelu-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_prelu_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/prelu-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3619
// Builds test/resize-bilinear-nhwc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_resize_bilinear_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/resize-bilinear-nhwc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3630
// Builds test/sigmoid-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_sigmoid_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/sigmoid-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3641
// Builds test/softmax-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_softmax_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/softmax-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3652
// Builds test/square-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_square_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/square-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3663
// Builds test/square-root-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_square_root_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/square-root-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3674
// Builds test/squared-difference-nd.cc as a test in the "general-tests" suite.
// NOTE(review): the module name says "square_difference" while the source file
// is "squared-difference"; the name is kept because it is the build target
// that external test configuration may reference — confirm before renaming.
cc_test {
    name: "xnnpack_square_difference_nd_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/squared-difference-nd.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3685
// Builds test/subtract-nd.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_subtract_nd_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/subtract-nd.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3696
// Builds test/truncation-nc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_truncation_nc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/truncation-nc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}
3707
// Builds test/unpooling-nhwc.cc as a test in the "general-tests" suite.
cc_test {
    name: "xnnpack_unpooling_nhwc_test",
    defaults: ["xnnpack_tests_default"],
    srcs: [
        "test/unpooling-nhwc.cc",
    ],
    test_suites: [
        "general-tests",
    ],
}