blob: 4a136c8cadde7e43a5fd16630f317869d830750a [file] [log] [blame]
Marat Dukhan08c4a432019-10-03 09:29:21 -07001# Copyright 2019 Google LLC
2#
3# This source code is licensed under the BSD-style license found in the
4# LICENSE file in the root directory of this source tree.
5#
6# Description:
7# XNNPACK - optimized floating-point neural network operators library
8
Marat Dukhana84e40b2019-12-11 15:38:03 -08009load(":build_defs.bzl", "xnnpack_aggregate_library", "xnnpack_benchmark", "xnnpack_binary", "xnnpack_cc_library", "xnnpack_min_size_copts", "xnnpack_optional_armcl_copts", "xnnpack_optional_armcl_deps", "xnnpack_optional_gemmlowp_copts", "xnnpack_optional_gemmlowp_deps", "xnnpack_optional_ruy_copts", "xnnpack_optional_ruy_deps", "xnnpack_optional_tflite_copts", "xnnpack_optional_tflite_deps", "xnnpack_std_copts", "xnnpack_std_cxxopts", "xnnpack_unit_test", "xnnpack_visibility")
Marat Dukhan69c3f2c2019-11-06 12:30:01 -080010
Marat Dukhan08c4a432019-10-03 09:29:21 -070011licenses(["notice"])
12
13exports_files(["LICENSE"])
14
Marat Dukhan08c4a432019-10-03 09:29:21 -070015OPERATOR_BENCHMARK_DEPS = [
16 ":XNNPACK",
17 ":bench_utils",
18 "@cpuinfo",
19 "@pthreadpool",
20]
21
22MICROKERNEL_BENCHMARK_DEPS = [
23 ":ukernels",
24 ":bench_utils",
Frank Barchard7e955972019-10-11 10:34:25 -070025 ":enable_assembly",
Marat Dukhan08c4a432019-10-03 09:29:21 -070026 "@cpuinfo",
27 "@FP16",
28 "@pthreadpool",
29]
30
Marat Dukhan6adff4e2019-10-14 18:32:07 -070031ACCURACY_EVAL_DEPS = [
32 ":XNNPACK",
33 ":ukernels",
34 "@FP16",
35 "@pthreadpool",
36]
37
Marat Dukhan08c4a432019-10-03 09:29:21 -070038MICROKERNEL_TEST_DEPS = [
39 ":ukernels",
Frank Barchard7e955972019-10-11 10:34:25 -070040 ":enable_assembly",
Marat Dukhan08c4a432019-10-03 09:29:21 -070041 "@cpuinfo",
42 "@FP16",
43 "@pthreadpool",
44]
45
46OPERATOR_TEST_DEPS = [
47 ":XNNPACK",
48 "@pthreadpool",
49 "@FP16",
50]
51
52OPERATOR_SRCS = [
Marat Dukhanefc47b82019-11-18 09:25:38 -080053 "src/add-nc.c",
54 "src/argmax-pooling-nhwc.c",
55 "src/average-pooling-nhwc.c",
Marat Dukhanb1a0fc32019-12-02 19:32:02 -080056 "src/binary-elementwise-nd.c",
Marat Dukhanefc47b82019-11-18 09:25:38 -080057 "src/channel-pad-nc.c",
58 "src/channel-shuffle-nc.c",
59 "src/clamp-nc.c",
60 "src/convolution-nchw.c",
61 "src/convolution-nhwc.c",
62 "src/deconvolution-nhwc.c",
63 "src/fully-connected-nc.c",
64 "src/global-average-pooling-ncw.c",
65 "src/global-average-pooling-nwc.c",
66 "src/hardswish-nc.c",
67 "src/leaky-relu-nc.c",
68 "src/max-pooling-nhwc.c",
Marat Dukhanefc47b82019-11-18 09:25:38 -080069 "src/prelu-nc.c",
70 "src/resize-bilinear-nhwc.c",
71 "src/sigmoid-nc.c",
72 "src/softargmax-nc.c",
73 "src/unpooling-nhwc.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -070074]
75
Marat Dukhan3a77ea72019-12-23 12:10:24 -080076TABLE_SRCS = [
77 "src/tables/exp2-k-over-64.c",
78 "src/tables/exp2-k-over-2048.c",
79]
80
Marat Dukhan08c4a432019-10-03 09:29:21 -070081SCALAR_UKERNELS = [
Marat Dukhan329da642019-11-19 21:44:39 -080082 "src/f32-argmaxpool/4x-scalar-c1.c",
Marat Dukhan1e782c42019-11-21 17:02:40 -080083 "src/f32-argmaxpool/9p8x-scalar-c1.c",
Marat Dukhan329da642019-11-19 21:44:39 -080084 "src/f32-argmaxpool/9x-scalar-c1.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -070085 "src/f32-avgpool/mp9p8q-scalar.c",
86 "src/f32-avgpool/up9-scalar.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -080087 "src/f32-bilinear/gen/scalar-c1.c",
88 "src/f32-bilinear/gen/scalar-c2.c",
89 "src/f32-bilinear/gen/scalar-c4.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -070090 "src/f32-clamp/scalar.c",
Marat Dukhan441e2212019-12-04 18:30:49 -080091 "src/f32-conv-hwc/3x3s2p0p1c3x4-scalar-1x1.c",
Marat Dukhan6b7dfae2019-12-04 16:00:52 -080092 "src/f32-conv-hwc/3x3s2p1c3x4-scalar-1x1.c",
Erich Elsen563df5f2019-10-23 08:02:21 -070093 "src/f32-conv-hwc2spchw/3x3s2p1c3x4-scalar-1x1.c",
Erich Elsen0cc2c532019-10-15 04:44:18 -070094 "src/f32-dwconv-spchw/3x3p1-scalar.c",
Erich Elsenac4de802019-10-16 04:35:30 -070095 "src/f32-dwconv-spchw/3x3s2p1-scalar.c",
Marat Dukhan1e782c42019-11-21 17:02:40 -080096 "src/f32-dwconv-spchw/5x5p2-scalar.c",
Erich Elsen38709a62019-11-08 11:58:45 -080097 "src/f32-dwconv-spchw/5x5s2p2-scalar.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -080098 "src/f32-dwconv/gen/up1x25-scalar-acc2.c",
99 "src/f32-dwconv/gen/up1x25-scalar.c",
100 "src/f32-dwconv/gen/up1x4-scalar-acc2.c",
101 "src/f32-dwconv/gen/up1x4-scalar.c",
102 "src/f32-dwconv/gen/up1x9-scalar-acc2.c",
103 "src/f32-dwconv/gen/up1x9-scalar.c",
104 "src/f32-dwconv/gen/up2x25-scalar-acc2.c",
105 "src/f32-dwconv/gen/up2x25-scalar.c",
106 "src/f32-dwconv/gen/up2x4-scalar-acc2.c",
107 "src/f32-dwconv/gen/up2x4-scalar.c",
108 "src/f32-dwconv/gen/up2x9-scalar-acc2.c",
109 "src/f32-dwconv/gen/up2x9-scalar.c",
Erich Elsen34dc2c02019-10-16 05:11:41 -0700110 "src/f32-gavgpool-spchw/scalar-x1.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700111 "src/f32-gavgpool/mp7p7q-scalar.c",
112 "src/f32-gavgpool/up7-scalar.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800113 "src/f32-gemm/gen-inc/1x4-scalar.c",
114 "src/f32-gemm/gen-inc/2x4-scalar.c",
115 "src/f32-gemm/gen-inc/4x4-scalar.c",
116 "src/f32-gemm/gen/1x4-scalar.c",
117 "src/f32-gemm/gen/2x4-scalar.c",
118 "src/f32-gemm/gen/4x2-scalar.c",
119 "src/f32-gemm/gen/4x4-scalar.c",
Marat Dukhan662faa02019-12-09 22:48:16 -0800120 "src/f32-hswish/gen/scalar-x1.c",
121 "src/f32-hswish/gen/scalar-x2.c",
122 "src/f32-hswish/gen/scalar-x4.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800123 "src/f32-igemm/gen/1x4-scalar.c",
124 "src/f32-igemm/gen/2x4-scalar.c",
125 "src/f32-igemm/gen/4x2-scalar.c",
126 "src/f32-igemm/gen/4x4-scalar.c",
Marat Dukhan329da642019-11-19 21:44:39 -0800127 "src/f32-maxpool/9p8x-scalar-c1.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700128 "src/f32-pavgpool/mp9p8q-scalar.c",
129 "src/f32-pavgpool/up9-scalar.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800130 "src/f32-ppmm/gen/2x4-scalar.c",
131 "src/f32-ppmm/gen/3x3-scalar.c",
132 "src/f32-ppmm/gen/4x2-scalar.c",
133 "src/f32-ppmm/gen/4x4-scalar.c",
134 "src/f32-prelu/gen/scalar-2x1.c",
135 "src/f32-prelu/gen/scalar-2x4.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700136 "src/f32-rmax/scalar.c",
Marat Dukhan3a77ea72019-12-23 12:10:24 -0800137 "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x1.c",
138 "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x2.c",
139 "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x4.c",
140 "src/f32-sigmoid/gen/scalar-lut64-p2-div-x1.c",
141 "src/f32-sigmoid/gen/scalar-lut64-p2-div-x2.c",
142 "src/f32-sigmoid/gen/scalar-lut64-p2-div-x4.c",
143 "src/f32-sigmoid/gen/scalar-p5-div-x1.c",
144 "src/f32-sigmoid/gen/scalar-p5-div-x2.c",
145 "src/f32-sigmoid/gen/scalar-p5-div-x4.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800146 "src/f32-spmm/gen/1x1-scalar-pipelined.c",
147 "src/f32-spmm/gen/1x1-scalar.c",
148 "src/f32-spmm/gen/2x1-scalar-pipelined.c",
149 "src/f32-spmm/gen/2x1-scalar.c",
150 "src/f32-spmm/gen/4x1-scalar-pipelined.c",
151 "src/f32-spmm/gen/4x1-scalar.c",
152 "src/f32-spmm/gen/8x1-scalar-pipelined.c",
153 "src/f32-spmm/gen/8x1-scalar.c",
154 "src/f32-spmm/gen/8x2-scalar.c",
155 "src/f32-spmm/gen/8x4-scalar.c",
156 "src/f32-vbinary/gen/vadd-scalar-x1.c",
157 "src/f32-vbinary/gen/vadd-scalar-x2.c",
158 "src/f32-vbinary/gen/vadd-scalar-x4.c",
159 "src/f32-vbinary/gen/vaddc-scalar-x1.c",
160 "src/f32-vbinary/gen/vaddc-scalar-x2.c",
161 "src/f32-vbinary/gen/vaddc-scalar-x4.c",
Marat Dukhan77ca6302019-12-06 12:48:15 -0800162 "src/f32-vbinary/gen/vdiv-scalar-x1.c",
163 "src/f32-vbinary/gen/vdiv-scalar-x2.c",
164 "src/f32-vbinary/gen/vdiv-scalar-x4.c",
165 "src/f32-vbinary/gen/vdivc-scalar-x1.c",
166 "src/f32-vbinary/gen/vdivc-scalar-x2.c",
167 "src/f32-vbinary/gen/vdivc-scalar-x4.c",
Marat Dukhan403b7d42019-12-05 12:49:11 -0800168 "src/f32-vbinary/gen/vmax-scalar-x1.c",
169 "src/f32-vbinary/gen/vmax-scalar-x2.c",
170 "src/f32-vbinary/gen/vmax-scalar-x4.c",
171 "src/f32-vbinary/gen/vmaxc-scalar-x1.c",
172 "src/f32-vbinary/gen/vmaxc-scalar-x2.c",
173 "src/f32-vbinary/gen/vmaxc-scalar-x4.c",
174 "src/f32-vbinary/gen/vmin-scalar-x1.c",
175 "src/f32-vbinary/gen/vmin-scalar-x2.c",
176 "src/f32-vbinary/gen/vmin-scalar-x4.c",
177 "src/f32-vbinary/gen/vminc-scalar-x1.c",
178 "src/f32-vbinary/gen/vminc-scalar-x2.c",
179 "src/f32-vbinary/gen/vminc-scalar-x4.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800180 "src/f32-vbinary/gen/vmul-scalar-x1.c",
181 "src/f32-vbinary/gen/vmul-scalar-x2.c",
182 "src/f32-vbinary/gen/vmul-scalar-x4.c",
183 "src/f32-vbinary/gen/vmulc-scalar-x1.c",
184 "src/f32-vbinary/gen/vmulc-scalar-x2.c",
185 "src/f32-vbinary/gen/vmulc-scalar-x4.c",
Marat Dukhan77ca6302019-12-06 12:48:15 -0800186 "src/f32-vbinary/gen/vrdivc-scalar-x1.c",
187 "src/f32-vbinary/gen/vrdivc-scalar-x2.c",
188 "src/f32-vbinary/gen/vrdivc-scalar-x4.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800189 "src/f32-vbinary/gen/vrsubc-scalar-x1.c",
190 "src/f32-vbinary/gen/vrsubc-scalar-x2.c",
191 "src/f32-vbinary/gen/vrsubc-scalar-x4.c",
192 "src/f32-vbinary/gen/vsub-scalar-x1.c",
193 "src/f32-vbinary/gen/vsub-scalar-x2.c",
194 "src/f32-vbinary/gen/vsub-scalar-x4.c",
195 "src/f32-vbinary/gen/vsubc-scalar-x1.c",
196 "src/f32-vbinary/gen/vsubc-scalar-x2.c",
197 "src/f32-vbinary/gen/vsubc-scalar-x4.c",
198 "src/f32-vmulcaddc/gen/c1-scalar-2x.c",
199 "src/f32-vmulcaddc/gen/c2-scalar-2x.c",
200 "src/f32-vmulcaddc/gen/c4-scalar-2x.c",
Marat Dukhan5739f702019-12-22 19:45:09 -0800201 "src/math/expminus-scalar-lut2048-p1.c",
202 "src/math/expminus-scalar-lut64-p2.c",
203 "src/math/expminus-scalar-p5.c",
204 "src/math/sigmoid-scalar-lut2048-p1-div.c",
205 "src/math/sigmoid-scalar-lut64-p2-div.c",
206 "src/math/sigmoid-scalar-p5-div.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700207 "src/q8-avgpool/mp9p8q-scalar.c",
208 "src/q8-avgpool/up9-scalar.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700209 "src/q8-dwconv/up1x9-scalar.c",
210 "src/q8-gavgpool/mp7p7q-scalar.c",
211 "src/q8-gavgpool/up7-scalar.c",
212 "src/q8-gemm/2x2-scalar.c",
Marat Dukhan1e782c42019-11-21 17:02:40 -0800213 "src/q8-igemm/2x2-scalar.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700214 "src/q8-vadd/scalar.c",
215 "src/u8-clamp/scalar.c",
216 "src/u8-lut32norm/scalar.c",
Marat Dukhan329da642019-11-19 21:44:39 -0800217 "src/u8-maxpool/9p8x-scalar-c1.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700218 "src/u8-rmax/scalar.c",
219 "src/x32-packx/x2-scalar.c",
220 "src/x32-packx/x3-scalar.c",
221 "src/x32-packx/x4-scalar.c",
222 "src/x32-pad/x2-scalar.c",
223 "src/x32-unpool/scalar.c",
224 "src/x32-zip/x2-scalar.c",
225 "src/x32-zip/x3-scalar.c",
226 "src/x32-zip/x4-scalar.c",
227 "src/x32-zip/xm-scalar.c",
228 "src/x8-lut/scalar.c",
229 "src/x8-zip/x2-scalar.c",
230 "src/x8-zip/x3-scalar.c",
231 "src/x8-zip/x4-scalar.c",
232 "src/x8-zip/xm-scalar.c",
233]
234
Marat Dukhan436ebe62019-12-04 15:10:12 -0800235WASM_UKERNELS = [
236 "src/f32-avgpool/mp9p8q-wasm.c",
237 "src/f32-avgpool/up9-wasm.c",
238 "src/f32-clamp/wasm.c",
239 "src/f32-dwconv/gen/up1x25-wasm-acc2.c",
240 "src/f32-dwconv/gen/up1x25-wasm.c",
241 "src/f32-dwconv/gen/up1x4-wasm-acc2.c",
242 "src/f32-dwconv/gen/up1x4-wasm.c",
243 "src/f32-dwconv/gen/up1x9-wasm-acc2.c",
244 "src/f32-dwconv/gen/up1x9-wasm.c",
245 "src/f32-dwconv/gen/up2x25-wasm-acc2.c",
246 "src/f32-dwconv/gen/up2x25-wasm.c",
247 "src/f32-dwconv/gen/up2x4-wasm-acc2.c",
248 "src/f32-dwconv/gen/up2x4-wasm.c",
249 "src/f32-dwconv/gen/up2x9-wasm-acc2.c",
250 "src/f32-dwconv/gen/up2x9-wasm.c",
251 "src/f32-gavgpool/mp7p7q-wasm.c",
252 "src/f32-gavgpool/up7-wasm.c",
253 "src/f32-gemm/gen-inc/1x4-wasm.c",
254 "src/f32-gemm/gen-inc/2x4-wasm.c",
255 "src/f32-gemm/gen-inc/4x4-wasm.c",
256 "src/f32-gemm/gen/1x4-wasm.c",
257 "src/f32-gemm/gen/2x4-wasm.c",
258 "src/f32-gemm/gen/4x2-wasm.c",
259 "src/f32-gemm/gen/4x4-wasm.c",
Marat Dukhan662faa02019-12-09 22:48:16 -0800260 "src/f32-hswish/gen/wasm-x1.c",
261 "src/f32-hswish/gen/wasm-x2.c",
262 "src/f32-hswish/gen/wasm-x4.c",
Marat Dukhan436ebe62019-12-04 15:10:12 -0800263 "src/f32-igemm/gen/1x4-wasm.c",
264 "src/f32-igemm/gen/2x4-wasm.c",
265 "src/f32-igemm/gen/4x2-wasm.c",
266 "src/f32-igemm/gen/4x4-wasm.c",
267 "src/f32-maxpool/9p8x-wasm-c1.c",
268 "src/f32-pavgpool/mp9p8q-wasm.c",
269 "src/f32-pavgpool/up9-wasm.c",
270 "src/f32-prelu/gen/wasm-2x1.c",
271 "src/f32-prelu/gen/wasm-2x4.c",
272 "src/f32-vbinary/gen/vadd-wasm-x1.c",
273 "src/f32-vbinary/gen/vadd-wasm-x2.c",
274 "src/f32-vbinary/gen/vadd-wasm-x4.c",
275 "src/f32-vbinary/gen/vaddc-wasm-x1.c",
276 "src/f32-vbinary/gen/vaddc-wasm-x2.c",
277 "src/f32-vbinary/gen/vaddc-wasm-x4.c",
Marat Dukhan77ca6302019-12-06 12:48:15 -0800278 "src/f32-vbinary/gen/vdiv-wasm-x1.c",
279 "src/f32-vbinary/gen/vdiv-wasm-x2.c",
280 "src/f32-vbinary/gen/vdiv-wasm-x4.c",
281 "src/f32-vbinary/gen/vdivc-wasm-x1.c",
282 "src/f32-vbinary/gen/vdivc-wasm-x2.c",
283 "src/f32-vbinary/gen/vdivc-wasm-x4.c",
Marat Dukhan403b7d42019-12-05 12:49:11 -0800284 "src/f32-vbinary/gen/vmax-wasm-x1.c",
285 "src/f32-vbinary/gen/vmax-wasm-x2.c",
286 "src/f32-vbinary/gen/vmax-wasm-x4.c",
287 "src/f32-vbinary/gen/vmaxc-wasm-x1.c",
288 "src/f32-vbinary/gen/vmaxc-wasm-x2.c",
289 "src/f32-vbinary/gen/vmaxc-wasm-x4.c",
290 "src/f32-vbinary/gen/vmin-wasm-x1.c",
291 "src/f32-vbinary/gen/vmin-wasm-x2.c",
292 "src/f32-vbinary/gen/vmin-wasm-x4.c",
293 "src/f32-vbinary/gen/vminc-wasm-x1.c",
294 "src/f32-vbinary/gen/vminc-wasm-x2.c",
295 "src/f32-vbinary/gen/vminc-wasm-x4.c",
Marat Dukhan436ebe62019-12-04 15:10:12 -0800296 "src/f32-vbinary/gen/vmul-wasm-x1.c",
297 "src/f32-vbinary/gen/vmul-wasm-x2.c",
298 "src/f32-vbinary/gen/vmul-wasm-x4.c",
299 "src/f32-vbinary/gen/vmulc-wasm-x1.c",
300 "src/f32-vbinary/gen/vmulc-wasm-x2.c",
301 "src/f32-vbinary/gen/vmulc-wasm-x4.c",
Marat Dukhan77ca6302019-12-06 12:48:15 -0800302 "src/f32-vbinary/gen/vrdivc-wasm-x1.c",
303 "src/f32-vbinary/gen/vrdivc-wasm-x2.c",
304 "src/f32-vbinary/gen/vrdivc-wasm-x4.c",
Marat Dukhan436ebe62019-12-04 15:10:12 -0800305 "src/f32-vbinary/gen/vrsubc-wasm-x1.c",
306 "src/f32-vbinary/gen/vrsubc-wasm-x2.c",
307 "src/f32-vbinary/gen/vrsubc-wasm-x4.c",
308 "src/f32-vbinary/gen/vsub-wasm-x1.c",
309 "src/f32-vbinary/gen/vsub-wasm-x2.c",
310 "src/f32-vbinary/gen/vsub-wasm-x4.c",
311 "src/f32-vbinary/gen/vsubc-wasm-x1.c",
312 "src/f32-vbinary/gen/vsubc-wasm-x2.c",
313 "src/f32-vbinary/gen/vsubc-wasm-x4.c",
314 "src/f32-vmulcaddc/gen/c1-wasm-2x.c",
315 "src/f32-vmulcaddc/gen/c2-wasm-2x.c",
316 "src/f32-vmulcaddc/gen/c4-wasm-2x.c",
317]
318
Marat Dukhan08c4a432019-10-03 09:29:21 -0700319PSIMD_UKERNELS = [
Marat Dukhan329da642019-11-19 21:44:39 -0800320 "src/f32-argmaxpool/4x-psimd-c4.c",
Marat Dukhan1e782c42019-11-21 17:02:40 -0800321 "src/f32-argmaxpool/9p8x-psimd-c4.c",
Marat Dukhan329da642019-11-19 21:44:39 -0800322 "src/f32-argmaxpool/9x-psimd-c4.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700323 "src/f32-avgpool/mp9p8q-psimd.c",
324 "src/f32-avgpool/up9-psimd.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800325 "src/f32-bilinear/gen/psimd-c4.c",
326 "src/f32-bilinear/gen/psimd-c8.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700327 "src/f32-clamp/psimd.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800328 "src/f32-dwconv/gen/up4x25-psimd-acc2.c",
329 "src/f32-dwconv/gen/up4x25-psimd.c",
330 "src/f32-dwconv/gen/up4x4-psimd-acc2.c",
331 "src/f32-dwconv/gen/up4x4-psimd.c",
332 "src/f32-dwconv/gen/up4x9-psimd-acc2.c",
333 "src/f32-dwconv/gen/up4x9-psimd.c",
334 "src/f32-dwconv/gen/up8x25-psimd-acc2.c",
335 "src/f32-dwconv/gen/up8x25-psimd.c",
336 "src/f32-dwconv/gen/up8x4-psimd-acc2.c",
337 "src/f32-dwconv/gen/up8x4-psimd.c",
338 "src/f32-dwconv/gen/up8x9-psimd-acc2.c",
339 "src/f32-dwconv/gen/up8x9-psimd.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700340 "src/f32-gavgpool/mp7p7q-psimd.c",
341 "src/f32-gavgpool/up7-psimd.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800342 "src/f32-gemm/gen/1x8-psimd-loadsplat.c",
343 "src/f32-gemm/gen/1x8-psimd-splat.c",
344 "src/f32-gemm/gen/1x8s4-psimd.c",
345 "src/f32-gemm/gen/4x8-psimd-loadsplat.c",
346 "src/f32-gemm/gen/4x8-psimd-splat.c",
347 "src/f32-gemm/gen/4x8s4-psimd.c",
348 "src/f32-gemm/gen/6x8-psimd-loadsplat.c",
349 "src/f32-gemm/gen/6x8-psimd-splat.c",
350 "src/f32-gemm/gen/6x8s4-psimd.c",
351 "src/f32-gemm/gen-inc/1x8-psimd-loadsplat.c",
352 "src/f32-gemm/gen-inc/1x8-psimd-splat.c",
353 "src/f32-gemm/gen-inc/1x8s4-psimd.c",
354 "src/f32-gemm/gen-inc/4x8-psimd-loadsplat.c",
355 "src/f32-gemm/gen-inc/4x8-psimd-splat.c",
356 "src/f32-gemm/gen-inc/4x8s4-psimd.c",
357 "src/f32-gemm/gen-inc/6x8-psimd-loadsplat.c",
358 "src/f32-gemm/gen-inc/6x8-psimd-splat.c",
359 "src/f32-gemm/gen-inc/6x8s4-psimd.c",
Marat Dukhan662faa02019-12-09 22:48:16 -0800360 "src/f32-hswish/gen/psimd-x4.c",
361 "src/f32-hswish/gen/psimd-x8.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800362 "src/f32-igemm/gen/1x8-psimd-loadsplat.c",
363 "src/f32-igemm/gen/1x8-psimd-splat.c",
364 "src/f32-igemm/gen/1x8s4-psimd.c",
365 "src/f32-igemm/gen/4x2c4-psimd.c",
366 "src/f32-igemm/gen/4x8-psimd-loadsplat.c",
367 "src/f32-igemm/gen/4x8-psimd-splat.c",
368 "src/f32-igemm/gen/4x8s4-psimd.c",
369 "src/f32-igemm/gen/6x8-psimd-loadsplat.c",
370 "src/f32-igemm/gen/6x8-psimd-splat.c",
371 "src/f32-igemm/gen/6x8s4-psimd.c",
Marat Dukhan329da642019-11-19 21:44:39 -0800372 "src/f32-maxpool/9p8x-psimd-c4.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700373 "src/f32-pavgpool/mp9p8q-psimd.c",
374 "src/f32-pavgpool/up9-psimd.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800375 "src/f32-ppmm/gen/4x8-psimd.c",
376 "src/f32-prelu/gen/psimd-2x4.c",
377 "src/f32-prelu/gen/psimd-2x8.c",
378 "src/f32-vbinary/gen/vadd-psimd-x4.c",
379 "src/f32-vbinary/gen/vadd-psimd-x8.c",
380 "src/f32-vbinary/gen/vaddc-psimd-x4.c",
381 "src/f32-vbinary/gen/vaddc-psimd-x8.c",
Marat Dukhan77ca6302019-12-06 12:48:15 -0800382 "src/f32-vbinary/gen/vdiv-psimd-x4.c",
383 "src/f32-vbinary/gen/vdiv-psimd-x8.c",
384 "src/f32-vbinary/gen/vdivc-psimd-x4.c",
385 "src/f32-vbinary/gen/vdivc-psimd-x8.c",
Marat Dukhan403b7d42019-12-05 12:49:11 -0800386 "src/f32-vbinary/gen/vmax-psimd-x4.c",
387 "src/f32-vbinary/gen/vmax-psimd-x8.c",
388 "src/f32-vbinary/gen/vmaxc-psimd-x4.c",
389 "src/f32-vbinary/gen/vmaxc-psimd-x8.c",
390 "src/f32-vbinary/gen/vmin-psimd-x4.c",
391 "src/f32-vbinary/gen/vmin-psimd-x8.c",
392 "src/f32-vbinary/gen/vminc-psimd-x4.c",
393 "src/f32-vbinary/gen/vminc-psimd-x8.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800394 "src/f32-vbinary/gen/vmul-psimd-x4.c",
395 "src/f32-vbinary/gen/vmul-psimd-x8.c",
396 "src/f32-vbinary/gen/vmulc-psimd-x4.c",
397 "src/f32-vbinary/gen/vmulc-psimd-x8.c",
Marat Dukhan77ca6302019-12-06 12:48:15 -0800398 "src/f32-vbinary/gen/vrdivc-psimd-x4.c",
399 "src/f32-vbinary/gen/vrdivc-psimd-x8.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800400 "src/f32-vbinary/gen/vrsubc-psimd-x4.c",
401 "src/f32-vbinary/gen/vrsubc-psimd-x8.c",
402 "src/f32-vbinary/gen/vsub-psimd-x4.c",
403 "src/f32-vbinary/gen/vsub-psimd-x8.c",
404 "src/f32-vbinary/gen/vsubc-psimd-x4.c",
405 "src/f32-vbinary/gen/vsubc-psimd-x8.c",
406 "src/f32-vmulcaddc/gen/c4-psimd-2x.c",
407 "src/f32-vmulcaddc/gen/c8-psimd-2x.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700408 "src/x32-packx/x4-psimd.c",
409 "src/x32-pad/x2-psimd.c",
410 "src/x32-unpool/psimd.c",
411 "src/x32-zip/x2-psimd.c",
412 "src/x32-zip/x3-psimd.c",
413 "src/x32-zip/x4-psimd.c",
414 "src/x32-zip/xm-psimd.c",
415]
416
417# ISA-specific micro-kernels
418NEON_UKERNELS = [
419 "src/f32-avgpool/mp9p8q-neon.c",
420 "src/f32-avgpool/up9-neon.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800421 "src/f32-bilinear/gen/neon-c4.c",
422 "src/f32-bilinear/gen/neon-c8.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700423 "src/f32-clamp/neon.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800424 "src/f32-dwconv/gen/up4x9-neon.c",
425 "src/f32-dwconv/gen/up4x9-neon-acc2.c",
426 "src/f32-dwconv/gen/up8x9-neon.c",
427 "src/f32-dwconv/gen/up8x9-neon-acc2.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700428 "src/f32-gavgpool-spchw/neon-x4.c",
429 "src/f32-gavgpool/mp7p7q-neon.c",
430 "src/f32-gavgpool/up7-neon.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800431 "src/f32-gemm/gen/1x8-neon-lane-ld64.c",
432 "src/f32-gemm/gen/4x2-neon-lane-ld64.c",
433 "src/f32-gemm/gen/4x8-neon-lane-ld128.c",
434 "src/f32-gemm/gen/4x8-neon-lane-ld64.c",
435 "src/f32-gemm/gen/5x8-neon-lane-ld64.c",
436 "src/f32-gemm/gen/6x8-neon-lane-ld64.c",
Frank Barchard69172d92019-11-26 16:22:39 -0800437 "src/f32-gemm/gen/6x8-neon-lane-ld128.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800438 "src/f32-gemm/gen/1x8-neon-dup-ld64.c",
439 "src/f32-gemm/gen/4x8-neon-dup-ld128.c",
440 "src/f32-gemm/gen/4x8-neon-dup-ld64.c",
441 "src/f32-gemm/gen/6x8-neon-dup-ld64.c",
Frank Barchard69172d92019-11-26 16:22:39 -0800442 "src/f32-gemm/gen/6x8-neon-dup-ld128.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800443 "src/f32-gemm/gen/1x8s4-neon.c",
444 "src/f32-gemm/gen/4x8s4-neon.c",
445 "src/f32-gemm/gen/6x8s4-neon.c",
446 "src/f32-gemm/gen/8x8s4-neon.c",
447 "src/f32-gemm/gen-inc/1x8-neon-lane-ld64.c",
448 "src/f32-gemm/gen-inc/4x8-neon-lane-ld128.c",
449 "src/f32-gemm/gen-inc/4x8-neon-lane-ld64.c",
450 "src/f32-gemm/gen-inc/5x8-neon-lane-ld64.c",
451 "src/f32-gemm/gen-inc/6x8-neon-lane-ld64.c",
Frank Barchard69172d92019-11-26 16:22:39 -0800452 "src/f32-gemm/gen-inc/6x8-neon-lane-ld128.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800453 "src/f32-gemm/gen-inc/1x8-neon-dup-ld64.c",
454 "src/f32-gemm/gen-inc/4x8-neon-dup-ld128.c",
455 "src/f32-gemm/gen-inc/4x8-neon-dup-ld64.c",
456 "src/f32-gemm/gen-inc/6x8-neon-dup-ld64.c",
Frank Barchard69172d92019-11-26 16:22:39 -0800457 "src/f32-gemm/gen-inc/6x8-neon-dup-ld128.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800458 "src/f32-gemm/gen-inc/1x8s4-neon.c",
459 "src/f32-gemm/gen-inc/4x8s4-neon.c",
460 "src/f32-gemm/gen-inc/6x8s4-neon.c",
461 "src/f32-gemm/gen-inc/8x8s4-neon.c",
Marat Dukhan662faa02019-12-09 22:48:16 -0800462 "src/f32-hswish/gen/neon-x4.c",
463 "src/f32-hswish/gen/neon-x8.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800464 "src/f32-igemm/gen/1x8-neon-lane-ld64.c",
465 "src/f32-igemm/gen/4x2-neon-lane-ld64.c",
466 "src/f32-igemm/gen/4x4-neon-lane-ld64.c",
467 "src/f32-igemm/gen/4x8-neon-lane-ld128.c",
468 "src/f32-igemm/gen/4x8-neon-lane-ld64.c",
469 "src/f32-igemm/gen/6x8-neon-lane-ld64.c",
Frank Barchard69172d92019-11-26 16:22:39 -0800470 "src/f32-igemm/gen/6x8-neon-lane-ld128.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800471 "src/f32-igemm/gen/1x8-neon-dup-ld64.c",
472 "src/f32-igemm/gen/4x8-neon-dup-ld128.c",
473 "src/f32-igemm/gen/4x8-neon-dup-ld64.c",
474 "src/f32-igemm/gen/6x8-neon-dup-ld64.c",
Frank Barchard69172d92019-11-26 16:22:39 -0800475 "src/f32-igemm/gen/6x8-neon-dup-ld128.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800476 "src/f32-igemm/gen/1x8s4-neon.c",
477 "src/f32-igemm/gen/4x8s4-neon.c",
478 "src/f32-igemm/gen/6x8s4-neon.c",
479 "src/f32-igemm/gen/8x8s4-neon.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700480 "src/f32-pavgpool/mp9p8q-neon.c",
481 "src/f32-pavgpool/up9-neon.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800482 "src/f32-ppmm/gen/4x8-neon.c",
483 "src/f32-ppmm/gen/8x8-neon.c",
484 "src/f32-prelu/gen/neon-2x4.c",
485 "src/f32-prelu/gen/neon-2x8.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700486 "src/f32-rmax/neon.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800487 "src/f32-sigmoid/gen/neon-frac-p9-p10-nr1recps-x16.c",
488 "src/f32-vbinary/gen/vadd-neon-x4.c",
489 "src/f32-vbinary/gen/vadd-neon-x8.c",
490 "src/f32-vbinary/gen/vaddc-neon-x4.c",
491 "src/f32-vbinary/gen/vaddc-neon-x8.c",
Marat Dukhan403b7d42019-12-05 12:49:11 -0800492 "src/f32-vbinary/gen/vmax-neon-x4.c",
493 "src/f32-vbinary/gen/vmax-neon-x8.c",
494 "src/f32-vbinary/gen/vmaxc-neon-x4.c",
495 "src/f32-vbinary/gen/vmaxc-neon-x8.c",
496 "src/f32-vbinary/gen/vmin-neon-x4.c",
497 "src/f32-vbinary/gen/vmin-neon-x8.c",
498 "src/f32-vbinary/gen/vminc-neon-x4.c",
499 "src/f32-vbinary/gen/vminc-neon-x8.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800500 "src/f32-vbinary/gen/vmul-neon-x4.c",
501 "src/f32-vbinary/gen/vmul-neon-x8.c",
502 "src/f32-vbinary/gen/vmulc-neon-x4.c",
503 "src/f32-vbinary/gen/vmulc-neon-x8.c",
504 "src/f32-vbinary/gen/vrsubc-neon-x4.c",
505 "src/f32-vbinary/gen/vrsubc-neon-x8.c",
506 "src/f32-vbinary/gen/vsub-neon-x4.c",
507 "src/f32-vbinary/gen/vsub-neon-x8.c",
508 "src/f32-vbinary/gen/vsubc-neon-x4.c",
509 "src/f32-vbinary/gen/vsubc-neon-x8.c",
510 "src/f32-vmulcaddc/gen/c4-neon-2x.c",
511 "src/f32-vmulcaddc/gen/c8-neon-2x.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700512 "src/q8-avgpool/mp9p8q-neon.c",
513 "src/q8-avgpool/up9-neon.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700514 "src/q8-dwconv/up8x9-neon.c",
515 "src/q8-gavgpool/mp7p7q-neon.c",
516 "src/q8-gavgpool/up7-neon.c",
517 "src/q8-gemm/4x8-neon.c",
518 "src/q8-gemm/8x8-neon.c",
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800519 "src/q8-igemm/4x8-neon.c",
520 "src/q8-igemm/8x8-neon.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700521 "src/q8-vadd/neon.c",
522 "src/u8-clamp/neon.c",
Marat Dukhan329da642019-11-19 21:44:39 -0800523 "src/u8-maxpool/9p8x-neon-c16.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700524 "src/u8-rmax/neon.c",
525 "src/x32-packx/x4-neon-st4.c",
526 "src/x32-pad/x2-neon.c",
527 "src/x32-zip/x2-neon.c",
528 "src/x32-zip/x3-neon.c",
529 "src/x32-zip/x4-neon.c",
530 "src/x32-zip/xm-neon.c",
531 "src/x8-zip/x2-neon.c",
532 "src/x8-zip/x3-neon.c",
533 "src/x8-zip/x4-neon.c",
534 "src/x8-zip/xm-neon.c",
535]
536
537NEONFMA_UKERNELS = [
Marat Dukhan40a672f2019-11-25 03:08:22 -0800538 "src/f32-bilinear/gen/neonfma-c4.c",
539 "src/f32-bilinear/gen/neonfma-c8.c",
540 "src/f32-igemm/gen/1x8-neonfma-dup-ld64.c",
541 "src/f32-igemm/gen/4x8-neonfma-dup-ld128.c",
542 "src/f32-igemm/gen/4x8-neonfma-dup-ld64.c",
543 "src/f32-igemm/gen/6x8-neonfma-dup-ld64.c",
Frank Barchard69172d92019-11-26 16:22:39 -0800544 "src/f32-igemm/gen/6x8-neonfma-dup-ld128.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800545 "src/f32-igemm/gen/1x8s4-neonfma.c",
546 "src/f32-igemm/gen/4x8s4-neonfma.c",
547 "src/f32-igemm/gen/6x8s4-neonfma.c",
548 "src/f32-igemm/gen/8x8s4-neonfma.c",
549 "src/f32-dwconv/gen/up4x9-neonfma.c",
550 "src/f32-dwconv/gen/up4x9-neonfma-acc2.c",
551 "src/f32-dwconv/gen/up8x9-neonfma.c",
552 "src/f32-dwconv/gen/up8x9-neonfma-acc2.c",
553 "src/f32-gemm/gen/1x8-neonfma-dup-ld64.c",
554 "src/f32-gemm/gen/4x8-neonfma-dup-ld128.c",
555 "src/f32-gemm/gen/4x8-neonfma-dup-ld64.c",
556 "src/f32-gemm/gen/6x8-neonfma-dup-ld64.c",
Frank Barchard69172d92019-11-26 16:22:39 -0800557 "src/f32-gemm/gen/6x8-neonfma-dup-ld128.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800558 "src/f32-gemm/gen/1x8s4-neonfma.c",
559 "src/f32-gemm/gen/4x8s4-neonfma.c",
560 "src/f32-gemm/gen/6x8s4-neonfma.c",
561 "src/f32-gemm/gen/8x8s4-neonfma.c",
562 "src/f32-gemm/gen-inc/1x8-neonfma-dup-ld64.c",
563 "src/f32-gemm/gen-inc/4x8-neonfma-dup-ld128.c",
564 "src/f32-gemm/gen-inc/4x8-neonfma-dup-ld64.c",
565 "src/f32-gemm/gen-inc/6x8-neonfma-dup-ld64.c",
Frank Barchard69172d92019-11-26 16:22:39 -0800566 "src/f32-gemm/gen-inc/6x8-neonfma-dup-ld128.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800567 "src/f32-gemm/gen-inc/1x8s4-neonfma.c",
568 "src/f32-gemm/gen-inc/4x8s4-neonfma.c",
569 "src/f32-gemm/gen-inc/6x8s4-neonfma.c",
570 "src/f32-gemm/gen-inc/8x8s4-neonfma.c",
Marat Dukhan662faa02019-12-09 22:48:16 -0800571 "src/f32-hswish/gen/neonfma-x4.c",
572 "src/f32-hswish/gen/neonfma-x8.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800573 "src/f32-ppmm/gen/4x8-neonfma.c",
574 "src/f32-ppmm/gen/8x8-neonfma.c",
575 "src/f32-sigmoid/gen/neonfma-p5-nr2fma-x16.c",
576 "src/f32-vmulcaddc/gen/c4-neonfma-2x.c",
577 "src/f32-vmulcaddc/gen/c8-neonfma-2x.c",
Marat Dukhan797a8fe2019-11-14 20:21:57 -0800578 "src/math/exp-neonfma-lut64-p2.c",
579 "src/math/exp-neonfma-p5.c",
Marat Dukhan189ae802019-11-26 11:28:44 -0800580 "src/math/expminus-neonfma-lut2048-p1.c",
581 "src/math/expminus-neonfma-lut64-p2.c",
Marat Dukhan346a9e52019-11-15 09:06:30 -0800582 "src/math/expminus-neonfma-p5.c",
Marat Dukhan91f8d862019-11-27 12:25:42 -0800583 "src/math/sigmoid-neonfma-lut2048-p1-nr1recps1fma.c",
584 "src/math/sigmoid-neonfma-lut2048-p1-nr2fma.c",
585 "src/math/sigmoid-neonfma-lut2048-p1-nr2recps.c",
Marat Dukhan22aae132019-11-22 17:10:29 -0800586 "src/math/sigmoid-neonfma-p5-nr1recps1fma.c",
Marat Dukhan80bafd22019-11-18 10:16:01 -0800587 "src/math/sigmoid-neonfma-p5-nr2fma.c",
Marat Dukhan22aae132019-11-22 17:10:29 -0800588 "src/math/sigmoid-neonfma-p5-nr2recps.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700589]
590
591AARCH64_NEONFMA_UKERNELS = [
Marat Dukhan77ca6302019-12-06 12:48:15 -0800592 "src/f32-vbinary/gen/vdiv-neon-x4.c",
593 "src/f32-vbinary/gen/vdiv-neon-x8.c",
594 "src/f32-vbinary/gen/vdivc-neon-x4.c",
595 "src/f32-vbinary/gen/vdivc-neon-x8.c",
596 "src/f32-vbinary/gen/vrdivc-neon-x4.c",
597 "src/f32-vbinary/gen/vrdivc-neon-x8.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800598 "src/f32-gemm/gen/1x8-neonfma-lane-ld64.c",
599 "src/f32-gemm/gen/4x2-neonfma-lane-ld64.c",
600 "src/f32-gemm/gen/4x8-neonfma-lane-ld128.c",
601 "src/f32-gemm/gen/4x8-neonfma-lane-ld64.c",
602 "src/f32-gemm/gen/5x8-neonfma-lane-ld64.c",
603 "src/f32-gemm/gen/6x8-neonfma-lane-ld64.c",
Frank Barchard69172d92019-11-26 16:22:39 -0800604 "src/f32-gemm/gen/6x8-neonfma-lane-ld128.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800605 "src/f32-gemm/gen-inc/1x8-neonfma-lane-ld64.c",
606 "src/f32-gemm/gen-inc/4x8-neonfma-lane-ld128.c",
607 "src/f32-gemm/gen-inc/4x8-neonfma-lane-ld64.c",
608 "src/f32-gemm/gen-inc/5x8-neonfma-lane-ld64.c",
609 "src/f32-gemm/gen-inc/6x8-neonfma-lane-ld64.c",
Frank Barchard69172d92019-11-26 16:22:39 -0800610 "src/f32-gemm/gen-inc/6x8-neonfma-lane-ld128.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800611 "src/f32-igemm/gen/1x8-neonfma-lane-ld64.c",
612 "src/f32-igemm/gen/4x2-neonfma-lane-ld64.c",
613 "src/f32-igemm/gen/4x4-neonfma-lane-ld64.c",
614 "src/f32-igemm/gen/4x8-neonfma-lane-ld128.c",
615 "src/f32-igemm/gen/4x8-neonfma-lane-ld64.c",
616 "src/f32-igemm/gen/6x8-neonfma-lane-ld64.c",
Frank Barchard69172d92019-11-26 16:22:39 -0800617 "src/f32-igemm/gen/6x8-neonfma-lane-ld128.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700618 "src/f32-conv-hwc/3x3s2p1c3x4-neonfma-2x2.c",
619 "src/f32-conv-hwc/3x3s2p1c3x8-neonfma-2x2.c",
620 "src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c",
621 "src/f32-dwconv-spchw/3x3p1-neonfma.c",
622 "src/f32-dwconv-spchw/5x5p2-neonfma.c",
623 "src/f32-dwconv-spchw/3x3s2p1-neonfma.c",
624 "src/f32-dwconv-spchw/5x5s2p2-neonfma.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800625 "src/f32-spmm/gen/12x1-neonfma.c",
626 "src/f32-spmm/gen/12x2-neonfma.c",
627 "src/f32-spmm/gen/12x4-neonfma.c",
628 "src/f32-spmm/gen/16x1-neonfma-pipelined.c",
629 "src/f32-spmm/gen/16x1-neonfma-unroll2.c",
630 "src/f32-spmm/gen/16x1-neonfma.c",
631 "src/f32-spmm/gen/16x2-neonfma.c",
632 "src/f32-spmm/gen/16x4-neonfma.c",
633 "src/f32-spmm/gen/4x1-neonfma-pipelined.c",
634 "src/f32-spmm/gen/4x1-neonfma-unroll2.c",
635 "src/f32-spmm/gen/4x1-neonfma.c",
636 "src/f32-spmm/gen/4x2-neonfma.c",
637 "src/f32-spmm/gen/4x4-neonfma.c",
638 "src/f32-spmm/gen/8x1-neonfma-pipelined.c",
639 "src/f32-spmm/gen/8x1-neonfma-unroll2.c",
640 "src/f32-spmm/gen/8x1-neonfma.c",
641 "src/f32-spmm/gen/8x2-neonfma.c",
642 "src/f32-spmm/gen/8x4-neonfma.c",
Marat Dukhan22aae132019-11-22 17:10:29 -0800643 "src/math/sigmoid-neonfma-p5-div.c",
Marat Dukhan91f8d862019-11-27 12:25:42 -0800644 "src/math/sigmoid-neonfma-lut2048-p1-div.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700645]
646
647AARCH64_NEONFP16ARITH_UKERNELS = [
Marat Dukhan40a672f2019-11-25 03:08:22 -0800648 "src/f16-gemm/gen/4x8-neonfp16arith-ld64.c",
649 "src/f16-gemm/gen/6x8-neonfp16arith-ld64.c",
650 "src/f16-gemm/gen/8x8-neonfp16arith-ld64.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700651]
652
# C sources for microkernels whose file names carry the "sse" tag.
# Covers the f32 avgpool, bilinear, clamp, dwconv(-spchw), gavgpool(-spchw),
# gemm (gen and gen-inc), hswish, igemm, maxpool, pavgpool, ppmm, rmax, spmm,
# vbinary and vmulcaddc families, plus x32-packx.
# NOTE(review): the compiler flags applied to these files are set by a rule
# outside this part of the file.
653SSE_UKERNELS = [
654 "src/f32-avgpool/mp9p8q-sse.c",
655 "src/f32-avgpool/up9-sse.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800656 "src/f32-bilinear/gen/sse-c4.c",
657 "src/f32-bilinear/gen/sse-c8.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700658 "src/f32-clamp/sse.c",
Marat Dukhan1e782c42019-11-21 17:02:40 -0800659 "src/f32-dwconv-spchw/3x3p1-sse.c",
660 "src/f32-dwconv-spchw/3x3s2p1-sse.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800661 "src/f32-dwconv/gen/up4x25-sse-acc2.c",
662 "src/f32-dwconv/gen/up4x25-sse.c",
663 "src/f32-dwconv/gen/up4x4-sse-acc2.c",
664 "src/f32-dwconv/gen/up4x4-sse.c",
665 "src/f32-dwconv/gen/up4x9-sse-acc2.c",
666 "src/f32-dwconv/gen/up4x9-sse.c",
667 "src/f32-dwconv/gen/up8x25-sse-acc2.c",
668 "src/f32-dwconv/gen/up8x25-sse.c",
669 "src/f32-dwconv/gen/up8x4-sse-acc2.c",
670 "src/f32-dwconv/gen/up8x4-sse.c",
671 "src/f32-dwconv/gen/up8x9-sse-acc2.c",
672 "src/f32-dwconv/gen/up8x9-sse.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700673 "src/f32-gavgpool-spchw/sse-x4.c",
674 "src/f32-gavgpool/mp7p7q-sse.c",
675 "src/f32-gavgpool/up7-sse.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800676 "src/f32-gemm/gen/1x8-sse-dup.c",
677 "src/f32-gemm/gen/1x8-sse-load1.c",
678 "src/f32-gemm/gen/1x8s4-sse.c",
679 "src/f32-gemm/gen/4x8-sse-dup.c",
680 "src/f32-gemm/gen/4x8-sse-load1.c",
681 "src/f32-gemm/gen/4x8s4-sse.c",
682 "src/f32-gemm/gen-inc/1x8-sse-dup.c",
683 "src/f32-gemm/gen-inc/1x8-sse-load1.c",
684 "src/f32-gemm/gen-inc/1x8s4-sse.c",
685 "src/f32-gemm/gen-inc/4x8-sse-dup.c",
686 "src/f32-gemm/gen-inc/4x8-sse-load1.c",
687 "src/f32-gemm/gen-inc/4x8s4-sse.c",
Marat Dukhan662faa02019-12-09 22:48:16 -0800688 "src/f32-hswish/gen/sse-x4.c",
689 "src/f32-hswish/gen/sse-x8.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800690 "src/f32-igemm/gen/1x8-sse-dup.c",
691 "src/f32-igemm/gen/1x8-sse-load1.c",
692 "src/f32-igemm/gen/1x8s4-sse.c",
693 "src/f32-igemm/gen/4x2c4-sse.c",
694 "src/f32-igemm/gen/4x8-sse-dup.c",
695 "src/f32-igemm/gen/4x8-sse-load1.c",
696 "src/f32-igemm/gen/4x8s4-sse.c",
Marat Dukhan329da642019-11-19 21:44:39 -0800697 "src/f32-maxpool/9p8x-sse-c4.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700698 "src/f32-pavgpool/mp9p8q-sse.c",
699 "src/f32-pavgpool/up9-sse.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800700 "src/f32-ppmm/gen/4x8-sse.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700701 "src/f32-rmax/sse.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800702 "src/f32-spmm/gen/4x1-sse.c",
703 "src/f32-spmm/gen/8x1-sse.c",
704 "src/f32-vbinary/gen/vadd-sse-x4.c",
705 "src/f32-vbinary/gen/vadd-sse-x8.c",
706 "src/f32-vbinary/gen/vaddc-sse-x4.c",
707 "src/f32-vbinary/gen/vaddc-sse-x8.c",
Marat Dukhan77ca6302019-12-06 12:48:15 -0800708 "src/f32-vbinary/gen/vdiv-sse-x4.c",
709 "src/f32-vbinary/gen/vdiv-sse-x8.c",
710 "src/f32-vbinary/gen/vdivc-sse-x4.c",
711 "src/f32-vbinary/gen/vdivc-sse-x8.c",
Marat Dukhan403b7d42019-12-05 12:49:11 -0800712 "src/f32-vbinary/gen/vmax-sse-x4.c",
713 "src/f32-vbinary/gen/vmax-sse-x8.c",
714 "src/f32-vbinary/gen/vmaxc-sse-x4.c",
715 "src/f32-vbinary/gen/vmaxc-sse-x8.c",
716 "src/f32-vbinary/gen/vmin-sse-x4.c",
717 "src/f32-vbinary/gen/vmin-sse-x8.c",
718 "src/f32-vbinary/gen/vminc-sse-x4.c",
719 "src/f32-vbinary/gen/vminc-sse-x8.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800720 "src/f32-vbinary/gen/vmul-sse-x4.c",
721 "src/f32-vbinary/gen/vmul-sse-x8.c",
722 "src/f32-vbinary/gen/vmulc-sse-x4.c",
723 "src/f32-vbinary/gen/vmulc-sse-x8.c",
Marat Dukhan77ca6302019-12-06 12:48:15 -0800724 "src/f32-vbinary/gen/vrdivc-sse-x4.c",
725 "src/f32-vbinary/gen/vrdivc-sse-x8.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800726 "src/f32-vbinary/gen/vrsubc-sse-x4.c",
727 "src/f32-vbinary/gen/vrsubc-sse-x8.c",
728 "src/f32-vbinary/gen/vsub-sse-x4.c",
729 "src/f32-vbinary/gen/vsub-sse-x8.c",
730 "src/f32-vbinary/gen/vsubc-sse-x4.c",
731 "src/f32-vbinary/gen/vsubc-sse-x8.c",
732 "src/f32-vmulcaddc/gen/c4-sse-2x.c",
733 "src/f32-vmulcaddc/gen/c8-sse-2x.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700734 "src/x32-packx/x4-sse.c",
735]
736
# C sources for microkernels whose file names carry the "sse2" tag.
# Mixes f32 kernels (argmaxpool, prelu, sigmoid), q8 and u8 kernels,
# x32/x8 pad and zip kernels, and src/math/ reference routines (exp,
# expminus, sigmoid).
737SSE2_UKERNELS = [
Marat Dukhan329da642019-11-19 21:44:39 -0800738 "src/f32-argmaxpool/9p8x-sse2-c4.c",
739 "src/f32-argmaxpool/4x-sse2-c4.c",
740 "src/f32-argmaxpool/9x-sse2-c4.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800741 "src/f32-prelu/gen/sse2-2x4.c",
742 "src/f32-prelu/gen/sse2-2x8.c",
743 "src/f32-sigmoid/gen/sse2-p5-div-x8.c",
744 "src/f32-sigmoid/gen/sse2-p5-div-x16.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700745 "src/q8-avgpool/mp9p8q-sse2.c",
746 "src/q8-avgpool/up9-sse2.c",
747 "src/q8-igemm/4x4c2-sse2.c",
748 "src/q8-dwconv/up8x9-sse2.c",
749 "src/q8-gavgpool/mp7p7q-sse2.c",
750 "src/q8-gavgpool/up7-sse2.c",
751 "src/q8-gemm/2x4c8-sse2.c",
752 "src/q8-gemm/4x4c2-sse2.c",
753 "src/q8-vadd/sse2.c",
754 "src/u8-clamp/sse2.c",
Marat Dukhan329da642019-11-19 21:44:39 -0800755 "src/u8-maxpool/9p8x-sse2-c16.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700756 "src/u8-rmax/sse2.c",
757 "src/x32-pad/x2-sse2.c",
758 "src/x32-zip/x2-sse2.c",
759 "src/x32-zip/x3-sse2.c",
760 "src/x32-zip/x4-sse2.c",
761 "src/x32-zip/xm-sse2.c",
762 "src/x8-zip/x2-sse2.c",
763 "src/x8-zip/x3-sse2.c",
764 "src/x8-zip/x4-sse2.c",
765 "src/x8-zip/xm-sse2.c",
Marat Dukhanffd68402019-11-15 15:19:11 -0800766 "src/math/exp-sse2-p5.c",
767 "src/math/expminus-sse2-p5.c",
Marat Dukhan80bafd22019-11-18 10:16:01 -0800768 "src/math/sigmoid-sse2-p5-div.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700769]
770
# C sources for microkernels whose file names carry the "sse41" tag:
# the f32 prelu (2x4, 2x8) and f32 sigmoid (p5-div x8, x16) variants.
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800771SSE41_UKERNELS = [
Marat Dukhan40a672f2019-11-25 03:08:22 -0800772 "src/f32-prelu/gen/sse41-2x4.c",
773 "src/f32-prelu/gen/sse41-2x8.c",
774 "src/f32-sigmoid/gen/sse41-p5-div-x8.c",
775 "src/f32-sigmoid/gen/sse41-p5-div-x16.c",
Marat Dukhan69c3f2c2019-11-06 12:30:01 -0800776]
777
# C sources for microkernels whose file names carry the "avx" tag.
# Covers the f32 clamp, dwconv, gemm (gen and gen-inc), hswish, igemm, rmax,
# vbinary and vscale families.
Marat Dukhan08c4a432019-10-03 09:29:21 -0700778AVX_UKERNELS = [
Marat Dukhane2c3f292019-11-27 15:40:54 -0800779 "src/f32-clamp/avx.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800780 "src/f32-dwconv/gen/up16x4-avx-acc2.c",
781 "src/f32-dwconv/gen/up16x4-avx.c",
782 "src/f32-dwconv/gen/up8x4-avx-acc2.c",
783 "src/f32-dwconv/gen/up8x4-avx.c",
784 "src/f32-dwconv/gen/up16x9-avx-acc2.c",
785 "src/f32-dwconv/gen/up16x9-avx.c",
786 "src/f32-dwconv/gen/up8x9-avx-acc2.c",
787 "src/f32-dwconv/gen/up8x9-avx.c",
788 "src/f32-dwconv/gen/up16x25-avx-acc2.c",
789 "src/f32-dwconv/gen/up16x25-avx.c",
790 "src/f32-dwconv/gen/up8x25-avx-acc2.c",
791 "src/f32-dwconv/gen/up8x25-avx.c",
792 "src/f32-gemm/gen/1x8-avx-broadcast.c",
793 "src/f32-gemm/gen/4x8-avx-broadcast.c",
794 "src/f32-gemm/gen/5x8-avx-broadcast.c",
795 "src/f32-gemm/gen/6x8-avx-broadcast.c",
796 "src/f32-gemm/gen/7x8-avx-broadcast.c",
Marat Dukhaneccfd712019-12-08 16:49:27 -0800797 "src/f32-gemm/gen/1x16-avx-broadcast.c",
798 "src/f32-gemm/gen/3x16-avx-broadcast.c",
799 "src/f32-gemm/gen/4x16-avx-broadcast.c",
800 "src/f32-gemm/gen/5x16-avx-broadcast.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800801 "src/f32-gemm/gen-inc/1x8-avx-broadcast.c",
802 "src/f32-gemm/gen-inc/4x8-avx-broadcast.c",
803 "src/f32-gemm/gen-inc/5x8-avx-broadcast.c",
804 "src/f32-gemm/gen-inc/6x8-avx-broadcast.c",
805 "src/f32-gemm/gen-inc/7x8-avx-broadcast.c",
Marat Dukhaneccfd712019-12-08 16:49:27 -0800806 "src/f32-gemm/gen-inc/1x16-avx-broadcast.c",
807 "src/f32-gemm/gen-inc/3x16-avx-broadcast.c",
808 "src/f32-gemm/gen-inc/4x16-avx-broadcast.c",
809 "src/f32-gemm/gen-inc/5x16-avx-broadcast.c",
Marat Dukhan662faa02019-12-09 22:48:16 -0800810 "src/f32-hswish/gen/avx-x8.c",
811 "src/f32-hswish/gen/avx-x16.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800812 "src/f32-igemm/gen/1x8-avx-broadcast.c",
813 "src/f32-igemm/gen/4x8-avx-broadcast.c",
814 "src/f32-igemm/gen/5x8-avx-broadcast.c",
815 "src/f32-igemm/gen/6x8-avx-broadcast.c",
816 "src/f32-igemm/gen/7x8-avx-broadcast.c",
Marat Dukhaneccfd712019-12-08 16:49:27 -0800817 "src/f32-igemm/gen/1x16-avx-broadcast.c",
818 "src/f32-igemm/gen/3x16-avx-broadcast.c",
819 "src/f32-igemm/gen/4x16-avx-broadcast.c",
820 "src/f32-igemm/gen/5x16-avx-broadcast.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700821 "src/f32-rmax/avx.c",
Marat Dukhan9a88efe2019-12-10 15:54:24 -0800822 "src/f32-vbinary/gen/vadd-avx-x8.c",
823 "src/f32-vbinary/gen/vadd-avx-x16.c",
824 "src/f32-vbinary/gen/vaddc-avx-x8.c",
825 "src/f32-vbinary/gen/vaddc-avx-x16.c",
826 "src/f32-vbinary/gen/vdiv-avx-x8.c",
827 "src/f32-vbinary/gen/vdiv-avx-x16.c",
828 "src/f32-vbinary/gen/vdivc-avx-x8.c",
829 "src/f32-vbinary/gen/vdivc-avx-x16.c",
830 "src/f32-vbinary/gen/vmax-avx-x8.c",
831 "src/f32-vbinary/gen/vmax-avx-x16.c",
832 "src/f32-vbinary/gen/vmaxc-avx-x8.c",
833 "src/f32-vbinary/gen/vmaxc-avx-x16.c",
834 "src/f32-vbinary/gen/vmin-avx-x8.c",
835 "src/f32-vbinary/gen/vmin-avx-x16.c",
836 "src/f32-vbinary/gen/vminc-avx-x8.c",
837 "src/f32-vbinary/gen/vminc-avx-x16.c",
838 "src/f32-vbinary/gen/vmul-avx-x8.c",
839 "src/f32-vbinary/gen/vmul-avx-x16.c",
840 "src/f32-vbinary/gen/vmulc-avx-x8.c",
841 "src/f32-vbinary/gen/vmulc-avx-x16.c",
842 "src/f32-vbinary/gen/vrdivc-avx-x8.c",
843 "src/f32-vbinary/gen/vrdivc-avx-x16.c",
844 "src/f32-vbinary/gen/vrsubc-avx-x8.c",
845 "src/f32-vbinary/gen/vrsubc-avx-x16.c",
846 "src/f32-vbinary/gen/vsub-avx-x8.c",
847 "src/f32-vbinary/gen/vsub-avx-x16.c",
848 "src/f32-vbinary/gen/vsubc-avx-x8.c",
849 "src/f32-vbinary/gen/vsubc-avx-x16.c",
Marat Dukhan05ac8e32019-10-21 15:39:33 -0700850 "src/f32-vscale/avx-unroll32.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700851]
852
# C sources for microkernels whose file names carry the "fma3" tag.
# Covers the f32 dwconv, gemm (gen and gen-inc, including the 16s4 tiles),
# hswish and igemm families.
Marat Dukhanfda12b82019-11-21 12:27:59 -0800853FMA3_UKERNELS = [
Marat Dukhan40a672f2019-11-25 03:08:22 -0800854 "src/f32-dwconv/gen/up16x4-fma3-acc2.c",
855 "src/f32-dwconv/gen/up16x4-fma3.c",
856 "src/f32-dwconv/gen/up8x4-fma3-acc2.c",
857 "src/f32-dwconv/gen/up8x4-fma3.c",
858 "src/f32-dwconv/gen/up16x9-fma3-acc2.c",
859 "src/f32-dwconv/gen/up16x9-fma3.c",
860 "src/f32-dwconv/gen/up8x9-fma3-acc2.c",
861 "src/f32-dwconv/gen/up8x9-fma3.c",
862 "src/f32-dwconv/gen/up16x25-fma3-acc2.c",
863 "src/f32-dwconv/gen/up16x25-fma3.c",
864 "src/f32-dwconv/gen/up8x25-fma3-acc2.c",
865 "src/f32-dwconv/gen/up8x25-fma3.c",
866 "src/f32-gemm/gen/1x8-fma3-broadcast.c",
867 "src/f32-gemm/gen/4x8-fma3-broadcast.c",
868 "src/f32-gemm/gen/5x8-fma3-broadcast.c",
869 "src/f32-gemm/gen/6x8-fma3-broadcast.c",
870 "src/f32-gemm/gen/7x8-fma3-broadcast.c",
871 "src/f32-gemm/gen/8x8-fma3-broadcast.c",
Marat Dukhaneccfd712019-12-08 16:49:27 -0800872 "src/f32-gemm/gen/1x16-fma3-broadcast.c",
873 "src/f32-gemm/gen/3x16-fma3-broadcast.c",
874 "src/f32-gemm/gen/4x16-fma3-broadcast.c",
875 "src/f32-gemm/gen/5x16-fma3-broadcast.c",
Ashkan Aliabadid94b8562019-12-10 11:33:51 -0800876 "src/f32-gemm/gen/1x16s4-fma3-broadcast.c",
877 "src/f32-gemm/gen/3x16s4-fma3-broadcast.c",
878 "src/f32-gemm/gen/4x16s4-fma3-broadcast.c",
879 "src/f32-gemm/gen/5x16s4-fma3-broadcast.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800880 "src/f32-gemm/gen-inc/1x8-fma3-broadcast.c",
881 "src/f32-gemm/gen-inc/4x8-fma3-broadcast.c",
882 "src/f32-gemm/gen-inc/5x8-fma3-broadcast.c",
883 "src/f32-gemm/gen-inc/6x8-fma3-broadcast.c",
884 "src/f32-gemm/gen-inc/7x8-fma3-broadcast.c",
885 "src/f32-gemm/gen-inc/8x8-fma3-broadcast.c",
Marat Dukhaneccfd712019-12-08 16:49:27 -0800886 "src/f32-gemm/gen-inc/1x16-fma3-broadcast.c",
887 "src/f32-gemm/gen-inc/3x16-fma3-broadcast.c",
888 "src/f32-gemm/gen-inc/4x16-fma3-broadcast.c",
889 "src/f32-gemm/gen-inc/5x16-fma3-broadcast.c",
Ashkan Aliabadid94b8562019-12-10 11:33:51 -0800890 "src/f32-gemm/gen-inc/1x16s4-fma3-broadcast.c",
891 "src/f32-gemm/gen-inc/3x16s4-fma3-broadcast.c",
892 "src/f32-gemm/gen-inc/4x16s4-fma3-broadcast.c",
893 "src/f32-gemm/gen-inc/5x16s4-fma3-broadcast.c",
Marat Dukhan662faa02019-12-09 22:48:16 -0800894 "src/f32-hswish/gen/fma3-x8.c",
895 "src/f32-hswish/gen/fma3-x16.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800896 "src/f32-igemm/gen/1x8-fma3-broadcast.c",
897 "src/f32-igemm/gen/4x8-fma3-broadcast.c",
898 "src/f32-igemm/gen/5x8-fma3-broadcast.c",
899 "src/f32-igemm/gen/6x8-fma3-broadcast.c",
900 "src/f32-igemm/gen/7x8-fma3-broadcast.c",
901 "src/f32-igemm/gen/8x8-fma3-broadcast.c",
Marat Dukhaneccfd712019-12-08 16:49:27 -0800902 "src/f32-igemm/gen/1x16-fma3-broadcast.c",
903 "src/f32-igemm/gen/3x16-fma3-broadcast.c",
904 "src/f32-igemm/gen/4x16-fma3-broadcast.c",
905 "src/f32-igemm/gen/5x16-fma3-broadcast.c",
Ashkan Aliabadid94b8562019-12-10 11:33:51 -0800906 "src/f32-igemm/gen/1x16s4-fma3-broadcast.c",
907 "src/f32-igemm/gen/3x16s4-fma3-broadcast.c",
908 "src/f32-igemm/gen/4x16s4-fma3-broadcast.c",
909 "src/f32-igemm/gen/5x16s4-fma3-broadcast.c",
Marat Dukhanfda12b82019-11-21 12:27:59 -0800910]
911
# C sources for microkernels whose file names carry the "avx2" tag.
# Covers the f32 raddexpminusmax, raddextexp, raddstoreexpminusmax,
# vscaleexpminusmax and vscaleextexp families (all "p5" variants), plus the
# src/math/ exp, expminus and extexp routines.
Marat Dukhan6adff4e2019-10-14 18:32:07 -0700912AVX2_UKERNELS = [
Marat Dukhan4c4eb002019-12-08 21:27:49 -0800913 "src/f32-raddexpminusmax/gen/avx2-p5-x64.c",
914 "src/f32-raddexpminusmax/gen/avx2-p5-x64-acc2.c",
915 "src/f32-raddexpminusmax/gen/avx2-p5-x64-acc4.c",
916 "src/f32-raddexpminusmax/gen/avx2-p5-x72.c",
917 "src/f32-raddexpminusmax/gen/avx2-p5-x72-acc3.c",
918 "src/f32-raddexpminusmax/gen/avx2-p5-x80.c",
919 "src/f32-raddexpminusmax/gen/avx2-p5-x80-acc2.c",
920 "src/f32-raddexpminusmax/gen/avx2-p5-x80-acc5.c",
921 "src/f32-raddexpminusmax/gen/avx2-p5-x96.c",
922 "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc2.c",
923 "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc3.c",
924 "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc6.c",
925 "src/f32-raddextexp/gen/avx2-p5-x64.c",
926 "src/f32-raddextexp/gen/avx2-p5-x64-acc2.c",
927 "src/f32-raddextexp/gen/avx2-p5-x64-acc4.c",
928 "src/f32-raddextexp/gen/avx2-p5-x72.c",
929 "src/f32-raddextexp/gen/avx2-p5-x72-acc3.c",
930 "src/f32-raddextexp/gen/avx2-p5-x80.c",
931 "src/f32-raddextexp/gen/avx2-p5-x80-acc2.c",
932 "src/f32-raddextexp/gen/avx2-p5-x80-acc5.c",
933 "src/f32-raddextexp/gen/avx2-p5-x96.c",
934 "src/f32-raddextexp/gen/avx2-p5-x96-acc2.c",
935 "src/f32-raddextexp/gen/avx2-p5-x96-acc3.c",
936 "src/f32-raddextexp/gen/avx2-p5-x96-acc6.c",
937 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64.c",
938 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64-acc2.c",
939 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64-acc4.c",
940 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x72.c",
941 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x72-acc3.c",
942 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80.c",
943 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80-acc2.c",
944 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80-acc5.c",
945 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96.c",
946 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc2.c",
947 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc3.c",
948 "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc6.c",
949 "src/f32-vscaleexpminusmax/gen/avx2-p5-x8.c",
950 "src/f32-vscaleexpminusmax/gen/avx2-p5-x16.c",
951 "src/f32-vscaleexpminusmax/gen/avx2-p5-x24.c",
952 "src/f32-vscaleexpminusmax/gen/avx2-p5-x32.c",
953 "src/f32-vscaleexpminusmax/gen/avx2-p5-x40.c",
954 "src/f32-vscaleexpminusmax/gen/avx2-p5-x48.c",
955 "src/f32-vscaleexpminusmax/gen/avx2-p5-x56.c",
956 "src/f32-vscaleexpminusmax/gen/avx2-p5-x64.c",
957 "src/f32-vscaleexpminusmax/gen/avx2-p5-x72.c",
958 "src/f32-vscaleexpminusmax/gen/avx2-p5-x80.c",
959 "src/f32-vscaleexpminusmax/gen/avx2-p5-x88.c",
960 "src/f32-vscaleexpminusmax/gen/avx2-p5-x96.c",
961 "src/f32-vscaleextexp/gen/avx2-p5-x8.c",
962 "src/f32-vscaleextexp/gen/avx2-p5-x16.c",
963 "src/f32-vscaleextexp/gen/avx2-p5-x24.c",
964 "src/f32-vscaleextexp/gen/avx2-p5-x32.c",
965 "src/f32-vscaleextexp/gen/avx2-p5-x40.c",
966 "src/f32-vscaleextexp/gen/avx2-p5-x48.c",
967 "src/f32-vscaleextexp/gen/avx2-p5-x56.c",
968 "src/f32-vscaleextexp/gen/avx2-p5-x64.c",
969 "src/f32-vscaleextexp/gen/avx2-p5-x72.c",
970 "src/f32-vscaleextexp/gen/avx2-p5-x80.c",
971 "src/f32-vscaleextexp/gen/avx2-p5-x88.c",
972 "src/f32-vscaleextexp/gen/avx2-p5-x96.c",
Marat Dukhan6adff4e2019-10-14 18:32:07 -0700973 "src/math/exp-avx2-p5.c",
974 "src/math/exp-avx2-perm-p3.c",
975 "src/math/exp-avx2-perm-p4.c",
Marat Dukhan515c9772019-10-17 18:07:57 -0700976 "src/math/expminus-avx2-p5.c",
Marat Dukhan98ba4412019-10-23 02:14:28 -0700977 "src/math/extexp-avx2-p5.c",
Marat Dukhan6adff4e2019-10-14 18:32:07 -0700978]
979
# C sources for microkernels whose file names carry the "avx512f" tag.
# Covers the f32 clamp, dwconv, gemm (gen and gen-inc), hswish, igemm,
# raddexpminusmax, raddextexp, raddstoreexpminusmax, rmax, vbinary, vscale,
# vscaleexpminusmax and vscaleextexp families, plus the src/math/ exp and
# extexp routines.
Marat Dukhan08c4a432019-10-03 09:29:21 -0700980AVX512F_UKERNELS = [
Marat Dukhane2c3f292019-11-27 15:40:54 -0800981 "src/f32-clamp/avx512f.c",
Marat Dukhan479f87e2019-11-27 15:17:06 -0800982 "src/f32-dwconv/gen/up32x4-avx512f-acc2.c",
983 "src/f32-dwconv/gen/up32x4-avx512f.c",
984 "src/f32-dwconv/gen/up16x4-avx512f-acc2.c",
985 "src/f32-dwconv/gen/up16x4-avx512f.c",
986 "src/f32-dwconv/gen/up32x9-avx512f-acc2.c",
987 "src/f32-dwconv/gen/up32x9-avx512f.c",
988 "src/f32-dwconv/gen/up16x9-avx512f-acc2.c",
989 "src/f32-dwconv/gen/up16x9-avx512f.c",
990 "src/f32-dwconv/gen/up32x25-avx512f-acc2.c",
991 "src/f32-dwconv/gen/up32x25-avx512f.c",
992 "src/f32-dwconv/gen/up16x25-avx512f-acc2.c",
993 "src/f32-dwconv/gen/up16x25-avx512f.c",
Marat Dukhan0f349c42019-11-27 11:58:54 -0800994 "src/f32-gemm/gen/1x16-avx512f-broadcast.c",
995 "src/f32-gemm/gen/4x16-avx512f-broadcast.c",
996 "src/f32-gemm/gen/5x16-avx512f-broadcast.c",
997 "src/f32-gemm/gen/6x16-avx512f-broadcast.c",
998 "src/f32-gemm/gen/7x16-avx512f-broadcast.c",
999 "src/f32-gemm/gen/8x16-avx512f-broadcast.c",
1000 "src/f32-gemm/gen-inc/1x16-avx512f-broadcast.c",
1001 "src/f32-gemm/gen-inc/4x16-avx512f-broadcast.c",
1002 "src/f32-gemm/gen-inc/5x16-avx512f-broadcast.c",
1003 "src/f32-gemm/gen-inc/6x16-avx512f-broadcast.c",
1004 "src/f32-gemm/gen-inc/7x16-avx512f-broadcast.c",
1005 "src/f32-gemm/gen-inc/8x16-avx512f-broadcast.c",
Marat Dukhan662faa02019-12-09 22:48:16 -08001006 "src/f32-hswish/gen/avx512f-x16.c",
1007 "src/f32-hswish/gen/avx512f-x32.c",
Marat Dukhan0f349c42019-11-27 11:58:54 -08001008 "src/f32-igemm/gen/1x16-avx512f-broadcast.c",
1009 "src/f32-igemm/gen/4x16-avx512f-broadcast.c",
1010 "src/f32-igemm/gen/5x16-avx512f-broadcast.c",
1011 "src/f32-igemm/gen/6x16-avx512f-broadcast.c",
1012 "src/f32-igemm/gen/7x16-avx512f-broadcast.c",
1013 "src/f32-igemm/gen/8x16-avx512f-broadcast.c",
Marat Dukhan4c4eb002019-12-08 21:27:49 -08001014 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128.c",
1015 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128-acc2.c",
1016 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128-acc4.c",
1017 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x144.c",
1018 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x144-acc3.c",
1019 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160.c",
1020 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160-acc2.c",
1021 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160-acc5.c",
1022 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192.c",
1023 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc2.c",
1024 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc3.c",
1025 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc6.c",
1026 "src/f32-raddextexp/gen/avx512f-p5-scalef-x128.c",
1027 "src/f32-raddextexp/gen/avx512f-p5-scalef-x128-acc2.c",
1028 "src/f32-raddextexp/gen/avx512f-p5-scalef-x128-acc4.c",
1029 "src/f32-raddextexp/gen/avx512f-p5-scalef-x144.c",
1030 "src/f32-raddextexp/gen/avx512f-p5-scalef-x144-acc3.c",
1031 "src/f32-raddextexp/gen/avx512f-p5-scalef-x160.c",
1032 "src/f32-raddextexp/gen/avx512f-p5-scalef-x160-acc2.c",
1033 "src/f32-raddextexp/gen/avx512f-p5-scalef-x160-acc5.c",
1034 "src/f32-raddextexp/gen/avx512f-p5-scalef-x192.c",
1035 "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc2.c",
1036 "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc3.c",
1037 "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc6.c",
1038 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128.c",
1039 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128-acc2.c",
1040 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128-acc4.c",
1041 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x144.c",
1042 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x144-acc3.c",
1043 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160.c",
1044 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160-acc2.c",
1045 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160-acc5.c",
1046 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192.c",
1047 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc2.c",
1048 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc3.c",
1049 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc6.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001050 "src/f32-rmax/avx512f.c",
Marat Dukhan9a88efe2019-12-10 15:54:24 -08001051 "src/f32-vbinary/gen/vadd-avx512f-x16.c",
1052 "src/f32-vbinary/gen/vadd-avx512f-x32.c",
1053 "src/f32-vbinary/gen/vaddc-avx512f-x16.c",
1054 "src/f32-vbinary/gen/vaddc-avx512f-x32.c",
1055 "src/f32-vbinary/gen/vdiv-avx512f-x16.c",
1056 "src/f32-vbinary/gen/vdiv-avx512f-x32.c",
1057 "src/f32-vbinary/gen/vdivc-avx512f-x16.c",
1058 "src/f32-vbinary/gen/vdivc-avx512f-x32.c",
1059 "src/f32-vbinary/gen/vmax-avx512f-x16.c",
1060 "src/f32-vbinary/gen/vmax-avx512f-x32.c",
1061 "src/f32-vbinary/gen/vmaxc-avx512f-x16.c",
1062 "src/f32-vbinary/gen/vmaxc-avx512f-x32.c",
1063 "src/f32-vbinary/gen/vmin-avx512f-x16.c",
1064 "src/f32-vbinary/gen/vmin-avx512f-x32.c",
1065 "src/f32-vbinary/gen/vminc-avx512f-x16.c",
1066 "src/f32-vbinary/gen/vminc-avx512f-x32.c",
1067 "src/f32-vbinary/gen/vmul-avx512f-x16.c",
1068 "src/f32-vbinary/gen/vmul-avx512f-x32.c",
1069 "src/f32-vbinary/gen/vmulc-avx512f-x16.c",
1070 "src/f32-vbinary/gen/vmulc-avx512f-x32.c",
1071 "src/f32-vbinary/gen/vrdivc-avx512f-x16.c",
1072 "src/f32-vbinary/gen/vrdivc-avx512f-x32.c",
1073 "src/f32-vbinary/gen/vrsubc-avx512f-x16.c",
1074 "src/f32-vbinary/gen/vrsubc-avx512f-x32.c",
1075 "src/f32-vbinary/gen/vsub-avx512f-x16.c",
1076 "src/f32-vbinary/gen/vsub-avx512f-x32.c",
1077 "src/f32-vbinary/gen/vsubc-avx512f-x16.c",
1078 "src/f32-vbinary/gen/vsubc-avx512f-x32.c",
Marat Dukhan05ac8e32019-10-21 15:39:33 -07001079 "src/f32-vscale/avx512f-unroll64.c",
Marat Dukhan4c4eb002019-12-08 21:27:49 -08001080 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x16.c",
1081 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x32.c",
1082 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x48.c",
1083 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x64.c",
1084 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x80.c",
1085 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x96.c",
1086 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x112.c",
1087 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x128.c",
1088 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x144.c",
1089 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x160.c",
1090 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x176.c",
1091 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x192.c",
1092 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x16.c",
1093 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x32.c",
1094 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x48.c",
1095 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x64.c",
1096 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x80.c",
1097 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x96.c",
1098 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x112.c",
1099 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x128.c",
1100 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x144.c",
1101 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x160.c",
1102 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x176.c",
1103 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x192.c",
Marat Dukhan6adff4e2019-10-14 18:32:07 -07001104 "src/math/exp-avx512f-p5-scalef.c",
1105 "src/math/exp-avx512f-p5.c",
1106 "src/math/exp-avx512f-perm-p3.c",
Marat Dukhanfeb49232019-10-28 11:03:31 -07001107 "src/math/exp-avx512f-perm2-p2.c",
Marat Dukhan98ba4412019-10-23 02:14:28 -07001108 "src/math/extexp-avx512f-p5.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001109]
1110
# Assembly (.S) microkernel sources whose file names carry the "aarch32" tag:
# one q8 dwconv kernel and four f32 gemm 4x8 variants (cortex-a53, cortex-a75,
# pld-cortex-a75, ld64). Some live under gen/ and some directly in the kernel
# directory.
1111AARCH32_ASM_UKERNELS = [
1112 "src/q8-dwconv/up8x9-aarch32-neon.S",
Frank Barchard13916042019-12-11 10:56:34 -08001113 "src/f32-gemm/4x8-aarch32-neon-cortex-a53.S",
Frank Barchardabf81542019-12-13 16:18:30 -08001114 "src/f32-gemm/gen/4x8-aarch32-neon-cortex-a75.S",
1115 "src/f32-gemm/gen/4x8-aarch32-neon-pld-cortex-a75.S",
Frank Barchard8b0f0262019-11-27 23:18:40 -08001116 "src/f32-gemm/4x8-aarch32-neon-ld64.S",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001117]
1118
# Assembly (.S) microkernel sources whose file names carry the
# "aarch64-neonfma" tag: f32 dwconv, f32 gemm (gen and gen-inc) and f32 igemm,
# in per-core variants (cortex-a53/a55/a57/a73/a75) and generic ld64/ld128
# variants. The igemm entries mix files under gen/ with files directly in
# src/f32-igemm/.
1119AARCH64_ASM_UKERNELS = [
1120 "src/f32-dwconv/up4x9-aarch64-neonfma-cortex-a55.S",
1121 "src/f32-dwconv/up4x9-aarch64-neonfma.S",
Marat Dukhan40a672f2019-11-25 03:08:22 -08001122 "src/f32-gemm/gen/1x12-aarch64-neonfma-cortex-a53.S",
1123 "src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a53.S",
1124 "src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a57.S",
1125 "src/f32-gemm/gen/1x8-aarch64-neonfma-cortex-a75.S",
1126 "src/f32-gemm/gen/4x12-aarch64-neonfma-cortex-a53.S",
1127 "src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a53.S",
1128 "src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a57.S",
1129 "src/f32-gemm/gen/4x8-aarch64-neonfma-cortex-a75.S",
1130 "src/f32-gemm/gen/4x8-aarch64-neonfma-ld128.S",
1131 "src/f32-gemm/gen/4x8-aarch64-neonfma-ld64.S",
Frank Barchard387c2d12019-12-16 19:14:07 -08001132 "src/f32-gemm/gen/5x8-aarch64-neonfma-cortex-a57.S",
Marat Dukhan40a672f2019-11-25 03:08:22 -08001133 "src/f32-gemm/gen/5x8-aarch64-neonfma-cortex-a75.S",
1134 "src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a53.S",
Marat Dukhan40a672f2019-11-25 03:08:22 -08001135 "src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a73.S",
Frank Barchard387c2d12019-12-16 19:14:07 -08001136 "src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a57.S",
Marat Dukhan40a672f2019-11-25 03:08:22 -08001137 "src/f32-gemm/gen/6x8-aarch64-neonfma-cortex-a75.S",
1138 "src/f32-gemm/gen/6x8-aarch64-neonfma-ld128.S",
1139 "src/f32-gemm/gen/6x8-aarch64-neonfma-ld64.S",
1140 "src/f32-gemm/gen-inc/1x12-aarch64-neonfma-cortex-a53.S",
1141 "src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a53.S",
1142 "src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a57.S",
1143 "src/f32-gemm/gen-inc/1x8-aarch64-neonfma-cortex-a75.S",
1144 "src/f32-gemm/gen-inc/4x12-aarch64-neonfma-cortex-a53.S",
1145 "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a53.S",
1146 "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a57.S",
1147 "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-cortex-a75.S",
1148 "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-ld128.S",
1149 "src/f32-gemm/gen-inc/4x8-aarch64-neonfma-ld64.S",
Frank Barchard387c2d12019-12-16 19:14:07 -08001150 "src/f32-gemm/gen-inc/5x8-aarch64-neonfma-cortex-a57.S",
Marat Dukhan40a672f2019-11-25 03:08:22 -08001151 "src/f32-gemm/gen-inc/5x8-aarch64-neonfma-cortex-a75.S",
1152 "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a53.S",
Marat Dukhan40a672f2019-11-25 03:08:22 -08001153 "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a73.S",
Frank Barchard387c2d12019-12-16 19:14:07 -08001154 "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a57.S",
Marat Dukhan40a672f2019-11-25 03:08:22 -08001155 "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-cortex-a75.S",
1156 "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-ld128.S",
1157 "src/f32-gemm/gen-inc/6x8-aarch64-neonfma-ld64.S",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001158 "src/f32-igemm/1x12-aarch64-neonfma-cortex-a53.S",
Frank Barchard21be34f2019-10-09 19:32:19 -07001159 "src/f32-igemm/1x8-aarch64-neonfma-cortex-a53.S",
Frank Barchard387c2d12019-12-16 19:14:07 -08001160 "src/f32-igemm/gen/1x8-aarch64-neonfma-cortex-a57.S",
1161 "src/f32-igemm/gen/1x8-aarch64-neonfma-cortex-a75.S",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001162 "src/f32-igemm/4x12-aarch64-neonfma-cortex-a53.S",
Frank Barchard46fb8072019-10-25 12:54:22 -07001163 "src/f32-igemm/4x8-aarch64-neonfma-cortex-a53.S",
Frank Barchard387c2d12019-12-16 19:14:07 -08001164 "src/f32-igemm/gen/4x8-aarch64-neonfma-cortex-a57.S",
1165 "src/f32-igemm/gen/4x8-aarch64-neonfma-cortex-a75.S",
1166 "src/f32-igemm/gen/5x8-aarch64-neonfma-cortex-a57.S",
1167 "src/f32-igemm/gen/5x8-aarch64-neonfma-cortex-a75.S",
Frank Barcharda7fb8552019-10-23 17:14:17 -07001168 "src/f32-igemm/6x8-aarch64-neonfma-cortex-a53.S",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001169 "src/f32-igemm/6x8-aarch64-neonfma-cortex-a73.S",
Frank Barchard387c2d12019-12-16 19:14:07 -08001170 "src/f32-igemm/gen/6x8-aarch64-neonfma-cortex-a57.S",
1171 "src/f32-igemm/gen/6x8-aarch64-neonfma-cortex-a75.S",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001172]
1173
# Headers under src/xnnpack/ shared by the microkernel-facing header sets.
# This list is the base that INTERNAL_HDRS, ACCURACY_EVAL_HDRS,
# MICROKERNEL_BENCHMARK_HDRS and MICROKERNEL_TEST_HDRS extend with "+".
1174INTERNAL_MICROKERNEL_HDRS = [
1175 "src/xnnpack/argmaxpool.h",
1176 "src/xnnpack/avgpool.h",
Marat Dukhan35dacfb2019-11-07 19:18:16 -08001177 "src/xnnpack/bilinear.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001178 "src/xnnpack/clamp.h",
1179 "src/xnnpack/common.h",
1180 "src/xnnpack/conv.h",
1181 "src/xnnpack/dwconv.h",
1182 "src/xnnpack/gavgpool.h",
1183 "src/xnnpack/gemm.h",
1184 "src/xnnpack/hswish.h",
1185 "src/xnnpack/igemm.h",
Marat Dukhancfb31342019-12-05 10:42:57 -08001186 "src/xnnpack/intrinsics-polyfill.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001187 "src/xnnpack/lut.h",
1188 "src/xnnpack/math.h",
1189 "src/xnnpack/maxpool.h",
Marat Dukhan04f03be2019-11-19 12:36:47 -08001190 "src/xnnpack/memory.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001191 "src/xnnpack/packx.h",
1192 "src/xnnpack/pad.h",
1193 "src/xnnpack/params.h",
1194 "src/xnnpack/pavgpool.h",
1195 "src/xnnpack/ppmm.h",
1196 "src/xnnpack/prelu.h",
Marat Dukhan97579532019-10-18 16:40:39 -07001197 "src/xnnpack/raddexpminusmax.h",
Marat Dukhan6f8d4d32019-10-25 17:07:09 -07001198 "src/xnnpack/raddextexp.h",
Marat Dukhan97579532019-10-18 16:40:39 -07001199 "src/xnnpack/raddstoreexpminusmax.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001200 "src/xnnpack/rmax.h",
1201 "src/xnnpack/scalar-utils.h",
1202 "src/xnnpack/spmm.h",
1203 "src/xnnpack/unpool.h",
1204 "src/xnnpack/vadd.h",
Marat Dukhan1e782c42019-11-21 17:02:40 -08001205 "src/xnnpack/vbinary.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001206 "src/xnnpack/vmulcaddc.h",
Marat Dukhan05ac8e32019-10-21 15:39:33 -07001207 "src/xnnpack/vscale.h",
Marat Dukhan97579532019-10-18 16:40:39 -07001208 "src/xnnpack/vscaleexpminusmax.h",
Marat Dukhan6f8d4d32019-10-25 17:07:09 -07001209 "src/xnnpack/vscaleextexp.h",
Marat Dukhan1e782c42019-11-21 17:02:40 -08001210 "src/xnnpack/vunary.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001211 "src/xnnpack/zip.h",
1212]
1213
# Full internal header set: INTERNAL_MICROKERNEL_HDRS plus the public
# include/xnnpack.h and the additional src/xnnpack/ headers listed here.
# Passed as `hdrs` to the xnnpack_cc_library targets defined later in this
# file (tables, scalar_ukernels, wasm_ukernels, psimd_ukernels, neon_ukernels).
1214INTERNAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
1215 "include/xnnpack.h",
1216 "src/xnnpack/allocator.h",
1217 "src/xnnpack/compute.h",
1218 "src/xnnpack/im2col.h",
1219 "src/xnnpack/indirection.h",
Marat Dukhan6adff4e2019-10-14 18:32:07 -07001220 "src/xnnpack/math-stubs.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001221 "src/xnnpack/operator.h",
1222 "src/xnnpack/pack.h",
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -07001223 "src/xnnpack/params-init.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001224 "src/xnnpack/requantization-stubs.h",
Marat Dukhan6adff4e2019-10-14 18:32:07 -07001225 "src/xnnpack/requantization.h",
1226]
1227
# Header set that adds src/xnnpack/math-stubs.h to INTERNAL_MICROKERNEL_HDRS.
# NOTE(review): presumably for the accuracy-evaluation targets (see
# ACCURACY_EVAL_DEPS at the top of the file); the consuming rule is not visible
# in this part of the file.
1228ACCURACY_EVAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
1229 "src/xnnpack/math-stubs.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001230]
1231
# Header set that adds src/xnnpack/params-init.h and the public
# include/xnnpack.h to INTERNAL_MICROKERNEL_HDRS.
# NOTE(review): presumably for microkernel benchmark targets; the consuming
# rule is not visible in this part of the file.
1232MICROKERNEL_BENCHMARK_HDRS = INTERNAL_MICROKERNEL_HDRS + [
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -07001233 "src/xnnpack/params-init.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001234 "include/xnnpack.h",
1235]
1236
# Header set that adds isa-checks.h, params-init.h, requantization.h and the
# public include/xnnpack.h to INTERNAL_MICROKERNEL_HDRS.
# NOTE(review): presumably for microkernel unit-test targets; the consuming
# rule is not visible in this part of the file.
1237MICROKERNEL_TEST_HDRS = INTERNAL_MICROKERNEL_HDRS + [
1238 "src/xnnpack/isa-checks.h",
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -07001239 "src/xnnpack/params-init.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001240 "src/xnnpack/requantization.h",
1241 "include/xnnpack.h",
1242]
1243
# Stand-alone header list (not derived from INTERNAL_MICROKERNEL_HDRS)
# containing only params.h and common.h.
# NOTE(review): presumably for operator tests that need parameter structures;
# the consuming rule is not visible in this part of the file.
1244OPERATOR_TEST_PARAMS_HDRS = [
1245 "src/xnnpack/params.h",
1246 "src/xnnpack/common.h",
1247]
1248
# Stand-alone header list containing pack.h, operator.h and compute.h.
# NOTE(review): presumably for targets that use the weight-packing routines;
# the consuming rule is not visible in this part of the file.
1249WEIGHTS_PACK_HDRS = [
1250 "src/xnnpack/pack.h",
1251 "src/xnnpack/operator.h",
1252 "src/xnnpack/compute.h",
1253]
1254
# Compiler options that set the XNN_LOG_LEVEL preprocessor define according to
# the build configuration: 0 for :optimized_build, 5 for :debug_build, and 2
# for any other configuration. The :optimized_build and :debug_build
# config_setting targets are defined elsewhere in this file.
Marat Dukhanc8e00eb2019-10-04 14:55:26 -07001255LOGGING_COPTS = select({
1256 # No logging in optimized mode
1257 ":optimized_build": ["-DXNN_LOG_LEVEL=0"],
1258 # Full logging in debug mode
1259 ":debug_build": ["-DXNN_LOG_LEVEL=5"],
1260 # Error-only logging in default (fastbuild) mode
1261 "//conditions:default": ["-DXNN_LOG_LEVEL=2"],
1262})
1263
# Header list containing only the logging header, src/xnnpack/log.h.
1264LOGGING_HDRS = [
1265 "src/xnnpack/log.h",
1266]
1267
# Library target ":tables": builds TABLE_SRCS (defined earlier in this file,
# outside this chunk) against INTERNAL_HDRS with the standard copts.
# The ukernel libraries below list it in their deps.
Marat Dukhan08c4a432019-10-03 09:29:21 -07001268xnnpack_cc_library(
Marat Dukhan3a77ea72019-12-23 12:10:24 -08001269 name = "tables",
1270 srcs = TABLE_SRCS,
1271 hdrs = INTERNAL_HDRS,
1272 copts = xnnpack_std_copts(),
1273)
1274
# Scalar micro-kernels (SCALAR_UKERNELS); adds -marm on AArch32.
xnnpack_cc_library(
    name = "scalar_ukernels",
    srcs = SCALAR_UKERNELS,
    hdrs = INTERNAL_HDRS,
    aarch32_copts = ["-marm"],
    copts = xnnpack_std_copts(),
    deps = [":tables", "@FP16", "@FXdiv", "@pthreadpool"],
)
1288
# WebAssembly micro-kernels; sources are supplied only through `wasm_srcs`.
xnnpack_cc_library(
    name = "wasm_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    wasm_srcs = WASM_UKERNELS,
    deps = [":tables", "@FP16", "@FXdiv", "@pthreadpool"],
)
1301
# PSIMD micro-kernels; optimized builds add -O3 and -ffast-math, and AArch32
# builds add -marm with NEON enabled.
xnnpack_cc_library(
    name = "psimd_ukernels",
    srcs = PSIMD_UKERNELS,
    hdrs = INTERNAL_HDRS,
    aarch32_copts = ["-marm", "-mfpu=neon"],
    copts = xnnpack_std_copts(),
    optimized_copts = ["-O3", "-ffast-math"],
    deps = [":tables", "@FP16", "@psimd", "@pthreadpool"],
)
1322
# NEON micro-kernels; the same source list is used for AArch32 and AArch64.
xnnpack_cc_library(
    name = "neon_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = ["-marm", "-mfpu=neon"],
    aarch32_srcs = NEON_UKERNELS,
    aarch64_srcs = NEON_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = [":tables", "@FP16", "@pthreadpool"],
)
1339
# NEON+FMA micro-kernels; AArch64 additionally builds AARCH64_NEONFMA_UKERNELS.
xnnpack_cc_library(
    name = "neonfma_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = ["-marm", "-mfpu=neon-vfpv4"],
    aarch32_srcs = NEONFMA_UKERNELS,
    aarch64_srcs = NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = [":tables", "@FP16", "@pthreadpool"],
)
1356
# AArch64-only micro-kernels compiled with -march=armv8.2-a+fp16.
xnnpack_cc_library(
    name = "neonfp16arith_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch64_copts = ["-march=armv8.2-a+fp16"],
    aarch64_srcs = AARCH64_NEONFP16ARITH_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = [":tables", "@FP16", "@pthreadpool"],
)
1369
# x86 micro-kernels from SSE_UKERNELS and SSE2_UKERNELS, compiled with -msse2.
xnnpack_cc_library(
    name = "sse2_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-msse2"],
    x86_srcs = SSE_UKERNELS + SSE2_UKERNELS,
    deps = [":tables", "@FP16", "@pthreadpool"],
)
1382
# x86 micro-kernels from SSE41_UKERNELS, compiled with -msse4.1.
xnnpack_cc_library(
    name = "sse41_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-msse4.1"],
    x86_srcs = SSE41_UKERNELS,
    deps = [":tables", "@FP16", "@pthreadpool"],
)
1395
# x86 micro-kernels from AVX_UKERNELS, compiled with -mavx.
xnnpack_cc_library(
    name = "avx_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-mavx"],
    x86_srcs = AVX_UKERNELS,
    deps = [":tables", "@FP16", "@pthreadpool"],
)
1408
# x86 micro-kernels from FMA3_UKERNELS, compiled with -mfma.
xnnpack_cc_library(
    name = "fma3_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-mfma"],
    x86_srcs = FMA3_UKERNELS,
    deps = [":tables", "@FP16", "@pthreadpool"],
)
1423
# x86 micro-kernels from AVX2_UKERNELS, compiled with both -mfma and -mavx2.
xnnpack_cc_library(
    name = "avx2_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-mfma", "-mavx2"],
    x86_srcs = AVX2_UKERNELS,
    deps = [":tables", "@FP16", "@pthreadpool"],
)
1439
# x86 micro-kernels from AVX512F_UKERNELS, compiled with -mavx512f.
xnnpack_cc_library(
    name = "avx512f_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-mavx512f"],
    x86_srcs = AVX512F_UKERNELS,
    deps = [":tables", "@FP16", "@pthreadpool"],
)
1452
# Assembly micro-kernels for AArch32 and AArch64; the only header is assembly.h
# and no copts or deps are set.
xnnpack_cc_library(
    name = "asm_ukernels",
    hdrs = ["src/xnnpack/assembly.h"],
    aarch32_srcs = AARCH32_ASM_UKERNELS,
    aarch64_srcs = AARCH64_ASM_UKERNELS,
)
1459
# Aggregate of the per-architecture micro-kernel libraries. Each *_deps list
# names the libraries pulled in for that platform; `generic_deps` carries the
# scalar kernels.
xnnpack_aggregate_library(
    name = "ukernels",
    aarch32_deps = [
        ":psimd_ukernels",
        ":neon_ukernels",
        ":neonfma_ukernels",
        ":asm_ukernels",
    ],
    aarch64_deps = [
        ":psimd_ukernels",
        ":neon_ukernels",
        ":neonfma_ukernels",
        ":neonfp16arith_ukernels",
        ":asm_ukernels",
    ],
    generic_deps = [":scalar_ukernels"],
    wasm_deps = [":wasm_ukernels"],
    wasmsimd_deps = [":wasm_ukernels", ":psimd_ukernels"],
    x86_deps = [
        ":psimd_ukernels",
        ":sse2_ukernels",
        ":sse41_ukernels",
        ":avx_ukernels",
        ":fma3_ukernels",
        ":avx2_ukernels",
        ":avx512f_ukernels",
    ],
)
1493
# Library built from src/im2col.c; exposes common.h and im2col.h.
xnnpack_cc_library(
    name = "im2col",
    srcs = ["src/im2col.c"],
    hdrs = ["src/xnnpack/common.h", "src/xnnpack/im2col.h"],
    copts = xnnpack_std_copts(),
)
1503
# Library built from src/indirection.c against the internal headers.
xnnpack_cc_library(
    name = "indirection",
    srcs = ["src/indirection.c"],
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    deps = ["@FP16", "@FXdiv", "@pthreadpool"],
)
1515
# Library built from src/operator-run.c, with logging enabled via
# LOGGING_COPTS and LOGGING_HDRS.
xnnpack_cc_library(
    name = "operator_run",
    srcs = ["src/operator-run.c"],
    hdrs = INTERNAL_HDRS + LOGGING_HDRS,
    # -Wno-vla: wrappers for multi-pass microkernels keep their temporary
    # buffers in VLAs.
    copts = xnnpack_std_copts() + LOGGING_COPTS + ["-Wno-vla"],
    deps = ["@FP16", "@FXdiv", "@clog", "@pthreadpool"],
)
1531
# Header-less library whose only job is to export XNN_ENABLE_ASSEMBLY.
# The value is 1 unless :xnn_enable_assembly_explicit_false matches.
cc_library(
    name = "enable_assembly",
    defines = select({
        ":xnn_enable_assembly_explicit_true": ["XNN_ENABLE_ASSEMBLY=1"],
        ":xnn_enable_assembly_explicit_false": ["XNN_ENABLE_ASSEMBLY=0"],
        # Nothing requested explicitly: assembly stays on.
        "//conditions:default": ["XNN_ENABLE_ASSEMBLY=1"],
    }),
)
1540
# Operator implementations: OPERATOR_SRCS plus memory.c and operator-delete.c.
# WebAssembly builds (with or without SIMD) additionally compile wasm-stubs.c.
xnnpack_cc_library(
    name = "operators",
    srcs = OPERATOR_SRCS + ["src/memory.c", "src/operator-delete.c"],
    hdrs = INTERNAL_HDRS + LOGGING_HDRS,
    # Size-minimizing flags are applied in every configuration except debug.
    copts = xnnpack_std_copts() + LOGGING_COPTS + ["-Isrc", "-Iinclude"] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }),
    wasm_srcs = ["src/wasm-stubs.c"],
    wasmsimd_srcs = ["src/wasm-stubs.c"],
    deps = [":indirection", "@FP16", "@FXdiv", "@clog", "@pthreadpool"],
)
1565
# Public library target: compiles src/init.c and exposes include/xnnpack.h.
# @cpuinfo is a dependency everywhere except under :emscripten.
cc_library(
    name = "XNNPACK",
    srcs = ["src/init.c"],
    # Size-minimizing flags are applied in every configuration except debug.
    copts = xnnpack_std_copts() + LOGGING_COPTS + ["-Isrc", "-Iinclude"] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }),
    includes = ["include"],
    linkstatic = True,
    textual_hdrs = ["include/xnnpack.h"],
    visibility = xnnpack_visibility(),
    deps = [
        ":enable_assembly",
        ":ukernels",
        ":operator_run",
        ":operators",
        "@clog",
        "@pthreadpool",
    ] + select({
        ":emscripten": [],
        "//conditions:default": ["@cpuinfo"],
    }),
)
1594
# Same sources, flags and deps as :XNNPACK, plus four XNN_NO_* defines
# (Q8, U8, X8 and NCHW operators).
cc_library(
    name = "xnnpack_operators_nhwc_f32",
    srcs = ["src/init.c"],
    # Size-minimizing flags are applied in every configuration except debug.
    copts = xnnpack_std_copts() + LOGGING_COPTS + ["-Isrc", "-Iinclude"] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }),
    defines = [
        "XNN_NO_Q8_OPERATORS",
        "XNN_NO_U8_OPERATORS",
        "XNN_NO_X8_OPERATORS",
        "XNN_NO_NCHW_OPERATORS",
    ],
    includes = ["include"],
    linkstatic = True,
    textual_hdrs = ["include/xnnpack.h"],
    visibility = xnnpack_visibility(),
    deps = [
        ":enable_assembly",
        ":ukernels",
        ":operator_run",
        ":operators",
        "@clog",
        "@pthreadpool",
    ] + select({
        ":emscripten": [],
        "//conditions:default": ["@cpuinfo"],
    }),
)
1629
# Benchmark helper library (bench/utils.cc + bench/utils.h), built on Google
# Benchmark and cpuinfo.
xnnpack_cc_library(
    name = "bench_utils",
    srcs = ["bench/utils.cc"],
    hdrs = ["bench/utils.h"],
    copts = ["-Wno-unused-result"],
    deps = ["@com_google_benchmark//:benchmark", "@cpuinfo"],
)
1640
Frank Barchard7e955972019-10-11 10:34:25 -07001641######################### Benchmarks for micro-kernels #########################
Marat Dukhan08c4a432019-10-03 09:29:21 -07001642
# Q8 GEMM micro-kernel benchmark; ruy and gemmlowp copts/deps are optional.
xnnpack_benchmark(
    name = "q8_gemm_bench",
    srcs = [
        "bench/gemm.h",
        "bench/q8-gemm.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = (
        ["-Wno-unused-function"] +
        xnnpack_optional_ruy_copts() +
        xnnpack_optional_gemmlowp_copts()
    ),
    deps = (
        MICROKERNEL_BENCHMARK_DEPS +
        xnnpack_optional_ruy_deps() +
        xnnpack_optional_gemmlowp_deps()
    ),
)
1653
# F16 GEMM micro-kernel benchmark.
xnnpack_benchmark(
    name = "f16_gemm_bench",
    srcs = (
        ["bench/f16-gemm.cc", "bench/gemm.h", "src/xnnpack/AlignedAllocator.h"] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1664
# F32 IGEMM micro-kernel benchmark; also links :indirection.
xnnpack_benchmark(
    name = "f32_igemm_bench",
    srcs = (
        ["bench/f32-igemm.cc", "bench/conv.h", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
1674
# F32 HWC convolution micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_conv_hwc_bench",
    srcs = (
        ["bench/f32-conv-hwc.cc", "bench/dconv.h", "src/xnnpack/AlignedAllocator.h"] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1685
# F32 HWC-to-SpCHW convolution micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_conv_hwc2spchw_bench",
    srcs = (
        ["bench/f32-conv-hwc2spchw.cc", "bench/dconv.h", "src/xnnpack/AlignedAllocator.h"] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1696
# F32 depthwise-convolution micro-kernel benchmark; also links :indirection.
xnnpack_benchmark(
    name = "f32_dwconv_bench",
    srcs = (
        ["bench/f32-dwconv.cc", "bench/dwconv.h", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
1706
# F32 SpCHW depthwise-convolution micro-kernel benchmark; also links
# :indirection.
xnnpack_benchmark(
    name = "f32_dwconv_spchw_bench",
    srcs = (
        ["bench/f32-dwconv-spchw.cc", "bench/dwconv.h", "src/xnnpack/AlignedAllocator.h"] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
1716
# F32 GEMM micro-kernel benchmark; ruy copts/deps are optional.
xnnpack_benchmark(
    name = "f32_gemm_bench",
    srcs = (
        ["bench/f32-gemm.cc", "bench/gemm.h", "src/xnnpack/AlignedAllocator.h"] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts(),
    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps(),
)
1727
# F32 RAddExpMinusMax micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_raddexpminusmax_bench",
    srcs = (
        ["bench/f32-raddexpminusmax.cc", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1736
# F32 RAddExtExp micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_raddextexp_bench",
    srcs = (
        ["bench/f32-raddextexp.cc", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1745
# F32 RAddStoreExpMinusMax micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_raddstoreexpminusmax_bench",
    srcs = (
        ["bench/f32-raddstoreexpminusmax.cc", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1754
# F32 RMax micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_rmax_bench",
    srcs = ["bench/f32-rmax.cc", "src/xnnpack/AlignedAllocator.h"] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1763
# F32 sigmoid micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_sigmoid_bench",
    srcs = ["bench/f32-sigmoid.cc", "src/xnnpack/AlignedAllocator.h"] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1773
# F32 SpMM micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_spmm_bench",
    srcs = (
        ["bench/f32-spmm.cc", "bench/gemm.h", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1784
# F32 SoftArgMax micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_softargmax_bench",
    srcs = ["bench/f32-softargmax.cc"] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1793
# F32 VScaleExpMinusMax micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_vscaleexpminusmax_bench",
    srcs = (
        ["bench/f32-vscaleexpminusmax.cc", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1802
# F32 VScaleExtExp micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_vscaleextexp_bench",
    srcs = (
        ["bench/f32-vscaleextexp.cc", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1811
# F32 im2col + GEMM micro-kernel benchmark; also links :im2col.
xnnpack_benchmark(
    name = "f32_im2col_gemm_bench",
    srcs = (
        ["bench/f32-im2col-gemm.cc", "bench/conv.h", "src/xnnpack/AlignedAllocator.h"] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_BENCHMARK_HDRS
    ),
    deps = MICROKERNEL_BENCHMARK_DEPS + [":im2col"],
)
1821
1822########################### Benchmarks for operators ###########################
1823
# Operator benchmark built from bench/add.cc.
xnnpack_benchmark(name = "add_bench", srcs = ["bench/add.cc"], deps = OPERATOR_BENCHMARK_DEPS)
1829
# Operator benchmark built from bench/average-pooling.cc.
xnnpack_benchmark(
    name = "average_pooling_bench",
    deps = OPERATOR_BENCHMARK_DEPS,
    srcs = ["bench/average-pooling.cc"],
)
1835
# Operator benchmark built from bench/channel-shuffle.cc.
xnnpack_benchmark(
    name = "channel_shuffle_bench",
    deps = OPERATOR_BENCHMARK_DEPS,
    srcs = ["bench/channel-shuffle.cc"],
)
1841
# Convolution operator benchmark; TFLite and ARM Compute Library copts/deps
# are optional.
xnnpack_benchmark(
    name = "convolution_bench",
    srcs = ["bench/convolution.cc"],
    copts = (
        xnnpack_optional_tflite_copts() +
        xnnpack_optional_armcl_copts()
    ),
    deps = (
        OPERATOR_BENCHMARK_DEPS +
        xnnpack_optional_tflite_deps() +
        xnnpack_optional_armcl_deps()
    ),
)
1848
# Deconvolution operator benchmark; TFLite copts/deps are optional.
xnnpack_benchmark(
    name = "deconvolution_bench",
    srcs = ["bench/deconvolution.cc"],
    copts = xnnpack_optional_tflite_copts(),
    deps = (
        OPERATOR_BENCHMARK_DEPS +
        xnnpack_optional_tflite_deps()
    ),
)
1855
# Operator benchmark built from bench/global-average-pooling.cc.
xnnpack_benchmark(
    name = "global_average_pooling_bench",
    deps = OPERATOR_BENCHMARK_DEPS,
    srcs = ["bench/global-average-pooling.cc"],
)
1861
# Operator benchmark built from bench/max-pooling.cc.
xnnpack_benchmark(
    name = "max_pooling_bench",
    deps = OPERATOR_BENCHMARK_DEPS,
    srcs = ["bench/max-pooling.cc"],
)
1867
# Sigmoid operator benchmark; TFLite copts/deps are optional.
xnnpack_benchmark(
    name = "sigmoid_bench",
    srcs = ["bench/sigmoid.cc"],
    copts = xnnpack_optional_tflite_copts(),
    deps = (
        OPERATOR_BENCHMARK_DEPS +
        xnnpack_optional_tflite_deps()
    ),
)
1874
# PReLU operator benchmark; TFLite copts/deps are optional.
xnnpack_benchmark(
    name = "prelu_bench",
    srcs = ["bench/prelu.cc"],
    copts = xnnpack_optional_tflite_copts(),
    deps = (
        OPERATOR_BENCHMARK_DEPS +
        xnnpack_optional_tflite_deps()
    ),
)
1881
# Operator benchmark built from bench/softargmax.cc.
xnnpack_benchmark(
    name = "softargmax_bench",
    deps = OPERATOR_BENCHMARK_DEPS,
    srcs = ["bench/softargmax.cc"],
)
1887
Marat Dukhanc068bb62019-10-04 13:24:39 -07001888############################# End-to-end benchmarks ############################
1889
# Model library built from models/mobilenet-v1.cc on top of :XNNPACK.
cc_library(
    name = "mobilenet_v1",
    srcs = ["models/mobilenet-v1.cc"],
    hdrs = ["models/models.h"],
    copts = xnnpack_std_cxxopts(),
    linkstatic = True,
    deps = [":XNNPACK", "@pthreadpool"],
)
1901
# Model library built from models/mobilenet-v2.cc on top of :XNNPACK.
cc_library(
    name = "mobilenet_v2",
    srcs = ["models/mobilenet-v2.cc"],
    hdrs = ["models/models.h"],
    copts = xnnpack_std_cxxopts(),
    linkstatic = True,
    deps = [":XNNPACK", "@pthreadpool"],
)
1913
# Model library built from models/mobilenet-v3-large.cc on top of :XNNPACK.
cc_library(
    name = "mobilenet_v3_large",
    srcs = ["models/mobilenet-v3-large.cc"],
    hdrs = ["models/models.h"],
    copts = xnnpack_std_cxxopts(),
    linkstatic = True,
    deps = [":XNNPACK", "@pthreadpool"],
)
1925
# Model library built from models/mobilenet-v3-small.cc on top of :XNNPACK.
cc_library(
    name = "mobilenet_v3_small",
    srcs = ["models/mobilenet-v3-small.cc"],
    hdrs = ["models/models.h"],
    copts = xnnpack_std_cxxopts(),
    linkstatic = True,
    deps = [":XNNPACK", "@pthreadpool"],
)
1937
# End-to-end benchmark for F32 DWCONV micro-kernels; links :XNNPACK and the
# four MobileNet model libraries.
xnnpack_benchmark(
    name = "f32_dwconv_e2e_bench",
    srcs = ["bench/f32-dwconv-e2e.cc", "bench/end2end.h"] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS + [
        ":XNNPACK",
        ":mobilenet_v1",
        ":mobilenet_v2",
        ":mobilenet_v3_large",
        ":mobilenet_v3_small",
    ],
)
1953
# End-to-end benchmark for F32 GEMM micro-kernels; links :XNNPACK and the
# four MobileNet model libraries.
xnnpack_benchmark(
    name = "f32_gemm_e2e_bench",
    srcs = ["bench/f32-gemm-e2e.cc", "bench/end2end.h"] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS + [
        ":XNNPACK",
        ":mobilenet_v1",
        ":mobilenet_v2",
        ":mobilenet_v3_large",
        ":mobilenet_v3_small",
    ],
)
1969
# End-to-end benchmark (bench/end2end.cc) over the four MobileNet model
# libraries.
xnnpack_benchmark(
    name = "end2end_bench",
    deps = [
        ":XNNPACK",
        ":bench_utils",
        ":mobilenet_v1",
        ":mobilenet_v2",
        ":mobilenet_v3_large",
        ":mobilenet_v3_small",
        "@pthreadpool",
    ],
    srcs = ["bench/end2end.cc"],
)
1983
Marat Dukhan6adff4e2019-10-14 18:32:07 -07001984#################### Accuracy evaluation for math functions ####################
1985
# Accuracy evaluation built from eval/f32-exp.cc.
xnnpack_benchmark(
    name = "f32_exp_eval",
    srcs = ["eval/f32-exp.cc", "src/xnnpack/AlignedAllocator.h"] + ACCURACY_EVAL_HDRS,
    deps = ACCURACY_EVAL_DEPS,
)
1994
# Accuracy evaluation built from eval/f32-expminus.cc.
xnnpack_benchmark(
    name = "f32_expminus_eval",
    srcs = ["eval/f32-expminus.cc", "src/xnnpack/AlignedAllocator.h"] + ACCURACY_EVAL_HDRS,
    deps = ACCURACY_EVAL_DEPS,
)
2003
# Accuracy evaluation built from eval/f32-extexp.cc.
xnnpack_benchmark(
    name = "f32_extexp_eval",
    srcs = ["eval/f32-extexp.cc", "src/xnnpack/AlignedAllocator.h"] + ACCURACY_EVAL_HDRS,
    deps = ACCURACY_EVAL_DEPS,
)
2012
# Accuracy evaluation built from eval/f32-sigmoid.cc.
xnnpack_benchmark(
    name = "f32_sigmoid_eval",
    srcs = ["eval/f32-sigmoid.cc", "src/xnnpack/AlignedAllocator.h"] + ACCURACY_EVAL_HDRS,
    deps = ACCURACY_EVAL_DEPS,
)
2021
Marat Dukhan08c4a432019-10-03 09:29:21 -07002022######################### Unit tests for micro-kernels #########################
2023
# Micro-kernel unit test: test/f16-gemm.cc with the GEMM tester header.
xnnpack_unit_test(
    name = "f16_gemm_test",
    srcs = (
        ["test/f16-gemm.cc", "test/gemm-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2033
# Micro-kernel unit test: test/f32-argmaxpool.cc with the argmaxpool tester
# header.
xnnpack_unit_test(
    name = "f32_argmaxpool_test",
    srcs = (
        ["test/f32-argmaxpool.cc", "test/argmaxpool-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2043
# Micro-kernel unit test: test/f32-avgpool.cc with the avgpool tester header.
xnnpack_unit_test(
    name = "f32_avgpool_test",
    srcs = (
        ["test/f32-avgpool.cc", "test/avgpool-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2053
# Micro-kernel unit test: test/f32-bilinear.cc with the bilinear tester header.
xnnpack_unit_test(
    name = "f32_bilinear_test",
    srcs = (
        ["test/f32-bilinear.cc", "test/bilinear-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2063
# Micro-kernel unit test: test/f32-clamp.cc with the clamp tester header.
xnnpack_unit_test(
    name = "f32_clamp_test",
    srcs = ["test/f32-clamp.cc", "test/clamp-microkernel-tester.h"] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
2072
# Micro-kernel unit test: test/f32-igemm.cc with the GEMM tester header.
xnnpack_unit_test(
    name = "f32_igemm_test",
    srcs = (
        ["test/f32-igemm.cc", "test/gemm-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2082
# Micro-kernel unit test: test/f32-conv-hwc.cc with the conv-hwc tester header.
xnnpack_unit_test(
    name = "f32_conv_hwc_test",
    srcs = (
        ["test/f32-conv-hwc.cc", "test/conv-hwc-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2092
# Micro-kernel unit test: test/f32-conv-hwc2spchw.cc with the conv-hwc2spchw
# tester header.
xnnpack_unit_test(
    name = "f32_conv_hwc2spchw_test",
    srcs = (
        [
            "test/f32-conv-hwc2spchw.cc",
            "test/conv-hwc2spchw-microkernel-tester.h",
            "src/xnnpack/AlignedAllocator.h",
        ] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2102
# Micro-kernel unit test: test/f32-dwconv.cc with the dwconv tester header.
xnnpack_unit_test(
    name = "f32_dwconv_test",
    srcs = (
        ["test/f32-dwconv.cc", "test/dwconv-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2112
# Micro-kernel unit test: test/f32-dwconv-spchw.cc with the dwconv-spchw
# tester header.
xnnpack_unit_test(
    name = "f32_dwconv_spchw_test",
    srcs = (
        [
            "test/f32-dwconv-spchw.cc",
            "test/dwconv-spchw-microkernel-tester.h",
            "src/xnnpack/AlignedAllocator.h",
        ] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2122
# Micro-kernel unit test: test/f32-gavgpool.cc with the gavgpool tester header.
xnnpack_unit_test(
    name = "f32_gavgpool_test",
    srcs = (
        ["test/f32-gavgpool.cc", "test/gavgpool-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2132
# Micro-kernel unit test: test/f32-gavgpool-spchw.cc with the gavgpool-spchw
# tester header.
xnnpack_unit_test(
    name = "f32_gavgpool_spchw_test",
    srcs = (
        [
            "test/f32-gavgpool-spchw.cc",
            "test/gavgpool-spchw-microkernel-tester.h",
            "src/xnnpack/AlignedAllocator.h",
        ] +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2142
# Micro-kernel unit test: test/f32-gemm.cc with the GEMM tester header.
xnnpack_unit_test(
    name = "f32_gemm_test",
    srcs = (
        ["test/f32-gemm.cc", "test/gemm-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2152
# Micro-kernel unit test: test/f32-gemminc.cc with the GEMM tester header.
xnnpack_unit_test(
    name = "f32_gemminc_test",
    srcs = (
        ["test/f32-gemminc.cc", "test/gemm-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2162
# Micro-kernel unit test: test/f32-hswish.cc with the hswish tester header.
xnnpack_unit_test(
    name = "f32_hswish_test",
    srcs = ["test/f32-hswish.cc", "test/hswish-microkernel-tester.h"] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
2171
# Micro-kernel unit test: test/f32-maxpool.cc with the maxpool tester header.
xnnpack_unit_test(
    name = "f32_maxpool_test",
    srcs = ["test/f32-maxpool.cc", "test/maxpool-microkernel-tester.h"] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
2180
# Micro-kernel unit test: test/f32-pavgpool.cc; uses the avgpool tester header.
xnnpack_unit_test(
    name = "f32_pavgpool_test",
    srcs = (
        ["test/f32-pavgpool.cc", "test/avgpool-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2190
# Micro-kernel unit test: test/f32-ppmm.cc; uses the GEMM tester header.
xnnpack_unit_test(
    name = "f32_ppmm_test",
    srcs = (
        ["test/f32-ppmm.cc", "test/gemm-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        WEIGHTS_PACK_HDRS +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2200
# Micro-kernel unit test: test/f32-prelu.cc with the prelu tester header.
xnnpack_unit_test(
    name = "f32_prelu_test",
    srcs = (
        ["test/f32-prelu.cc", "test/prelu-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2210
# Micro-kernel unit test: test/f32-raddexpminusmax.cc with its tester header.
xnnpack_unit_test(
    name = "f32_raddexpminusmax_test",
    srcs = (
        ["test/f32-raddexpminusmax.cc", "test/raddexpminusmax-microkernel-tester.h"] +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2219
# Micro-kernel unit test: test/f32-raddextexp.cc with its tester header.
xnnpack_unit_test(
    name = "f32_raddextexp_test",
    srcs = (
        ["test/f32-raddextexp.cc", "test/raddextexp-microkernel-tester.h"] +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2228
# Micro-kernel unit test: test/f32-raddstoreexpminusmax.cc with its tester
# header.
xnnpack_unit_test(
    name = "f32_raddstoreexpminusmax_test",
    srcs = (
        ["test/f32-raddstoreexpminusmax.cc", "test/raddstoreexpminusmax-microkernel-tester.h"] +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2237
# Micro-kernel unit test: test/f32-rmax.cc with the rmax tester header.
xnnpack_unit_test(
    name = "f32_rmax_test",
    srcs = ["test/f32-rmax.cc", "test/rmax-microkernel-tester.h"] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
2246
# Micro-kernel unit test: test/f32-sigmoid.cc; uses the vunary tester header.
xnnpack_unit_test(
    name = "f32_sigmoid_test",
    srcs = ["test/f32-sigmoid.cc", "test/vunary-microkernel-tester.h"] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
2255
# Micro-kernel unit test: test/f32-spmm.cc with the spmm tester header.
xnnpack_unit_test(
    name = "f32_spmm_test",
    srcs = (
        ["test/f32-spmm.cc", "test/spmm-microkernel-tester.h", "src/xnnpack/AlignedAllocator.h"] +
        MICROKERNEL_TEST_HDRS
    ),
    deps = MICROKERNEL_TEST_DEPS,
)
2265
# Micro-kernel unit test: test/f32-vadd.cc; uses the vbinary tester header.
xnnpack_unit_test(
    name = "f32_vadd_test",
    srcs = ["test/f32-vadd.cc", "test/vbinary-microkernel-tester.h"] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
2274
# Micro-kernel unit test: test/f32-vaddc.cc; uses the vbinaryc tester header.
xnnpack_unit_test(
    name = "f32_vaddc_test",
    srcs = ["test/f32-vaddc.cc", "test/vbinaryc-microkernel-tester.h"] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
2283
# Micro-kernel unit test: test/f32-vdiv.cc; uses the vbinary tester header.
xnnpack_unit_test(
    name = "f32_vdiv_test",
    srcs = ["test/f32-vdiv.cc", "test/vbinary-microkernel-tester.h"] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
2292
# Micro-kernel unit test for test/f32-vdivc.cc (uses the shared vbinaryc tester).
xnnpack_unit_test(
    name = "f32_vdivc_test",
    srcs = [
        "test/f32-vdivc.cc",
        "test/vbinaryc-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vrdivc.cc (uses the shared vbinaryc tester).
xnnpack_unit_test(
    name = "f32_vrdivc_test",
    srcs = [
        "test/f32-vrdivc.cc",
        "test/vbinaryc-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vmax.cc (uses the shared vbinary tester).
xnnpack_unit_test(
    name = "f32_vmax_test",
    srcs = [
        "test/f32-vmax.cc",
        "test/vbinary-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vmaxc.cc (uses the shared vbinaryc tester).
xnnpack_unit_test(
    name = "f32_vmaxc_test",
    srcs = [
        "test/f32-vmaxc.cc",
        "test/vbinaryc-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vmin.cc (uses the shared vbinary tester).
xnnpack_unit_test(
    name = "f32_vmin_test",
    srcs = [
        "test/f32-vmin.cc",
        "test/vbinary-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vminc.cc (uses the shared vbinaryc tester).
xnnpack_unit_test(
    name = "f32_vminc_test",
    srcs = [
        "test/f32-vminc.cc",
        "test/vbinaryc-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vmul.cc (uses the shared vbinary tester).
xnnpack_unit_test(
    name = "f32_vmul_test",
    srcs = [
        "test/f32-vmul.cc",
        "test/vbinary-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vmulc.cc (uses the shared vbinaryc tester).
xnnpack_unit_test(
    name = "f32_vmulc_test",
    srcs = [
        "test/f32-vmulc.cc",
        "test/vbinaryc-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vmulcaddc.cc; additionally pulls in the
# weight-packing headers and AlignedAllocator.h.
xnnpack_unit_test(
    name = "f32_vmulcaddc_test",
    srcs = [
        "test/f32-vmulcaddc.cc",
        "test/vmulcaddc-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vscale.cc.
xnnpack_unit_test(
    name = "f32_vscale_test",
    srcs = [
        "test/f32-vscale.cc",
        "test/vscale-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vscaleexpminusmax.cc.
xnnpack_unit_test(
    name = "f32_vscaleexpminusmax_test",
    srcs = [
        "test/f32-vscaleexpminusmax.cc",
        "test/vscaleexpminusmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vscaleextexp.cc.
xnnpack_unit_test(
    name = "f32_vscaleextexp_test",
    srcs = [
        "test/f32-vscaleextexp.cc",
        "test/vscaleextexp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vsub.cc (uses the shared vbinary tester).
xnnpack_unit_test(
    name = "f32_vsub_test",
    srcs = [
        "test/f32-vsub.cc",
        "test/vbinary-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vsubc.cc (uses the shared vbinaryc tester).
xnnpack_unit_test(
    name = "f32_vsubc_test",
    srcs = [
        "test/f32-vsubc.cc",
        "test/vbinaryc-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/f32-vrsubc.cc (uses the shared vbinaryc tester).
xnnpack_unit_test(
    name = "f32_vrsubc_test",
    srcs = [
        "test/f32-vrsubc.cc",
        "test/vbinaryc-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/q8-avgpool.cc; also compiles in AlignedAllocator.h.
xnnpack_unit_test(
    name = "q8_avgpool_test",
    srcs = [
        "test/q8-avgpool.cc",
        "test/avgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/q8-igemm.cc; additionally pulls in the
# weight-packing headers and AlignedAllocator.h.
xnnpack_unit_test(
    name = "q8_igemm_test",
    srcs = [
        "test/q8-igemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/q8-dwconv.cc; additionally pulls in the
# weight-packing headers and AlignedAllocator.h.
xnnpack_unit_test(
    name = "q8_dwconv_test",
    srcs = [
        "test/q8-dwconv.cc",
        "test/dwconv-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/q8-gavgpool.cc; also compiles in AlignedAllocator.h.
xnnpack_unit_test(
    name = "q8_gavgpool_test",
    srcs = [
        "test/q8-gavgpool.cc",
        "test/gavgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/q8-gemm.cc; additionally pulls in the
# weight-packing headers and AlignedAllocator.h.
xnnpack_unit_test(
    name = "q8_gemm_test",
    srcs = [
        "test/q8-gemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/q8-vadd.cc.
xnnpack_unit_test(
    name = "q8_vadd_test",
    srcs = [
        "test/q8-vadd.cc",
        "test/vadd-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/u8-clamp.cc.
xnnpack_unit_test(
    name = "u8_clamp_test",
    srcs = [
        "test/u8-clamp.cc",
        "test/clamp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/u8-lut32norm.cc.
xnnpack_unit_test(
    name = "u8_lut32norm_test",
    srcs = [
        "test/u8-lut32norm.cc",
        "test/lut-norm-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/u8-maxpool.cc.
xnnpack_unit_test(
    name = "u8_maxpool_test",
    srcs = [
        "test/u8-maxpool.cc",
        "test/maxpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/u8-rmax.cc.
xnnpack_unit_test(
    name = "u8_rmax_test",
    srcs = [
        "test/u8-rmax.cc",
        "test/rmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/x32-packx.cc; also compiles in AlignedAllocator.h.
xnnpack_unit_test(
    name = "x32_packx_test",
    srcs = [
        "test/x32-packx.cc",
        "test/pack-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/x32-pad.cc.
xnnpack_unit_test(
    name = "x32_pad_test",
    srcs = [
        "test/x32-pad.cc",
        "test/pad-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/x32-unpool.cc.
xnnpack_unit_test(
    name = "x32_unpool_test",
    srcs = [
        "test/x32-unpool.cc",
        "test/unpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/x32-zip.cc (uses the shared zip tester).
xnnpack_unit_test(
    name = "x32_zip_test",
    srcs = [
        "test/x32-zip.cc",
        "test/zip-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/x8-lut.cc.
xnnpack_unit_test(
    name = "x8_lut_test",
    srcs = [
        "test/x8-lut.cc",
        "test/lut-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

# Micro-kernel unit test for test/x8-zip.cc (uses the shared zip tester).
xnnpack_unit_test(
    name = "x8_zip_test",
    srcs = [
        "test/x8-zip.cc",
        "test/zip-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

########################### Size test for the library ##########################

# Binary built from test/size.c and linked against :xnnpack_operators_nhwc_f32.
xnnpack_binary(
    name = "size_test",
    srcs = ["test/size.c"],
    deps = [":xnnpack_operators_nhwc_f32"],
)

########################### Unit tests for operators ###########################

# Operator unit test for test/add-nc.cc.
xnnpack_unit_test(
    name = "add_nc_test",
    srcs = [
        "test/add-nc.cc",
        "test/add-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/add-nd.cc (uses the shared binary-elementwise tester).
xnnpack_unit_test(
    name = "add_nd_test",
    srcs = [
        "test/add-nd.cc",
        "test/binary-elementwise-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/argmax-pooling-nhwc.cc; also needs the operator
# test parameter headers.
xnnpack_unit_test(
    name = "argmax_pooling_nhwc_test",
    srcs = [
        "test/argmax-pooling-nhwc.cc",
        "test/argmax-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/average-pooling-nhwc.cc; also needs the operator
# test parameter headers.
xnnpack_unit_test(
    name = "average_pooling_nhwc_test",
    srcs = [
        "test/average-pooling-nhwc.cc",
        "test/average-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/channel-pad-nc.cc; also needs the operator test
# parameter headers.
xnnpack_unit_test(
    name = "channel_pad_nc_test",
    srcs = [
        "test/channel-pad-nc.cc",
        "test/channel-pad-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/channel-shuffle-nc.cc.
xnnpack_unit_test(
    name = "channel_shuffle_nc_test",
    srcs = [
        "test/channel-shuffle-nc.cc",
        "test/channel-shuffle-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/clamp-nc.cc.
xnnpack_unit_test(
    name = "clamp_nc_test",
    srcs = [
        "test/clamp-nc.cc",
        "test/clamp-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/convolution-nhwc.cc (uses the shared convolution tester).
xnnpack_unit_test(
    name = "convolution_nhwc_test",
    srcs = [
        "test/convolution-nhwc.cc",
        "test/convolution-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/convolution-nchw.cc (uses the shared convolution tester).
xnnpack_unit_test(
    name = "convolution_nchw_test",
    srcs = [
        "test/convolution-nchw.cc",
        "test/convolution-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/deconvolution-nhwc.cc; also needs the operator
# test parameter headers.
xnnpack_unit_test(
    name = "deconvolution_nhwc_test",
    srcs = [
        "test/deconvolution-nhwc.cc",
        "test/deconvolution-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/divide-nd.cc (uses the shared binary-elementwise tester).
xnnpack_unit_test(
    name = "divide_nd_test",
    srcs = [
        "test/binary-elementwise-operator-tester.h",
        "test/divide-nd.cc",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/fully-connected-nc.cc.
xnnpack_unit_test(
    name = "fully_connected_nc_test",
    srcs = [
        "test/fully-connected-nc.cc",
        "test/fully-connected-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/global-average-pooling-nwc.cc; also needs the
# operator test parameter headers.
xnnpack_unit_test(
    name = "global_average_pooling_nwc_test",
    srcs = [
        "test/global-average-pooling-nwc.cc",
        "test/global-average-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/global-average-pooling-ncw.cc. Unlike the NWC
# variant, this target does not add OPERATOR_TEST_PARAMS_HDRS to srcs.
xnnpack_unit_test(
    name = "global_average_pooling_ncw_test",
    srcs = [
        "test/global-average-pooling-ncw.cc",
        "test/global-average-pooling-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/hardswish-nc.cc.
xnnpack_unit_test(
    name = "hardswish_nc_test",
    srcs = [
        "test/hardswish-nc.cc",
        "test/hardswish-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/leaky-relu-nc.cc.
xnnpack_unit_test(
    name = "leaky_relu_nc_test",
    srcs = [
        "test/leaky-relu-nc.cc",
        "test/leaky-relu-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/max-pooling-nhwc.cc; also needs the operator test
# parameter headers.
xnnpack_unit_test(
    name = "max_pooling_nhwc_test",
    srcs = [
        "test/max-pooling-nhwc.cc",
        "test/max-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/maximum-nd.cc (uses the shared binary-elementwise tester).
xnnpack_unit_test(
    name = "maximum_nd_test",
    srcs = [
        "test/binary-elementwise-operator-tester.h",
        "test/maximum-nd.cc",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/minimum-nd.cc (uses the shared binary-elementwise tester).
xnnpack_unit_test(
    name = "minimum_nd_test",
    srcs = [
        "test/binary-elementwise-operator-tester.h",
        "test/minimum-nd.cc",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/multiply-nd.cc (uses the shared binary-elementwise tester).
xnnpack_unit_test(
    name = "multiply_nd_test",
    srcs = [
        "test/binary-elementwise-operator-tester.h",
        "test/multiply-nd.cc",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/prelu-nc.cc; also needs the operator test
# parameter headers.
xnnpack_unit_test(
    name = "prelu_nc_test",
    srcs = [
        "test/prelu-nc.cc",
        "test/prelu-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/resize-bilinear-nhwc.cc; also needs the operator
# test parameter headers.
xnnpack_unit_test(
    name = "resize_bilinear_nhwc_test",
    srcs = [
        "test/resize-bilinear-nhwc.cc",
        "test/resize-bilinear-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/sigmoid-nc.cc.
xnnpack_unit_test(
    name = "sigmoid_nc_test",
    srcs = [
        "test/sigmoid-nc.cc",
        "test/sigmoid-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/softargmax-nc.cc.
xnnpack_unit_test(
    name = "softargmax_nc_test",
    srcs = [
        "test/softargmax-nc.cc",
        "test/softargmax-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/subtract-nd.cc (uses the shared binary-elementwise tester).
xnnpack_unit_test(
    name = "subtract_nd_test",
    srcs = [
        "test/binary-elementwise-operator-tester.h",
        "test/subtract-nd.cc",
    ],
    deps = OPERATOR_TEST_DEPS,
)

# Operator unit test for test/unpooling-nhwc.cc.
xnnpack_unit_test(
    name = "unpooling_nhwc_test",
    srcs = [
        "test/unpooling-nhwc.cc",
        "test/unpooling-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

############################# Build configurations #############################

# Enables usage of assembly kernels.
# Matches builds invoked with --define xnn_enable_assembly=true.
config_setting(
    name = "xnn_enable_assembly_explicit_true",
    define_values = {"xnn_enable_assembly": "true"},
)

# Disables usage of assembly kernels.
# Matches builds invoked with --define xnn_enable_assembly=false.
config_setting(
    name = "xnn_enable_assembly_explicit_false",
    define_values = {"xnn_enable_assembly": "false"},
)

# Builds with -c dbg
config_setting(
    name = "debug_build",
    values = {
        "compilation_mode": "dbg",
    },
)

# Builds with -c opt
config_setting(
    name = "optimized_build",
    values = {
        "compilation_mode": "opt",
    },
)

# Matches builds with --cpu=k8.
config_setting(
    name = "linux_k8",
    values = {"cpu": "k8"},
)

# Matches builds with --cpu=aarch64.
config_setting(
    name = "linux_aarch64",
    values = {"cpu": "aarch64"},
)

# Matches any build that uses the Android crosstool, regardless of CPU.
config_setting(
    name = "android",
    values = {"crosstool_top": "//external:android/crosstool"},
)

# Matches Android crosstool builds with --cpu=armeabi-v7a.
config_setting(
    name = "android_armv7",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "armeabi-v7a",
    },
)

# Matches Android crosstool builds with --cpu=arm64-v8a.
config_setting(
    name = "android_arm64",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "arm64-v8a",
    },
)

# Matches Android crosstool builds with --cpu=x86.
config_setting(
    name = "android_x86",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "x86",
    },
)

# Matches Android crosstool builds with --cpu=x86_64.
config_setting(
    name = "android_x86_64",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "x86_64",
    },
)

# Matches builds with --apple_platform_type=macos and --cpu=darwin.
config_setting(
    name = "macos_x86_64",
    values = {
        "apple_platform_type": "macos",
        "cpu": "darwin",
    },
)

# Matches any build that uses the //toolchain:emscripten crosstool, regardless of CPU.
config_setting(
    name = "emscripten",
    values = {"crosstool_top": "//toolchain:emscripten"},
)

# Matches Emscripten crosstool builds with --cpu=wasm.
config_setting(
    name = "emscripten_wasm",
    values = {
        "crosstool_top": "//toolchain:emscripten",
        "cpu": "wasm",
    },
)

# Matches Emscripten crosstool builds with --cpu=wasm and --features=wasm_simd.
config_setting(
    name = "emscripten_wasmsimd",
    values = {
        "crosstool_top": "//toolchain:emscripten",
        "cpu": "wasm",
        "features": "wasm_simd",
    },
)

# Matches Emscripten crosstool builds with --cpu=asmjs.
config_setting(
    name = "emscripten_asmjs",
    values = {
        "crosstool_top": "//toolchain:emscripten",
        "cpu": "asmjs",
    },
)