blob: 8de69a67918e3978f8d0008477e6cf6699840e24 [file] [log] [blame]
Marat Dukhan08c4a432019-10-03 09:29:21 -07001# Copyright 2019 Google LLC
2#
3# This source code is licensed under the BSD-style license found in the
4# LICENSE file in the root directory of this source tree.
5#
6# Description:
7# XNNPACK - optimized floating-point neural network operators library
8
Marat Dukhan10a38082020-04-17 03:58:35 -07009load(":build_defs.bzl", "xnnpack_aggregate_library", "xnnpack_benchmark", "xnnpack_binary", "xnnpack_cc_library", "xnnpack_gcc_std_copts", "xnnpack_min_size_copts", "xnnpack_msvc_std_copts", "xnnpack_optional_armcl_copts", "xnnpack_optional_armcl_deps", "xnnpack_optional_dnnl_copts", "xnnpack_optional_dnnl_deps", "xnnpack_optional_gemmlowp_copts", "xnnpack_optional_gemmlowp_deps", "xnnpack_optional_ruy_copts", "xnnpack_optional_ruy_deps", "xnnpack_optional_tflite_copts", "xnnpack_optional_tflite_deps", "xnnpack_std_cxxopts", "xnnpack_unit_test", "xnnpack_visibility")
Marat Dukhan69c3f2c2019-11-06 12:30:01 -080010
Marat Dukhan08c4a432019-10-03 09:29:21 -070011licenses(["notice"])
12
13exports_files(["LICENSE"])
14
Marat Dukhan1b354632020-03-23 12:50:22 -070015OPERATOR_BENCHMARK_DEPS = [
16 ":XNNPACK",
17 ":bench_utils",
18 "@cpuinfo",
19 "@pthreadpool",
20]
21
Marat Dukhan08c4a432019-10-03 09:29:21 -070022MICROKERNEL_BENCHMARK_DEPS = [
23 ":ukernels",
24 ":bench_utils",
Frank Barchard7e955972019-10-11 10:34:25 -070025 ":enable_assembly",
Marat Dukhan08c4a432019-10-03 09:29:21 -070026 "@cpuinfo",
27 "@FP16",
28 "@pthreadpool",
29]
30
Marat Dukhan6adff4e2019-10-14 18:32:07 -070031ACCURACY_EVAL_DEPS = [
32 ":XNNPACK",
33 ":ukernels",
34 "@FP16",
35 "@pthreadpool",
36]
37
Marat Dukhan08c4a432019-10-03 09:29:21 -070038MICROKERNEL_TEST_DEPS = [
Marat Dukhan33fcf782020-05-24 14:27:15 -070039 ":ukernels_test_mode",
Frank Barchard7e955972019-10-11 10:34:25 -070040 ":enable_assembly",
Marat Dukhan08c4a432019-10-03 09:29:21 -070041 "@cpuinfo",
42 "@FP16",
43 "@pthreadpool",
44]
45
Marat Dukhan1b354632020-03-23 12:50:22 -070046OPERATOR_TEST_DEPS = [
Marat Dukhan33fcf782020-05-24 14:27:15 -070047 ":XNNPACK_test_mode",
Marat Dukhan1b354632020-03-23 12:50:22 -070048 "@pthreadpool",
49 "@FP16",
50]
51
Marat Dukhan08c4a432019-10-03 09:29:21 -070052OPERATOR_SRCS = [
Marat Dukhane8265432020-04-28 18:42:59 -070053 "src/operators/argmax-pooling-nhwc.c",
54 "src/operators/average-pooling-nhwc.c",
55 "src/operators/binary-elementwise-nd.c",
Marat Dukhane8265432020-04-28 18:42:59 -070056 "src/operators/channel-shuffle-nc.c",
Marat Dukhan065b11e2020-05-22 09:49:41 -070057 "src/operators/constant-pad-nd.c",
Marat Dukhane8265432020-04-28 18:42:59 -070058 "src/operators/convolution-nchw.c",
59 "src/operators/convolution-nhwc.c",
60 "src/operators/deconvolution-nhwc.c",
61 "src/operators/fully-connected-nc.c",
62 "src/operators/global-average-pooling-ncw.c",
63 "src/operators/global-average-pooling-nwc.c",
Marat Dukhane8265432020-04-28 18:42:59 -070064 "src/operators/leaky-relu-nc.c",
65 "src/operators/max-pooling-nhwc.c",
66 "src/operators/prelu-nc.c",
67 "src/operators/resize-bilinear-nhwc.c",
68 "src/operators/sigmoid-nc.c",
69 "src/operators/softmax-nc.c",
Marat Dukhanc3065f52020-06-04 13:33:32 -070070 "src/operators/unary-elementwise-nc.c",
Marat Dukhane8265432020-04-28 18:42:59 -070071 "src/operators/unpooling-nhwc.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -070072]
73
Marat Dukhan95e8b7a2020-06-03 12:46:26 -070074SUBGRAPH_SRCS = [
Marat Dukhan5fab4092020-06-10 01:28:28 -070075 "src/subgraph/abs.c",
Marat Dukhan95e8b7a2020-06-03 12:46:26 -070076 "src/subgraph/add2.c",
77 "src/subgraph/argmax-pooling-2d.c",
78 "src/subgraph/average-pooling-2d.c",
Marat Dukhan5fab4092020-06-10 01:28:28 -070079 "src/subgraph/bankers-rounding.c",
80 "src/subgraph/ceiling.c",
Marat Dukhan95e8b7a2020-06-03 12:46:26 -070081 "src/subgraph/clamp.c",
82 "src/subgraph/convolution-2d.c",
83 "src/subgraph/deconvolution-2d.c",
84 "src/subgraph/depthwise-convolution-2d.c",
Marat Dukhan9d3a4592020-06-05 16:52:42 -070085 "src/subgraph/divide.c",
Marat Dukhan95e8b7a2020-06-03 12:46:26 -070086 "src/subgraph/fully-connected.c",
Marat Dukhan5fab4092020-06-10 01:28:28 -070087 "src/subgraph/floor.c",
Marat Dukhan95e8b7a2020-06-03 12:46:26 -070088 "src/subgraph/hardswish.c",
89 "src/subgraph/max-pooling-2d.c",
Marat Dukhan9d3a4592020-06-05 16:52:42 -070090 "src/subgraph/maximum2.c",
91 "src/subgraph/minimum2.c",
Marat Dukhan95e8b7a2020-06-03 12:46:26 -070092 "src/subgraph/multiply2.c",
Marat Dukhan5fab4092020-06-10 01:28:28 -070093 "src/subgraph/negate.c",
Marat Dukhan95e8b7a2020-06-03 12:46:26 -070094 "src/subgraph/prelu.c",
95 "src/subgraph/sigmoid.c",
96 "src/subgraph/softmax.c",
97 "src/subgraph/static-constant-pad.c",
Marat Dukhan5fab4092020-06-10 01:28:28 -070098 "src/subgraph/square.c",
Marat Dukhan9d3a4592020-06-05 16:52:42 -070099 "src/subgraph/squared-difference.c",
100 "src/subgraph/subtract.c",
Marat Dukhan95e8b7a2020-06-03 12:46:26 -0700101 "src/subgraph/unpooling-2d.c",
102]
103
Marat Dukhan3a77ea72019-12-23 12:10:24 -0800104TABLE_SRCS = [
105 "src/tables/exp2-k-over-64.c",
106 "src/tables/exp2-k-over-2048.c",
107]
108
Marat Dukhan08c4a432019-10-03 09:29:21 -0700109SCALAR_UKERNELS = [
Marat Dukhan329da642019-11-19 21:44:39 -0800110 "src/f32-argmaxpool/4x-scalar-c1.c",
Marat Dukhan1e782c42019-11-21 17:02:40 -0800111 "src/f32-argmaxpool/9p8x-scalar-c1.c",
Marat Dukhan329da642019-11-19 21:44:39 -0800112 "src/f32-argmaxpool/9x-scalar-c1.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700113 "src/f32-avgpool/9p8x-minmax-scalar-c1.c",
114 "src/f32-avgpool/9x-minmax-scalar-c1.c",
Marat Dukhan5c5fa962020-03-10 18:38:33 -0700115 "src/f32-clamp/gen/scalar-x1.c",
116 "src/f32-clamp/gen/scalar-x2.c",
117 "src/f32-clamp/gen/scalar-x4.c",
Marat Dukhan441e2212019-12-04 18:30:49 -0800118 "src/f32-conv-hwc/3x3s2p0p1c3x4-scalar-1x1.c",
Marat Dukhan6b7dfae2019-12-04 16:00:52 -0800119 "src/f32-conv-hwc/3x3s2p1c3x4-scalar-1x1.c",
Marat Dukhan1f29b802020-05-15 23:46:39 -0700120 "src/f32-conv-hwc2chw/3x3s2p1c3x4-scalar-1x1.c",
121 "src/f32-dwconv-chw/3x3p1-scalar.c",
122 "src/f32-dwconv-chw/3x3s2p1-scalar.c",
123 "src/f32-dwconv-chw/5x5p2-scalar.c",
124 "src/f32-dwconv-chw/5x5s2p2-scalar.c",
Marat Dukhan163a7e62020-04-09 04:19:26 -0700125 "src/f32-dwconv/gen/up1x4-scalar-acc2.c",
126 "src/f32-dwconv/gen/up1x4-scalar.c",
127 "src/f32-dwconv/gen/up1x9-scalar-acc2.c",
128 "src/f32-dwconv/gen/up1x9-scalar.c",
129 "src/f32-dwconv/gen/up1x25-scalar-acc2.c",
130 "src/f32-dwconv/gen/up1x25-scalar.c",
131 "src/f32-dwconv/gen/up2x4-scalar-acc2.c",
132 "src/f32-dwconv/gen/up2x4-scalar.c",
133 "src/f32-dwconv/gen/up2x9-scalar-acc2.c",
134 "src/f32-dwconv/gen/up2x9-scalar.c",
135 "src/f32-dwconv/gen/up2x25-scalar-acc2.c",
136 "src/f32-dwconv/gen/up2x25-scalar.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700137 "src/f32-dwconv/gen/up1x4-minmax-scalar-acc2.c",
138 "src/f32-dwconv/gen/up1x4-minmax-scalar.c",
139 "src/f32-dwconv/gen/up1x9-minmax-scalar-acc2.c",
140 "src/f32-dwconv/gen/up1x9-minmax-scalar.c",
Marat Dukhan163a7e62020-04-09 04:19:26 -0700141 "src/f32-dwconv/gen/up1x25-minmax-scalar-acc2.c",
142 "src/f32-dwconv/gen/up1x25-minmax-scalar.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700143 "src/f32-dwconv/gen/up2x4-minmax-scalar-acc2.c",
144 "src/f32-dwconv/gen/up2x4-minmax-scalar.c",
145 "src/f32-dwconv/gen/up2x9-minmax-scalar-acc2.c",
146 "src/f32-dwconv/gen/up2x9-minmax-scalar.c",
Marat Dukhan163a7e62020-04-09 04:19:26 -0700147 "src/f32-dwconv/gen/up2x25-minmax-scalar-acc2.c",
148 "src/f32-dwconv/gen/up2x25-minmax-scalar.c",
Marat Dukhan1f29b802020-05-15 23:46:39 -0700149 "src/f32-gavgpool-cw/scalar-x1.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700150 "src/f32-gavgpool/7p7x-minmax-scalar-c1.c",
151 "src/f32-gavgpool/7x-minmax-scalar-c1.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700152 "src/f32-gemm/gen-inc/1x4inc-minmax-scalar.c",
153 "src/f32-gemm/gen-inc/2x4inc-minmax-scalar.c",
154 "src/f32-gemm/gen-inc/4x4inc-minmax-scalar.c",
Marat Dukhan163a7e62020-04-09 04:19:26 -0700155 "src/f32-gemm/gen/1x4-scalar.c",
156 "src/f32-gemm/gen/2x4-scalar.c",
157 "src/f32-gemm/gen/4x2-scalar.c",
158 "src/f32-gemm/gen/4x4-scalar.c",
Marat Dukhan467f6362020-05-22 23:21:55 -0700159 "src/f32-gemm/gen/1x4-relu-scalar.c",
160 "src/f32-gemm/gen/2x4-relu-scalar.c",
161 "src/f32-gemm/gen/4x2-relu-scalar.c",
162 "src/f32-gemm/gen/4x4-relu-scalar.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700163 "src/f32-gemm/gen/1x4-minmax-scalar.c",
164 "src/f32-gemm/gen/2x4-minmax-scalar.c",
165 "src/f32-gemm/gen/4x2-minmax-scalar.c",
166 "src/f32-gemm/gen/4x4-minmax-scalar.c",
Marat Dukhan662faa02019-12-09 22:48:16 -0800167 "src/f32-hswish/gen/scalar-x1.c",
168 "src/f32-hswish/gen/scalar-x2.c",
169 "src/f32-hswish/gen/scalar-x4.c",
Marat Dukhan660fd192020-03-10 04:55:30 -0700170 "src/f32-ibilinear/gen/scalar-c1.c",
171 "src/f32-ibilinear/gen/scalar-c2.c",
172 "src/f32-ibilinear/gen/scalar-c4.c",
Marat Dukhan163a7e62020-04-09 04:19:26 -0700173 "src/f32-igemm/gen/1x4-scalar.c",
174 "src/f32-igemm/gen/2x4-scalar.c",
175 "src/f32-igemm/gen/4x2-scalar.c",
176 "src/f32-igemm/gen/4x4-scalar.c",
Marat Dukhan467f6362020-05-22 23:21:55 -0700177 "src/f32-igemm/gen/1x4-relu-scalar.c",
178 "src/f32-igemm/gen/2x4-relu-scalar.c",
179 "src/f32-igemm/gen/4x2-relu-scalar.c",
180 "src/f32-igemm/gen/4x4-relu-scalar.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700181 "src/f32-igemm/gen/1x4-minmax-scalar.c",
182 "src/f32-igemm/gen/2x4-minmax-scalar.c",
183 "src/f32-igemm/gen/4x2-minmax-scalar.c",
184 "src/f32-igemm/gen/4x4-minmax-scalar.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700185 "src/f32-maxpool/9p8x-minmax-scalar-c1.c",
186 "src/f32-pavgpool/9p8x-minmax-scalar-c1.c",
187 "src/f32-pavgpool/9x-minmax-scalar-c1.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700188 "src/f32-ppmm/gen/2x4-minmax-scalar.c",
189 "src/f32-ppmm/gen/3x3-minmax-scalar.c",
190 "src/f32-ppmm/gen/4x2-minmax-scalar.c",
191 "src/f32-ppmm/gen/4x4-minmax-scalar.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800192 "src/f32-prelu/gen/scalar-2x1.c",
193 "src/f32-prelu/gen/scalar-2x4.c",
Marat Dukhanf46f6752020-01-21 11:03:49 -0800194 "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x1.c",
195 "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x2.c",
196 "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x2-acc2.c",
197 "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x4.c",
198 "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x4-acc2.c",
199 "src/f32-raddstoreexpminusmax/gen/scalar-lut64-p2-x4-acc4.c",
200 "src/f32-raddstoreexpminusmax/gen/scalar-p5-x1.c",
201 "src/f32-raddstoreexpminusmax/gen/scalar-p5-x2.c",
202 "src/f32-raddstoreexpminusmax/gen/scalar-p5-x2-acc2.c",
203 "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4.c",
204 "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4-acc2.c",
205 "src/f32-raddstoreexpminusmax/gen/scalar-p5-x4-acc4.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700206 "src/f32-rmax/scalar.c",
Marat Dukhan3a77ea72019-12-23 12:10:24 -0800207 "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x1.c",
208 "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x2.c",
209 "src/f32-sigmoid/gen/scalar-lut2048-p1-div-x4.c",
210 "src/f32-sigmoid/gen/scalar-lut64-p2-div-x1.c",
211 "src/f32-sigmoid/gen/scalar-lut64-p2-div-x2.c",
212 "src/f32-sigmoid/gen/scalar-lut64-p2-div-x4.c",
213 "src/f32-sigmoid/gen/scalar-p5-div-x1.c",
214 "src/f32-sigmoid/gen/scalar-p5-div-x2.c",
215 "src/f32-sigmoid/gen/scalar-p5-div-x4.c",
Marat Dukhan355ab432020-04-09 19:01:52 -0700216 "src/f32-spmm/gen/1x1-minmax-scalar-pipelined.c",
217 "src/f32-spmm/gen/1x1-minmax-scalar.c",
218 "src/f32-spmm/gen/2x1-minmax-scalar-pipelined.c",
219 "src/f32-spmm/gen/2x1-minmax-scalar.c",
220 "src/f32-spmm/gen/4x1-minmax-scalar-pipelined.c",
221 "src/f32-spmm/gen/4x1-minmax-scalar.c",
222 "src/f32-spmm/gen/8x1-minmax-scalar-pipelined.c",
223 "src/f32-spmm/gen/8x1-minmax-scalar.c",
224 "src/f32-spmm/gen/8x2-minmax-scalar.c",
225 "src/f32-spmm/gen/8x4-minmax-scalar.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700226 "src/f32-vbinary/gen/vadd-minmax-scalar-x1.c",
227 "src/f32-vbinary/gen/vadd-minmax-scalar-x2.c",
228 "src/f32-vbinary/gen/vadd-minmax-scalar-x4.c",
229 "src/f32-vbinary/gen/vaddc-minmax-scalar-x1.c",
230 "src/f32-vbinary/gen/vaddc-minmax-scalar-x2.c",
231 "src/f32-vbinary/gen/vaddc-minmax-scalar-x4.c",
232 "src/f32-vbinary/gen/vdiv-minmax-scalar-x1.c",
233 "src/f32-vbinary/gen/vdiv-minmax-scalar-x2.c",
234 "src/f32-vbinary/gen/vdiv-minmax-scalar-x4.c",
235 "src/f32-vbinary/gen/vdivc-minmax-scalar-x1.c",
236 "src/f32-vbinary/gen/vdivc-minmax-scalar-x2.c",
237 "src/f32-vbinary/gen/vdivc-minmax-scalar-x4.c",
Marat Dukhan403b7d42019-12-05 12:49:11 -0800238 "src/f32-vbinary/gen/vmax-scalar-x1.c",
239 "src/f32-vbinary/gen/vmax-scalar-x2.c",
240 "src/f32-vbinary/gen/vmax-scalar-x4.c",
241 "src/f32-vbinary/gen/vmaxc-scalar-x1.c",
242 "src/f32-vbinary/gen/vmaxc-scalar-x2.c",
243 "src/f32-vbinary/gen/vmaxc-scalar-x4.c",
244 "src/f32-vbinary/gen/vmin-scalar-x1.c",
245 "src/f32-vbinary/gen/vmin-scalar-x2.c",
246 "src/f32-vbinary/gen/vmin-scalar-x4.c",
247 "src/f32-vbinary/gen/vminc-scalar-x1.c",
248 "src/f32-vbinary/gen/vminc-scalar-x2.c",
249 "src/f32-vbinary/gen/vminc-scalar-x4.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700250 "src/f32-vbinary/gen/vmul-minmax-scalar-x1.c",
251 "src/f32-vbinary/gen/vmul-minmax-scalar-x2.c",
252 "src/f32-vbinary/gen/vmul-minmax-scalar-x4.c",
253 "src/f32-vbinary/gen/vmulc-minmax-scalar-x1.c",
254 "src/f32-vbinary/gen/vmulc-minmax-scalar-x2.c",
255 "src/f32-vbinary/gen/vmulc-minmax-scalar-x4.c",
256 "src/f32-vbinary/gen/vrdivc-minmax-scalar-x1.c",
257 "src/f32-vbinary/gen/vrdivc-minmax-scalar-x2.c",
258 "src/f32-vbinary/gen/vrdivc-minmax-scalar-x4.c",
Marat Dukhan13bafb02020-06-05 00:43:11 -0700259 "src/f32-vbinary/gen/vrsqrdiffc-scalar-x1.c",
260 "src/f32-vbinary/gen/vrsqrdiffc-scalar-x2.c",
261 "src/f32-vbinary/gen/vrsqrdiffc-scalar-x4.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700262 "src/f32-vbinary/gen/vrsubc-minmax-scalar-x1.c",
263 "src/f32-vbinary/gen/vrsubc-minmax-scalar-x2.c",
264 "src/f32-vbinary/gen/vrsubc-minmax-scalar-x4.c",
Marat Dukhan13bafb02020-06-05 00:43:11 -0700265 "src/f32-vbinary/gen/vsqrdiff-scalar-x1.c",
266 "src/f32-vbinary/gen/vsqrdiff-scalar-x2.c",
267 "src/f32-vbinary/gen/vsqrdiff-scalar-x4.c",
268 "src/f32-vbinary/gen/vsqrdiffc-scalar-x1.c",
269 "src/f32-vbinary/gen/vsqrdiffc-scalar-x2.c",
270 "src/f32-vbinary/gen/vsqrdiffc-scalar-x4.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700271 "src/f32-vbinary/gen/vsub-minmax-scalar-x1.c",
272 "src/f32-vbinary/gen/vsub-minmax-scalar-x2.c",
273 "src/f32-vbinary/gen/vsub-minmax-scalar-x4.c",
274 "src/f32-vbinary/gen/vsubc-minmax-scalar-x1.c",
275 "src/f32-vbinary/gen/vsubc-minmax-scalar-x2.c",
276 "src/f32-vbinary/gen/vsubc-minmax-scalar-x4.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700277 "src/f32-vmulcaddc/gen/c1-minmax-scalar-2x.c",
278 "src/f32-vmulcaddc/gen/c2-minmax-scalar-2x.c",
279 "src/f32-vmulcaddc/gen/c4-minmax-scalar-2x.c",
Marat Dukhaneecf8fd2020-06-09 08:59:37 -0700280 "src/f32-vrnd/gen/vrndne-scalar-libm-x1.c",
281 "src/f32-vrnd/gen/vrndne-scalar-libm-x2.c",
282 "src/f32-vrnd/gen/vrndne-scalar-libm-x4.c",
283 "src/f32-vrnd/gen/vrndz-scalar-libm-x1.c",
284 "src/f32-vrnd/gen/vrndz-scalar-libm-x2.c",
285 "src/f32-vrnd/gen/vrndz-scalar-libm-x4.c",
286 "src/f32-vrnd/gen/vrndu-scalar-libm-x1.c",
287 "src/f32-vrnd/gen/vrndu-scalar-libm-x2.c",
288 "src/f32-vrnd/gen/vrndu-scalar-libm-x4.c",
289 "src/f32-vrnd/gen/vrndd-scalar-libm-x1.c",
290 "src/f32-vrnd/gen/vrndd-scalar-libm-x2.c",
291 "src/f32-vrnd/gen/vrndd-scalar-libm-x4.c",
Marat Dukhan5020b962020-06-08 13:30:10 -0700292 "src/f32-vunary/gen/vabs-scalar-x1.c",
293 "src/f32-vunary/gen/vabs-scalar-x2.c",
294 "src/f32-vunary/gen/vabs-scalar-x4.c",
295 "src/f32-vunary/gen/vneg-scalar-x1.c",
296 "src/f32-vunary/gen/vneg-scalar-x2.c",
297 "src/f32-vunary/gen/vneg-scalar-x4.c",
298 "src/f32-vunary/gen/vsqr-scalar-x1.c",
299 "src/f32-vunary/gen/vsqr-scalar-x2.c",
300 "src/f32-vunary/gen/vsqr-scalar-x4.c",
Marat Dukhan5739f702019-12-22 19:45:09 -0800301 "src/math/expminus-scalar-lut2048-p1.c",
302 "src/math/expminus-scalar-lut64-p2.c",
303 "src/math/expminus-scalar-p5.c",
Marat Dukhan075088a2020-05-12 19:42:12 -0700304 "src/math/roundne-scalar-addsub.c",
Marat Dukhanffbf96a2020-05-14 02:59:08 -0700305 "src/math/roundne-scalar-nearbyint.c",
306 "src/math/roundne-scalar-rint.c",
Marat Dukhanc9852ba2020-05-13 17:21:29 -0700307 "src/math/roundd-scalar-addsub.c",
308 "src/math/roundd-scalar-cvt.c",
Marat Dukhanffbf96a2020-05-14 02:59:08 -0700309 "src/math/roundd-scalar-floor.c",
Marat Dukhanc9852ba2020-05-13 17:21:29 -0700310 "src/math/roundu-scalar-addsub.c",
Marat Dukhanffbf96a2020-05-14 02:59:08 -0700311 "src/math/roundu-scalar-ceil.c",
Marat Dukhanc9852ba2020-05-13 17:21:29 -0700312 "src/math/roundu-scalar-cvt.c",
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700313 "src/math/roundz-scalar-addsub.c",
314 "src/math/roundz-scalar-cvt.c",
Marat Dukhanffbf96a2020-05-14 02:59:08 -0700315 "src/math/roundz-scalar-trunc.c",
Marat Dukhan5739f702019-12-22 19:45:09 -0800316 "src/math/sigmoid-scalar-lut2048-p1-div.c",
317 "src/math/sigmoid-scalar-lut64-p2-div.c",
318 "src/math/sigmoid-scalar-p5-div.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700319 "src/q8-avgpool/9p8x-minmax-scalar-c1.c",
320 "src/q8-avgpool/9x-minmax-scalar-c1.c",
Marat Dukhande06f492020-04-09 00:19:31 -0700321 "src/q8-dwconv/up1x9-minmax-scalar.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700322 "src/q8-gavgpool/7p7x-minmax-scalar-c1.c",
323 "src/q8-gavgpool/7x-minmax-scalar-c1.c",
Marat Dukhande06f492020-04-09 00:19:31 -0700324 "src/q8-gemm/2x2-minmax-scalar.c",
325 "src/q8-igemm/2x2-minmax-scalar.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700326 "src/q8-vadd/minmax-scalar.c",
Marat Dukhan5c5fa962020-03-10 18:38:33 -0700327 "src/u8-clamp/scalar-x4.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700328 "src/u8-lut32norm/scalar.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700329 "src/u8-maxpool/9p8x-minmax-scalar-c1.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700330 "src/u8-rmax/scalar.c",
Marat Dukhan3bb3bfc2020-05-19 17:42:46 -0700331 "src/x32-fill/scalar-float.c",
332 "src/x32-fill/scalar-int.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700333 "src/x32-packx/x2-scalar.c",
334 "src/x32-packx/x3-scalar.c",
335 "src/x32-packx/x4-scalar.c",
Marat Dukhan63523d42020-05-22 17:07:33 -0700336 "src/x32-pad/scalar-int.c",
337 "src/x32-pad/scalar-float.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700338 "src/x32-unpool/scalar.c",
339 "src/x32-zip/x2-scalar.c",
340 "src/x32-zip/x3-scalar.c",
341 "src/x32-zip/x4-scalar.c",
342 "src/x32-zip/xm-scalar.c",
343 "src/x8-lut/scalar.c",
344 "src/x8-zip/x2-scalar.c",
345 "src/x8-zip/x3-scalar.c",
346 "src/x8-zip/x4-scalar.c",
347 "src/x8-zip/xm-scalar.c",
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700348 "src/requantization/precise-scalar.c",
349 "src/requantization/fp32-scalar.c",
350 "src/requantization/q31-scalar.c",
351 "src/requantization/gemmlowp-scalar.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700352]
353
Marat Dukhan436ebe62019-12-04 15:10:12 -0800354WASM_UKERNELS = [
Marat Dukhan99936602020-04-11 16:47:01 -0700355 "src/f32-avgpool/9p8x-minmax-wasm-c1.c",
356 "src/f32-avgpool/9x-minmax-wasm-c1.c",
Marat Dukhan5c5fa962020-03-10 18:38:33 -0700357 "src/f32-clamp/gen/wasm-x1.c",
358 "src/f32-clamp/gen/wasm-x2.c",
359 "src/f32-clamp/gen/wasm-x4.c",
Marat Dukhan163a7e62020-04-09 04:19:26 -0700360 "src/f32-dwconv/gen/up1x4-wasm-acc2.c",
361 "src/f32-dwconv/gen/up1x4-wasm.c",
362 "src/f32-dwconv/gen/up1x9-wasm-acc2.c",
363 "src/f32-dwconv/gen/up1x9-wasm.c",
364 "src/f32-dwconv/gen/up1x25-wasm-acc2.c",
365 "src/f32-dwconv/gen/up1x25-wasm.c",
366 "src/f32-dwconv/gen/up2x4-wasm-acc2.c",
367 "src/f32-dwconv/gen/up2x4-wasm.c",
368 "src/f32-dwconv/gen/up2x9-wasm-acc2.c",
369 "src/f32-dwconv/gen/up2x9-wasm.c",
370 "src/f32-dwconv/gen/up2x25-wasm-acc2.c",
371 "src/f32-dwconv/gen/up2x25-wasm.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700372 "src/f32-dwconv/gen/up1x4-minmax-wasm-acc2.c",
373 "src/f32-dwconv/gen/up1x4-minmax-wasm.c",
374 "src/f32-dwconv/gen/up1x9-minmax-wasm-acc2.c",
375 "src/f32-dwconv/gen/up1x9-minmax-wasm.c",
Marat Dukhan163a7e62020-04-09 04:19:26 -0700376 "src/f32-dwconv/gen/up1x25-minmax-wasm-acc2.c",
377 "src/f32-dwconv/gen/up1x25-minmax-wasm.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700378 "src/f32-dwconv/gen/up2x4-minmax-wasm-acc2.c",
379 "src/f32-dwconv/gen/up2x4-minmax-wasm.c",
380 "src/f32-dwconv/gen/up2x9-minmax-wasm-acc2.c",
381 "src/f32-dwconv/gen/up2x9-minmax-wasm.c",
Marat Dukhan163a7e62020-04-09 04:19:26 -0700382 "src/f32-dwconv/gen/up2x25-minmax-wasm-acc2.c",
383 "src/f32-dwconv/gen/up2x25-minmax-wasm.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700384 "src/f32-gavgpool/7p7x-minmax-wasm-c1.c",
385 "src/f32-gavgpool/7x-minmax-wasm-c1.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700386 "src/f32-gemm/gen-inc/1x4inc-minmax-wasm.c",
387 "src/f32-gemm/gen-inc/2x4inc-minmax-wasm.c",
388 "src/f32-gemm/gen-inc/4x4inc-minmax-wasm.c",
Marat Dukhan163a7e62020-04-09 04:19:26 -0700389 "src/f32-gemm/gen/1x4-wasm.c",
390 "src/f32-gemm/gen/2x4-wasm.c",
391 "src/f32-gemm/gen/4x2-wasm.c",
392 "src/f32-gemm/gen/4x4-wasm.c",
Marat Dukhan467f6362020-05-22 23:21:55 -0700393 "src/f32-gemm/gen/1x4-relu-wasm.c",
394 "src/f32-gemm/gen/2x4-relu-wasm.c",
395 "src/f32-gemm/gen/4x2-relu-wasm.c",
396 "src/f32-gemm/gen/4x4-relu-wasm.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700397 "src/f32-gemm/gen/1x4-minmax-wasm.c",
398 "src/f32-gemm/gen/2x4-minmax-wasm.c",
399 "src/f32-gemm/gen/4x2-minmax-wasm.c",
400 "src/f32-gemm/gen/4x4-minmax-wasm.c",
Marat Dukhan662faa02019-12-09 22:48:16 -0800401 "src/f32-hswish/gen/wasm-x1.c",
402 "src/f32-hswish/gen/wasm-x2.c",
403 "src/f32-hswish/gen/wasm-x4.c",
Marat Dukhan163a7e62020-04-09 04:19:26 -0700404 "src/f32-igemm/gen/1x4-wasm.c",
405 "src/f32-igemm/gen/2x4-wasm.c",
406 "src/f32-igemm/gen/4x2-wasm.c",
407 "src/f32-igemm/gen/4x4-wasm.c",
Marat Dukhan467f6362020-05-22 23:21:55 -0700408 "src/f32-igemm/gen/1x4-relu-wasm.c",
409 "src/f32-igemm/gen/2x4-relu-wasm.c",
410 "src/f32-igemm/gen/4x2-relu-wasm.c",
411 "src/f32-igemm/gen/4x4-relu-wasm.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700412 "src/f32-igemm/gen/1x4-minmax-wasm.c",
413 "src/f32-igemm/gen/2x4-minmax-wasm.c",
414 "src/f32-igemm/gen/4x2-minmax-wasm.c",
415 "src/f32-igemm/gen/4x4-minmax-wasm.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700416 "src/f32-maxpool/9p8x-minmax-wasm-c1.c",
417 "src/f32-pavgpool/9p8x-minmax-wasm-c1.c",
418 "src/f32-pavgpool/9x-minmax-wasm-c1.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700419 "src/f32-vbinary/gen/vadd-minmax-wasm-x1.c",
420 "src/f32-vbinary/gen/vadd-minmax-wasm-x2.c",
421 "src/f32-vbinary/gen/vadd-minmax-wasm-x4.c",
422 "src/f32-vbinary/gen/vaddc-minmax-wasm-x1.c",
423 "src/f32-vbinary/gen/vaddc-minmax-wasm-x2.c",
424 "src/f32-vbinary/gen/vaddc-minmax-wasm-x4.c",
425 "src/f32-vbinary/gen/vdiv-minmax-wasm-x1.c",
426 "src/f32-vbinary/gen/vdiv-minmax-wasm-x2.c",
427 "src/f32-vbinary/gen/vdiv-minmax-wasm-x4.c",
428 "src/f32-vbinary/gen/vdivc-minmax-wasm-x1.c",
429 "src/f32-vbinary/gen/vdivc-minmax-wasm-x2.c",
430 "src/f32-vbinary/gen/vdivc-minmax-wasm-x4.c",
Marat Dukhan403b7d42019-12-05 12:49:11 -0800431 "src/f32-vbinary/gen/vmax-wasm-x1.c",
432 "src/f32-vbinary/gen/vmax-wasm-x2.c",
433 "src/f32-vbinary/gen/vmax-wasm-x4.c",
434 "src/f32-vbinary/gen/vmaxc-wasm-x1.c",
435 "src/f32-vbinary/gen/vmaxc-wasm-x2.c",
436 "src/f32-vbinary/gen/vmaxc-wasm-x4.c",
437 "src/f32-vbinary/gen/vmin-wasm-x1.c",
438 "src/f32-vbinary/gen/vmin-wasm-x2.c",
439 "src/f32-vbinary/gen/vmin-wasm-x4.c",
440 "src/f32-vbinary/gen/vminc-wasm-x1.c",
441 "src/f32-vbinary/gen/vminc-wasm-x2.c",
442 "src/f32-vbinary/gen/vminc-wasm-x4.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700443 "src/f32-vbinary/gen/vmul-minmax-wasm-x1.c",
444 "src/f32-vbinary/gen/vmul-minmax-wasm-x2.c",
445 "src/f32-vbinary/gen/vmul-minmax-wasm-x4.c",
446 "src/f32-vbinary/gen/vmulc-minmax-wasm-x1.c",
447 "src/f32-vbinary/gen/vmulc-minmax-wasm-x2.c",
448 "src/f32-vbinary/gen/vmulc-minmax-wasm-x4.c",
449 "src/f32-vbinary/gen/vrdivc-minmax-wasm-x1.c",
450 "src/f32-vbinary/gen/vrdivc-minmax-wasm-x2.c",
451 "src/f32-vbinary/gen/vrdivc-minmax-wasm-x4.c",
452 "src/f32-vbinary/gen/vrsubc-minmax-wasm-x1.c",
453 "src/f32-vbinary/gen/vrsubc-minmax-wasm-x2.c",
454 "src/f32-vbinary/gen/vrsubc-minmax-wasm-x4.c",
455 "src/f32-vbinary/gen/vsub-minmax-wasm-x1.c",
456 "src/f32-vbinary/gen/vsub-minmax-wasm-x2.c",
457 "src/f32-vbinary/gen/vsub-minmax-wasm-x4.c",
458 "src/f32-vbinary/gen/vsubc-minmax-wasm-x1.c",
459 "src/f32-vbinary/gen/vsubc-minmax-wasm-x2.c",
460 "src/f32-vbinary/gen/vsubc-minmax-wasm-x4.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700461 "src/f32-vmulcaddc/gen/c1-minmax-wasm-2x.c",
462 "src/f32-vmulcaddc/gen/c2-minmax-wasm-2x.c",
463 "src/f32-vmulcaddc/gen/c4-minmax-wasm-2x.c",
Marat Dukhan436ebe62019-12-04 15:10:12 -0800464]
465
Marat Dukhan290055c2020-06-09 12:24:29 -0700466WASMSIMD_UKERNELS = [
467 "src/requantization/fp32-wasmsimd.c",
468]
469
Marat Dukhan8d3c07e2020-01-02 01:20:59 -0800470PSIMD_FASTMATH_UKERNELS = [
Marat Dukhan329da642019-11-19 21:44:39 -0800471 "src/f32-argmaxpool/4x-psimd-c4.c",
Marat Dukhan1e782c42019-11-21 17:02:40 -0800472 "src/f32-argmaxpool/9p8x-psimd-c4.c",
Marat Dukhan329da642019-11-19 21:44:39 -0800473 "src/f32-argmaxpool/9x-psimd-c4.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700474 "src/f32-avgpool/9p8x-minmax-psimd-c4.c",
475 "src/f32-avgpool/9x-minmax-psimd-c4.c",
Marat Dukhan5c5fa962020-03-10 18:38:33 -0700476 "src/f32-clamp/gen/psimd-x4.c",
477 "src/f32-clamp/gen/psimd-x8.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700478 "src/f32-dwconv/gen/up4x25-minmax-psimd-acc2.c",
479 "src/f32-dwconv/gen/up4x25-minmax-psimd.c",
480 "src/f32-dwconv/gen/up4x4-minmax-psimd-acc2.c",
481 "src/f32-dwconv/gen/up4x4-minmax-psimd.c",
482 "src/f32-dwconv/gen/up4x9-minmax-psimd-acc2.c",
483 "src/f32-dwconv/gen/up4x9-minmax-psimd.c",
484 "src/f32-dwconv/gen/up8x25-minmax-psimd-acc2.c",
485 "src/f32-dwconv/gen/up8x25-minmax-psimd.c",
486 "src/f32-dwconv/gen/up8x4-minmax-psimd-acc2.c",
487 "src/f32-dwconv/gen/up8x4-minmax-psimd.c",
488 "src/f32-dwconv/gen/up8x9-minmax-psimd-acc2.c",
489 "src/f32-dwconv/gen/up8x9-minmax-psimd.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700490 "src/f32-gavgpool/7p7x-minmax-psimd-c4.c",
491 "src/f32-gavgpool/7x-minmax-psimd-c4.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700492 "src/f32-gemm/gen/1x8-minmax-psimd-loadsplat.c",
493 "src/f32-gemm/gen/1x8-minmax-psimd-splat.c",
494 "src/f32-gemm/gen/1x8s4-minmax-psimd.c",
495 "src/f32-gemm/gen/4x2c4-minmax-psimd.c",
496 "src/f32-gemm/gen/4x8-minmax-psimd-loadsplat.c",
497 "src/f32-gemm/gen/4x8-minmax-psimd-splat.c",
498 "src/f32-gemm/gen/4x8s4-minmax-psimd.c",
499 "src/f32-gemm/gen/6x8-minmax-psimd-loadsplat.c",
500 "src/f32-gemm/gen/6x8-minmax-psimd-splat.c",
501 "src/f32-gemm/gen/6x8s4-minmax-psimd.c",
502 "src/f32-gemm/gen-inc/1x8inc-minmax-psimd-loadsplat.c",
503 "src/f32-gemm/gen-inc/1x8inc-minmax-psimd-splat.c",
504 "src/f32-gemm/gen-inc/1x8s4inc-minmax-psimd.c",
505 "src/f32-gemm/gen-inc/4x8inc-minmax-psimd-loadsplat.c",
506 "src/f32-gemm/gen-inc/4x8inc-minmax-psimd-splat.c",
507 "src/f32-gemm/gen-inc/4x8s4inc-minmax-psimd.c",
508 "src/f32-gemm/gen-inc/6x8inc-minmax-psimd-loadsplat.c",
509 "src/f32-gemm/gen-inc/6x8inc-minmax-psimd-splat.c",
510 "src/f32-gemm/gen-inc/6x8s4inc-minmax-psimd.c",
Marat Dukhan662faa02019-12-09 22:48:16 -0800511 "src/f32-hswish/gen/psimd-x4.c",
512 "src/f32-hswish/gen/psimd-x8.c",
Marat Dukhan660fd192020-03-10 04:55:30 -0700513 "src/f32-ibilinear/gen/psimd-c4.c",
514 "src/f32-ibilinear/gen/psimd-c8.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700515 "src/f32-igemm/gen/1x8-minmax-psimd-loadsplat.c",
516 "src/f32-igemm/gen/1x8-minmax-psimd-splat.c",
517 "src/f32-igemm/gen/1x8s4-minmax-psimd.c",
518 "src/f32-igemm/gen/4x2c4-minmax-psimd.c",
519 "src/f32-igemm/gen/4x8-minmax-psimd-loadsplat.c",
520 "src/f32-igemm/gen/4x8-minmax-psimd-splat.c",
521 "src/f32-igemm/gen/4x8s4-minmax-psimd.c",
522 "src/f32-igemm/gen/6x8-minmax-psimd-loadsplat.c",
523 "src/f32-igemm/gen/6x8-minmax-psimd-splat.c",
524 "src/f32-igemm/gen/6x8s4-minmax-psimd.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700525 "src/f32-maxpool/9p8x-minmax-psimd-c4.c",
Erich Elsen6e80fdc2020-06-09 15:35:37 -0700526 "src/f32-spmm/gen/4x1-minmax-psimd.c",
527 "src/f32-spmm/gen/8x1-minmax-psimd.c",
528 "src/f32-spmm/gen/16x1-minmax-psimd.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700529 "src/f32-pavgpool/9p8x-minmax-psimd-c4.c",
530 "src/f32-pavgpool/9x-minmax-psimd-c4.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700531 "src/f32-ppmm/gen/4x8-minmax-psimd.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800532 "src/f32-prelu/gen/psimd-2x4.c",
533 "src/f32-prelu/gen/psimd-2x8.c",
Marat Dukhanb39689d2020-01-24 13:32:20 -0800534 "src/f32-rmax/psimd.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700535 "src/f32-vbinary/gen/vadd-minmax-psimd-x4.c",
536 "src/f32-vbinary/gen/vadd-minmax-psimd-x8.c",
537 "src/f32-vbinary/gen/vaddc-minmax-psimd-x4.c",
538 "src/f32-vbinary/gen/vaddc-minmax-psimd-x8.c",
539 "src/f32-vbinary/gen/vdiv-minmax-psimd-x4.c",
540 "src/f32-vbinary/gen/vdiv-minmax-psimd-x8.c",
541 "src/f32-vbinary/gen/vdivc-minmax-psimd-x4.c",
542 "src/f32-vbinary/gen/vdivc-minmax-psimd-x8.c",
Marat Dukhan403b7d42019-12-05 12:49:11 -0800543 "src/f32-vbinary/gen/vmax-psimd-x4.c",
544 "src/f32-vbinary/gen/vmax-psimd-x8.c",
545 "src/f32-vbinary/gen/vmaxc-psimd-x4.c",
546 "src/f32-vbinary/gen/vmaxc-psimd-x8.c",
547 "src/f32-vbinary/gen/vmin-psimd-x4.c",
548 "src/f32-vbinary/gen/vmin-psimd-x8.c",
549 "src/f32-vbinary/gen/vminc-psimd-x4.c",
550 "src/f32-vbinary/gen/vminc-psimd-x8.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700551 "src/f32-vbinary/gen/vmul-minmax-psimd-x4.c",
552 "src/f32-vbinary/gen/vmul-minmax-psimd-x8.c",
553 "src/f32-vbinary/gen/vmulc-minmax-psimd-x4.c",
554 "src/f32-vbinary/gen/vmulc-minmax-psimd-x8.c",
555 "src/f32-vbinary/gen/vrdivc-minmax-psimd-x4.c",
556 "src/f32-vbinary/gen/vrdivc-minmax-psimd-x8.c",
Marat Dukhan13bafb02020-06-05 00:43:11 -0700557 "src/f32-vbinary/gen/vrsqrdiffc-psimd-x4.c",
558 "src/f32-vbinary/gen/vrsqrdiffc-psimd-x8.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700559 "src/f32-vbinary/gen/vrsubc-minmax-psimd-x4.c",
560 "src/f32-vbinary/gen/vrsubc-minmax-psimd-x8.c",
Marat Dukhan13bafb02020-06-05 00:43:11 -0700561 "src/f32-vbinary/gen/vsqrdiff-psimd-x4.c",
562 "src/f32-vbinary/gen/vsqrdiff-psimd-x8.c",
563 "src/f32-vbinary/gen/vsqrdiffc-psimd-x4.c",
564 "src/f32-vbinary/gen/vsqrdiffc-psimd-x8.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700565 "src/f32-vbinary/gen/vsub-minmax-psimd-x4.c",
566 "src/f32-vbinary/gen/vsub-minmax-psimd-x8.c",
567 "src/f32-vbinary/gen/vsubc-minmax-psimd-x4.c",
568 "src/f32-vbinary/gen/vsubc-minmax-psimd-x8.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700569 "src/f32-vmulcaddc/gen/c4-minmax-psimd-2x.c",
570 "src/f32-vmulcaddc/gen/c8-minmax-psimd-2x.c",
Marat Dukhan5020b962020-06-08 13:30:10 -0700571 "src/f32-vunary/gen/vabs-psimd-x4.c",
572 "src/f32-vunary/gen/vabs-psimd-x8.c",
573 "src/f32-vunary/gen/vneg-psimd-x4.c",
574 "src/f32-vunary/gen/vneg-psimd-x8.c",
575 "src/f32-vunary/gen/vsqr-psimd-x4.c",
576 "src/f32-vunary/gen/vsqr-psimd-x8.c",
Marat Dukhan3bb3bfc2020-05-19 17:42:46 -0700577 "src/x32-fill/psimd.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700578 "src/x32-packx/x4-psimd.c",
Marat Dukhan63523d42020-05-22 17:07:33 -0700579 "src/x32-pad/psimd.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700580 "src/x32-unpool/psimd.c",
581 "src/x32-zip/x2-psimd.c",
582 "src/x32-zip/x3-psimd.c",
583 "src/x32-zip/x4-psimd.c",
584 "src/x32-zip/xm-psimd.c",
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700585 "src/requantization/precise-psimd.c",
586 "src/requantization/fp32-psimd.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700587]
588
# Source files for the psimd micro-kernels collected under PSIMD_ACCMATH_UKERNELS:
# f32 raddstoreexpminusmax, sigmoid and vrnd kernels, plus the psimd rounding
# and sigmoid implementations under src/math.
# NOTE(review): "ACCMATH" presumably means these sources must be compiled with
# accurate (non-fast-math) floating-point settings; confirm against the rule
# that consumes this list, which is outside this part of the file.
Marat Dukhan8d3c07e2020-01-02 01:20:59 -0800589PSIMD_ACCMATH_UKERNELS = [
Marat Dukhanb39689d2020-01-24 13:32:20 -0800590 "src/f32-raddstoreexpminusmax/gen/psimd-p5-x4.c",
591 "src/f32-raddstoreexpminusmax/gen/psimd-p5-x8.c",
592 "src/f32-raddstoreexpminusmax/gen/psimd-p5-x8-acc2.c",
593 "src/f32-raddstoreexpminusmax/gen/psimd-p5-x12.c",
594 "src/f32-raddstoreexpminusmax/gen/psimd-p5-x12-acc2.c",
595 "src/f32-raddstoreexpminusmax/gen/psimd-p5-x12-acc3.c",
596 "src/f32-raddstoreexpminusmax/gen/psimd-p5-x16.c",
597 "src/f32-raddstoreexpminusmax/gen/psimd-p5-x16-acc2.c",
598 "src/f32-raddstoreexpminusmax/gen/psimd-p5-x16-acc4.c",
599 "src/f32-raddstoreexpminusmax/gen/psimd-p5-x20.c",
600 "src/f32-raddstoreexpminusmax/gen/psimd-p5-x20-acc2.c",
601 "src/f32-raddstoreexpminusmax/gen/psimd-p5-x20-acc5.c",
Marat Dukhan8d3c07e2020-01-02 01:20:59 -0800602 "src/f32-sigmoid/gen/psimd-p5-div-x4.c",
603 "src/f32-sigmoid/gen/psimd-p5-div-x8.c",
604 "src/f32-sigmoid/gen/psimd-p5-div-x12.c",
605 "src/f32-sigmoid/gen/psimd-p5-div-x16.c",
606 "src/f32-sigmoid/gen/psimd-p5-div-x20.c",
607 "src/f32-sigmoid/gen/psimd-p5-div-x24.c",
Marat Dukhaneecf8fd2020-06-09 08:59:37 -0700608 "src/f32-vrnd/gen/vrndne-psimd-x4.c",
609 "src/f32-vrnd/gen/vrndne-psimd-x8.c",
610 "src/f32-vrnd/gen/vrndz-psimd-x4.c",
611 "src/f32-vrnd/gen/vrndz-psimd-x8.c",
612 "src/f32-vrnd/gen/vrndu-psimd-x4.c",
613 "src/f32-vrnd/gen/vrndu-psimd-x8.c",
614 "src/f32-vrnd/gen/vrndd-psimd-x4.c",
615 "src/f32-vrnd/gen/vrndd-psimd-x8.c",
Marat Dukhan075088a2020-05-12 19:42:12 -0700616 "src/math/roundne-psimd-addsub.c",
Marat Dukhanc9852ba2020-05-13 17:21:29 -0700617 "src/math/roundd-psimd-addsub.c",
618 "src/math/roundu-psimd-addsub.c",
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700619 "src/math/roundz-psimd-addsub.c",
Marat Dukhan8d3c07e2020-01-02 01:20:59 -0800620 "src/math/sigmoid-psimd-p5-div.c",
621]
622
Marat Dukhan08c4a432019-10-03 09:29:21 -0700623# ISA-specific micro-kernels
# Source files whose names carry the plain `neon` tag: f32, q8, u8, x32 and x8
# micro-kernels, the neon rounding/sigmoid implementations under src/math, and
# the neon requantization routines.
# NOTE(review): presumably compiled for every NEON-capable ARM target; the rule
# that consumes this list is outside this part of the file, so confirm there.
624NEON_UKERNELS = [
Marat Dukhan99936602020-04-11 16:47:01 -0700625 "src/f32-avgpool/9p8x-minmax-neon-c4.c",
626 "src/f32-avgpool/9x-minmax-neon-c4.c",
Marat Dukhan5c5fa962020-03-10 18:38:33 -0700627 "src/f32-clamp/gen/neon-x4.c",
628 "src/f32-clamp/gen/neon-x8.c",
Marat Dukhan56b10cd2020-05-18 09:35:49 -0700629 "src/f32-conv-hwc/gen/3x3s2p0p1c3x4-neon-2x1.c",
630 "src/f32-conv-hwc/gen/3x3s2p0p1c3x8-neon-2x1.c",
Marat Dukhance7a3f82020-05-17 21:46:44 -0700631 "src/f32-conv-hwc/gen/3x3s2p0p1c3x4-neon-2x2.c",
632 "src/f32-conv-hwc/gen/3x3s2p0p1c3x8-neon-2x2.c",
Marat Dukhan56b10cd2020-05-18 09:35:49 -0700633 "src/f32-conv-hwc/gen/3x3s2p1c3x4-neon-2x1.c",
634 "src/f32-conv-hwc/gen/3x3s2p1c3x8-neon-2x1.c",
Marat Dukhance7a3f82020-05-17 21:46:44 -0700635 "src/f32-conv-hwc/gen/3x3s2p1c3x4-neon-2x2.c",
636 "src/f32-conv-hwc/gen/3x3s2p1c3x8-neon-2x2.c",
Marat Dukhanf5425ea2020-04-24 01:46:00 -0700637 "src/f32-dwconv/gen/up4x4-minmax-neon.c",
638 "src/f32-dwconv/gen/up4x4-minmax-neon-acc2.c",
639 "src/f32-dwconv/gen/up8x4-minmax-neon.c",
640 "src/f32-dwconv/gen/up8x4-minmax-neon-acc2.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700641 "src/f32-dwconv/gen/up4x9-minmax-neon.c",
642 "src/f32-dwconv/gen/up4x9-minmax-neon-acc2.c",
643 "src/f32-dwconv/gen/up8x9-minmax-neon.c",
644 "src/f32-dwconv/gen/up8x9-minmax-neon-acc2.c",
Marat Dukhanf5425ea2020-04-24 01:46:00 -0700645 "src/f32-dwconv/gen/up4x25-minmax-neon.c",
646 "src/f32-dwconv/gen/up4x25-minmax-neon-acc2.c",
647 "src/f32-dwconv/gen/up8x25-minmax-neon.c",
648 "src/f32-dwconv/gen/up8x25-minmax-neon-acc2.c",
Marat Dukhan1f29b802020-05-15 23:46:39 -0700649 "src/f32-gavgpool-cw/neon-x4.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700650 "src/f32-gavgpool/7p7x-minmax-neon-c4.c",
651 "src/f32-gavgpool/7x-minmax-neon-c4.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700652 "src/f32-gemm/gen/1x8-minmax-neon-lane-ld64.c",
653 "src/f32-gemm/gen/4x2-minmax-neon-lane-ld64.c",
654 "src/f32-gemm/gen/4x8-minmax-neon-lane-ld128.c",
655 "src/f32-gemm/gen/4x8-minmax-neon-lane-ld64.c",
656 "src/f32-gemm/gen/5x8-minmax-neon-lane-ld64.c",
657 "src/f32-gemm/gen/6x8-minmax-neon-lane-ld64.c",
658 "src/f32-gemm/gen/6x8-minmax-neon-lane-ld128.c",
659 "src/f32-gemm/gen/1x8-minmax-neon-dup-ld64.c",
660 "src/f32-gemm/gen/4x8-minmax-neon-dup-ld128.c",
661 "src/f32-gemm/gen/4x8-minmax-neon-dup-ld64.c",
662 "src/f32-gemm/gen/6x8-minmax-neon-dup-ld64.c",
663 "src/f32-gemm/gen/6x8-minmax-neon-dup-ld128.c",
664 "src/f32-gemm/gen/1x8s4-minmax-neon.c",
665 "src/f32-gemm/gen/4x8s4-minmax-neon.c",
666 "src/f32-gemm/gen/6x8s4-minmax-neon.c",
667 "src/f32-gemm/gen/8x8s4-minmax-neon.c",
668 "src/f32-gemm/gen-inc/1x8inc-minmax-neon-lane-ld64.c",
669 "src/f32-gemm/gen-inc/4x8inc-minmax-neon-lane-ld128.c",
670 "src/f32-gemm/gen-inc/4x8inc-minmax-neon-lane-ld64.c",
671 "src/f32-gemm/gen-inc/5x8inc-minmax-neon-lane-ld64.c",
672 "src/f32-gemm/gen-inc/6x8inc-minmax-neon-lane-ld64.c",
673 "src/f32-gemm/gen-inc/6x8inc-minmax-neon-lane-ld128.c",
674 "src/f32-gemm/gen-inc/1x8inc-minmax-neon-dup-ld64.c",
675 "src/f32-gemm/gen-inc/4x8inc-minmax-neon-dup-ld128.c",
676 "src/f32-gemm/gen-inc/4x8inc-minmax-neon-dup-ld64.c",
677 "src/f32-gemm/gen-inc/6x8inc-minmax-neon-dup-ld64.c",
678 "src/f32-gemm/gen-inc/6x8inc-minmax-neon-dup-ld128.c",
679 "src/f32-gemm/gen-inc/1x8s4inc-minmax-neon.c",
680 "src/f32-gemm/gen-inc/4x8s4inc-minmax-neon.c",
681 "src/f32-gemm/gen-inc/6x8s4inc-minmax-neon.c",
682 "src/f32-gemm/gen-inc/8x8s4inc-minmax-neon.c",
Marat Dukhan662faa02019-12-09 22:48:16 -0800683 "src/f32-hswish/gen/neon-x4.c",
684 "src/f32-hswish/gen/neon-x8.c",
Marat Dukhan660fd192020-03-10 04:55:30 -0700685 "src/f32-ibilinear/gen/neon-c4.c",
686 "src/f32-ibilinear/gen/neon-c8.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700687 "src/f32-igemm/gen/1x8-minmax-neon-lane-ld64.c",
688 "src/f32-igemm/gen/4x2-minmax-neon-lane-ld64.c",
689 "src/f32-igemm/gen/4x4-minmax-neon-lane-ld64.c",
690 "src/f32-igemm/gen/4x8-minmax-neon-lane-ld128.c",
691 "src/f32-igemm/gen/4x8-minmax-neon-lane-ld64.c",
692 "src/f32-igemm/gen/6x8-minmax-neon-lane-ld64.c",
693 "src/f32-igemm/gen/6x8-minmax-neon-lane-ld128.c",
694 "src/f32-igemm/gen/1x8-minmax-neon-dup-ld64.c",
695 "src/f32-igemm/gen/4x8-minmax-neon-dup-ld128.c",
696 "src/f32-igemm/gen/4x8-minmax-neon-dup-ld64.c",
697 "src/f32-igemm/gen/6x8-minmax-neon-dup-ld64.c",
698 "src/f32-igemm/gen/6x8-minmax-neon-dup-ld128.c",
699 "src/f32-igemm/gen/1x8s4-minmax-neon.c",
700 "src/f32-igemm/gen/4x8s4-minmax-neon.c",
701 "src/f32-igemm/gen/6x8s4-minmax-neon.c",
702 "src/f32-igemm/gen/8x8s4-minmax-neon.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700703 "src/f32-maxpool/9p8x-minmax-neon-c4.c",
704 "src/f32-pavgpool/9p8x-minmax-neon-c4.c",
705 "src/f32-pavgpool/9x-minmax-neon-c4.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700706 "src/f32-ppmm/gen/4x8-minmax-neon.c",
707 "src/f32-ppmm/gen/8x8-minmax-neon.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800708 "src/f32-prelu/gen/neon-2x4.c",
709 "src/f32-prelu/gen/neon-2x8.c",
Marat Dukhan8137e4c2020-01-25 12:56:58 -0800710 "src/f32-raddstoreexpminusmax/gen/neon-p5-x4.c",
711 "src/f32-raddstoreexpminusmax/gen/neon-p5-x8.c",
712 "src/f32-raddstoreexpminusmax/gen/neon-p5-x8-acc2.c",
713 "src/f32-raddstoreexpminusmax/gen/neon-p5-x12.c",
714 "src/f32-raddstoreexpminusmax/gen/neon-p5-x12-acc2.c",
715 "src/f32-raddstoreexpminusmax/gen/neon-p5-x12-acc3.c",
716 "src/f32-raddstoreexpminusmax/gen/neon-p5-x16.c",
717 "src/f32-raddstoreexpminusmax/gen/neon-p5-x16-acc2.c",
718 "src/f32-raddstoreexpminusmax/gen/neon-p5-x16-acc4.c",
719 "src/f32-raddstoreexpminusmax/gen/neon-p5-x20.c",
720 "src/f32-raddstoreexpminusmax/gen/neon-p5-x20-acc2.c",
721 "src/f32-raddstoreexpminusmax/gen/neon-p5-x20-acc5.c",
722 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x4.c",
723 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x8.c",
724 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x8-acc2.c",
725 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x12.c",
726 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x12-acc2.c",
727 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x12-acc3.c",
728 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x16.c",
729 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x16-acc2.c",
730 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x16-acc4.c",
731 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20.c",
732 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20-acc2.c",
733 "src/f32-raddstoreexpminusmax/gen/neon-lut64-p2-x20-acc5.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700734 "src/f32-rmax/neon.c",
Marat Dukhan40a672f2019-11-25 03:08:22 -0800735 "src/f32-sigmoid/gen/neon-frac-p9-p10-nr1recps-x16.c",
Marat Dukhan4a24a582020-01-06 13:30:00 -0800736 "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x4.c",
737 "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x8.c",
738 "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x12.c",
739 "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x16.c",
740 "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x20.c",
741 "src/f32-sigmoid/gen/neon-rr2-p5-nr2recps-x24.c",
742 "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x4.c",
743 "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x8.c",
744 "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x12.c",
745 "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x16.c",
746 "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x20.c",
747 "src/f32-sigmoid/gen/neon-rr2-lut64-p2-nr2recps-x24.c",
748 "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x4.c",
749 "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x8.c",
750 "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x12.c",
751 "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x16.c",
752 "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x20.c",
753 "src/f32-sigmoid/gen/neon-rr2-lut2048-p1-nr2recps-x24.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700754 "src/f32-vbinary/gen/vadd-minmax-neon-x4.c",
755 "src/f32-vbinary/gen/vadd-minmax-neon-x8.c",
756 "src/f32-vbinary/gen/vaddc-minmax-neon-x4.c",
757 "src/f32-vbinary/gen/vaddc-minmax-neon-x8.c",
Marat Dukhan403b7d42019-12-05 12:49:11 -0800758 "src/f32-vbinary/gen/vmax-neon-x4.c",
759 "src/f32-vbinary/gen/vmax-neon-x8.c",
760 "src/f32-vbinary/gen/vmaxc-neon-x4.c",
761 "src/f32-vbinary/gen/vmaxc-neon-x8.c",
762 "src/f32-vbinary/gen/vmin-neon-x4.c",
763 "src/f32-vbinary/gen/vmin-neon-x8.c",
764 "src/f32-vbinary/gen/vminc-neon-x4.c",
765 "src/f32-vbinary/gen/vminc-neon-x8.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700766 "src/f32-vbinary/gen/vmul-minmax-neon-x4.c",
767 "src/f32-vbinary/gen/vmul-minmax-neon-x8.c",
768 "src/f32-vbinary/gen/vmulc-minmax-neon-x4.c",
769 "src/f32-vbinary/gen/vmulc-minmax-neon-x8.c",
Marat Dukhan13bafb02020-06-05 00:43:11 -0700770 "src/f32-vbinary/gen/vrsqrdiffc-neon-x4.c",
771 "src/f32-vbinary/gen/vrsqrdiffc-neon-x8.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700772 "src/f32-vbinary/gen/vrsubc-minmax-neon-x4.c",
773 "src/f32-vbinary/gen/vrsubc-minmax-neon-x8.c",
Marat Dukhan13bafb02020-06-05 00:43:11 -0700774 "src/f32-vbinary/gen/vsqrdiff-neon-x4.c",
775 "src/f32-vbinary/gen/vsqrdiff-neon-x8.c",
776 "src/f32-vbinary/gen/vsqrdiffc-neon-x4.c",
777 "src/f32-vbinary/gen/vsqrdiffc-neon-x8.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700778 "src/f32-vbinary/gen/vsub-minmax-neon-x4.c",
779 "src/f32-vbinary/gen/vsub-minmax-neon-x8.c",
780 "src/f32-vbinary/gen/vsubc-minmax-neon-x4.c",
781 "src/f32-vbinary/gen/vsubc-minmax-neon-x8.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700782 "src/f32-vmulcaddc/gen/c4-minmax-neon-2x.c",
783 "src/f32-vmulcaddc/gen/c8-minmax-neon-2x.c",
Marat Dukhaneecf8fd2020-06-09 08:59:37 -0700784 "src/f32-vrnd/gen/vrndne-neon-x4.c",
785 "src/f32-vrnd/gen/vrndne-neon-x8.c",
786 "src/f32-vrnd/gen/vrndz-neon-x4.c",
787 "src/f32-vrnd/gen/vrndz-neon-x8.c",
788 "src/f32-vrnd/gen/vrndu-neon-x4.c",
789 "src/f32-vrnd/gen/vrndu-neon-x8.c",
790 "src/f32-vrnd/gen/vrndd-neon-x4.c",
791 "src/f32-vrnd/gen/vrndd-neon-x8.c",
Marat Dukhan5020b962020-06-08 13:30:10 -0700792 "src/f32-vunary/gen/vabs-neon-x4.c",
793 "src/f32-vunary/gen/vabs-neon-x8.c",
794 "src/f32-vunary/gen/vneg-neon-x4.c",
795 "src/f32-vunary/gen/vneg-neon-x8.c",
796 "src/f32-vunary/gen/vsqr-neon-x4.c",
797 "src/f32-vunary/gen/vsqr-neon-x8.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700798 "src/q8-avgpool/9p8x-minmax-neon-c8.c",
799 "src/q8-avgpool/9x-minmax-neon-c8.c",
Marat Dukhande06f492020-04-09 00:19:31 -0700800 "src/q8-dwconv/up8x9-minmax-neon.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700801 "src/q8-gavgpool/7p7x-minmax-neon-c8.c",
802 "src/q8-gavgpool/7x-minmax-neon-c8.c",
Marat Dukhande06f492020-04-09 00:19:31 -0700803 "src/q8-gemm/4x8-minmax-neon.c",
804 "src/q8-gemm/8x8-minmax-neon.c",
805 "src/q8-igemm/4x8-minmax-neon.c",
806 "src/q8-igemm/8x8-minmax-neon.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700807 "src/q8-vadd/minmax-neon.c",
Marat Dukhan5c5fa962020-03-10 18:38:33 -0700808 "src/u8-clamp/neon-x64.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700809 "src/u8-maxpool/9p8x-minmax-neon-c16.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700810 "src/u8-rmax/neon.c",
Marat Dukhan3bb3bfc2020-05-19 17:42:46 -0700811 "src/x32-fill/neon.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700812 "src/x32-packx/x4-neon-st4.c",
Marat Dukhan63523d42020-05-22 17:07:33 -0700813 "src/x32-pad/neon.c",
Marat Dukhan57dccd82020-04-14 00:53:10 -0700814 "src/x32-unpool/neon.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700815 "src/x32-zip/x2-neon.c",
816 "src/x32-zip/x3-neon.c",
817 "src/x32-zip/x4-neon.c",
818 "src/x32-zip/xm-neon.c",
819 "src/x8-zip/x2-neon.c",
820 "src/x8-zip/x3-neon.c",
821 "src/x8-zip/x4-neon.c",
822 "src/x8-zip/xm-neon.c",
Marat Dukhan075088a2020-05-12 19:42:12 -0700823 "src/math/roundne-neon-addsub.c",
Marat Dukhanc9852ba2020-05-13 17:21:29 -0700824 "src/math/roundd-neon-addsub.c",
825 "src/math/roundd-neon-cvt.c",
826 "src/math/roundu-neon-addsub.c",
827 "src/math/roundu-neon-cvt.c",
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700828 "src/math/roundz-neon-addsub.c",
829 "src/math/roundz-neon-cvt.c",
Marat Dukhan68b3b452020-01-02 10:11:15 -0800830 "src/math/sigmoid-neon-frac-p9-p10-nr1recps.c",
Marat Dukhan77221d32020-01-06 10:04:39 -0800831 "src/math/sigmoid-neon-rr1-lut2048-p1-nr2recps.c",
832 "src/math/sigmoid-neon-rr1-lut64-p2-nr2recps.c",
833 "src/math/sigmoid-neon-rr1-p5-nr2recps.c",
834 "src/math/sigmoid-neon-rr2-lut2048-p1-nr2recps.c",
835 "src/math/sigmoid-neon-rr2-lut64-p2-nr2recps.c",
836 "src/math/sigmoid-neon-rr2-p5-nr2recps.c",
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700837 "src/requantization/precise-neon.c",
838 "src/requantization/fp32-neon.c",
839 "src/requantization/q31-neon.c",
840 "src/requantization/gemmlowp-neon.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700841]
842
# Source files tagged `neonfma`: f32 ibilinear, igemm, dwconv, gemm (including
# the gen-inc variants), hswish, ppmm, raddstoreexpminusmax, sigmoid and
# vmulcaddc micro-kernels, plus the exp, expminus and sigmoid implementations
# under src/math.
# The gemm and igemm entries in this list are the `dup` and `s4` variants only;
# the `neonfma-lane` variants are listed in AARCH64_NEONFMA_UKERNELS.
# NOTE(review): presumably that split exists because the lane-indexed FMA form
# is AArch64-only; confirm before moving entries between the two lists.
843NEONFMA_UKERNELS = [
Marat Dukhan660fd192020-03-10 04:55:30 -0700844 "src/f32-ibilinear/gen/neonfma-c4.c",
845 "src/f32-ibilinear/gen/neonfma-c8.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700846 "src/f32-igemm/gen/1x8-minmax-neonfma-dup-ld64.c",
847 "src/f32-igemm/gen/4x8-minmax-neonfma-dup-ld128.c",
848 "src/f32-igemm/gen/4x8-minmax-neonfma-dup-ld64.c",
849 "src/f32-igemm/gen/6x8-minmax-neonfma-dup-ld64.c",
850 "src/f32-igemm/gen/6x8-minmax-neonfma-dup-ld128.c",
851 "src/f32-igemm/gen/1x8s4-minmax-neonfma.c",
852 "src/f32-igemm/gen/4x8s4-minmax-neonfma.c",
853 "src/f32-igemm/gen/6x8s4-minmax-neonfma.c",
854 "src/f32-igemm/gen/8x8s4-minmax-neonfma.c",
Marat Dukhanf5425ea2020-04-24 01:46:00 -0700855 "src/f32-dwconv/gen/up4x4-minmax-neonfma.c",
856 "src/f32-dwconv/gen/up4x4-minmax-neonfma-acc2.c",
857 "src/f32-dwconv/gen/up8x4-minmax-neonfma.c",
858 "src/f32-dwconv/gen/up8x4-minmax-neonfma-acc2.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700859 "src/f32-dwconv/gen/up4x9-minmax-neonfma.c",
860 "src/f32-dwconv/gen/up4x9-minmax-neonfma-acc2.c",
861 "src/f32-dwconv/gen/up8x9-minmax-neonfma.c",
862 "src/f32-dwconv/gen/up8x9-minmax-neonfma-acc2.c",
Marat Dukhanf5425ea2020-04-24 01:46:00 -0700863 "src/f32-dwconv/gen/up4x25-minmax-neonfma.c",
864 "src/f32-dwconv/gen/up4x25-minmax-neonfma-acc2.c",
865 "src/f32-dwconv/gen/up8x25-minmax-neonfma.c",
866 "src/f32-dwconv/gen/up8x25-minmax-neonfma-acc2.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700867 "src/f32-gemm/gen/1x8-minmax-neonfma-dup-ld64.c",
868 "src/f32-gemm/gen/4x8-minmax-neonfma-dup-ld128.c",
869 "src/f32-gemm/gen/4x8-minmax-neonfma-dup-ld64.c",
870 "src/f32-gemm/gen/6x8-minmax-neonfma-dup-ld64.c",
871 "src/f32-gemm/gen/6x8-minmax-neonfma-dup-ld128.c",
872 "src/f32-gemm/gen/1x8s4-minmax-neonfma.c",
873 "src/f32-gemm/gen/4x8s4-minmax-neonfma.c",
874 "src/f32-gemm/gen/6x8s4-minmax-neonfma.c",
875 "src/f32-gemm/gen/8x8s4-minmax-neonfma.c",
876 "src/f32-gemm/gen-inc/1x8inc-minmax-neonfma-dup-ld64.c",
877 "src/f32-gemm/gen-inc/4x8inc-minmax-neonfma-dup-ld128.c",
878 "src/f32-gemm/gen-inc/4x8inc-minmax-neonfma-dup-ld64.c",
879 "src/f32-gemm/gen-inc/6x8inc-minmax-neonfma-dup-ld64.c",
880 "src/f32-gemm/gen-inc/6x8inc-minmax-neonfma-dup-ld128.c",
881 "src/f32-gemm/gen-inc/1x8s4inc-minmax-neonfma.c",
882 "src/f32-gemm/gen-inc/4x8s4inc-minmax-neonfma.c",
883 "src/f32-gemm/gen-inc/6x8s4inc-minmax-neonfma.c",
884 "src/f32-gemm/gen-inc/8x8s4inc-minmax-neonfma.c",
Marat Dukhan662faa02019-12-09 22:48:16 -0800885 "src/f32-hswish/gen/neonfma-x4.c",
886 "src/f32-hswish/gen/neonfma-x8.c",
Marat Dukhan1c587112020-04-08 20:04:28 -0700887 "src/f32-ppmm/gen/4x8-minmax-neonfma.c",
888 "src/f32-ppmm/gen/8x8-minmax-neonfma.c",
Marat Dukhan8137e4c2020-01-25 12:56:58 -0800889 "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x4.c",
890 "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x8.c",
891 "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x8-acc2.c",
892 "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x12.c",
893 "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x12-acc2.c",
894 "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x12-acc3.c",
895 "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x16.c",
896 "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x16-acc2.c",
897 "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x16-acc4.c",
898 "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x20.c",
899 "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x20-acc2.c",
900 "src/f32-raddstoreexpminusmax/gen/neonfma-p5-x20-acc5.c",
901 "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x4.c",
902 "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x8.c",
903 "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x8-acc2.c",
904 "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x12.c",
905 "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x12-acc2.c",
906 "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x12-acc3.c",
907 "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x16.c",
908 "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x16-acc2.c",
909 "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x16-acc4.c",
910 "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x20.c",
911 "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x20-acc2.c",
912 "src/f32-raddstoreexpminusmax/gen/neonfma-lut64-p2-x20-acc5.c",
Marat Dukhan4a24a582020-01-06 13:30:00 -0800913 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x4.c",
914 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x8.c",
915 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x12.c",
916 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x16.c",
917 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x20.c",
918 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2fma-x24.c",
919 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x4.c",
920 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x8.c",
921 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x12.c",
922 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x16.c",
923 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x20.c",
924 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr1recps1fma-x24.c",
925 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x4.c",
926 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x8.c",
927 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x12.c",
928 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x16.c",
929 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x20.c",
930 "src/f32-sigmoid/gen/neonfma-rr1-p5-nr2recps-x24.c",
931 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x4.c",
932 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x8.c",
933 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x12.c",
934 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x16.c",
935 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x20.c",
936 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2fma-x24.c",
937 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x4.c",
938 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x8.c",
939 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x12.c",
940 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x16.c",
941 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x20.c",
942 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr1recps1fma-x24.c",
943 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x4.c",
944 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x8.c",
945 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x12.c",
946 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x16.c",
947 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x20.c",
948 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-nr2recps-x24.c",
949 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x4.c",
950 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x8.c",
951 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x12.c",
952 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x16.c",
953 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x20.c",
954 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2fma-x24.c",
955 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x4.c",
956 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c",
957 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c",
958 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x16.c",
959 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x20.c",
960 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr1recps1fma-x24.c",
961 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x4.c",
962 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x8.c",
963 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x12.c",
964 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x16.c",
965 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x20.c",
966 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-nr2recps-x24.c",
Marat Dukhan99936602020-04-11 16:47:01 -0700967 "src/f32-vmulcaddc/gen/c4-minmax-neonfma-2x.c",
968 "src/f32-vmulcaddc/gen/c8-minmax-neonfma-2x.c",
Marat Dukhan797a8fe2019-11-14 20:21:57 -0800969 "src/math/exp-neonfma-lut64-p2.c",
970 "src/math/exp-neonfma-p5.c",
Marat Dukhan189ae802019-11-26 11:28:44 -0800971 "src/math/expminus-neonfma-lut2048-p1.c",
972 "src/math/expminus-neonfma-lut64-p2.c",
Marat Dukhan346a9e52019-11-15 09:06:30 -0800973 "src/math/expminus-neonfma-p5.c",
Marat Dukhan77221d32020-01-06 10:04:39 -0800974 "src/math/sigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma.c",
975 "src/math/sigmoid-neonfma-rr1-lut2048-p1-nr2fma.c",
976 "src/math/sigmoid-neonfma-rr1-lut2048-p1-nr2recps.c",
977 "src/math/sigmoid-neonfma-rr1-lut64-p2-nr1recps1fma.c",
978 "src/math/sigmoid-neonfma-rr1-lut64-p2-nr2fma.c",
979 "src/math/sigmoid-neonfma-rr1-lut64-p2-nr2recps.c",
980 "src/math/sigmoid-neonfma-rr1-p5-nr1recps1fma.c",
981 "src/math/sigmoid-neonfma-rr1-p5-nr2fma.c",
982 "src/math/sigmoid-neonfma-rr1-p5-nr2recps.c",
983 "src/math/sigmoid-neonfma-rr2-lut2048-p1-nr1recps1fma.c",
984 "src/math/sigmoid-neonfma-rr2-lut2048-p1-nr2fma.c",
985 "src/math/sigmoid-neonfma-rr2-lut2048-p1-nr2recps.c",
986 "src/math/sigmoid-neonfma-rr2-lut64-p2-nr1recps1fma.c",
987 "src/math/sigmoid-neonfma-rr2-lut64-p2-nr2fma.c",
988 "src/math/sigmoid-neonfma-rr2-lut64-p2-nr2recps.c",
989 "src/math/sigmoid-neonfma-rr2-p5-nr1recps1fma.c",
990 "src/math/sigmoid-neonfma-rr2-p5-nr2fma.c",
991 "src/math/sigmoid-neonfma-rr2-p5-nr2recps.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -0700992]
993
# Source files collected under AARCH64_NEONFMA_UKERNELS: the `neonfma-lane`
# gemm/igemm variants, the neonfma conv-hwc, conv-hwc2chw and dwconv-chw
# kernels, the `div` sigmoid kernels, the spmm kernels, and the `div` sigmoid
# implementations under src/math.
# The first six entries (f32-vbinary vdiv/vdivc/vrdivc) are named `-neon-`
# rather than `-neonfma-`, unlike everything else in this list.
# NOTE(review): going by the list name these are AArch64-only sources, and the
# `-neon-` divide kernels are presumably here because vector divide is not
# available on 32-bit ARM NEON; confirm against the consuming rule.
994AARCH64_NEONFMA_UKERNELS = [
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700995 "src/f32-vbinary/gen/vdiv-minmax-neon-x4.c",
996 "src/f32-vbinary/gen/vdiv-minmax-neon-x8.c",
997 "src/f32-vbinary/gen/vdivc-minmax-neon-x4.c",
998 "src/f32-vbinary/gen/vdivc-minmax-neon-x8.c",
999 "src/f32-vbinary/gen/vrdivc-minmax-neon-x4.c",
1000 "src/f32-vbinary/gen/vrdivc-minmax-neon-x8.c",
Marat Dukhan1c587112020-04-08 20:04:28 -07001001 "src/f32-gemm/gen/1x8-minmax-neonfma-lane-ld64.c",
1002 "src/f32-gemm/gen/4x2-minmax-neonfma-lane-ld64.c",
1003 "src/f32-gemm/gen/4x8-minmax-neonfma-lane-ld128.c",
1004 "src/f32-gemm/gen/4x8-minmax-neonfma-lane-ld64.c",
1005 "src/f32-gemm/gen/5x8-minmax-neonfma-lane-ld64.c",
1006 "src/f32-gemm/gen/6x8-minmax-neonfma-lane-ld64.c",
1007 "src/f32-gemm/gen/6x8-minmax-neonfma-lane-ld128.c",
1008 "src/f32-gemm/gen-inc/1x8inc-minmax-neonfma-lane-ld64.c",
1009 "src/f32-gemm/gen-inc/4x8inc-minmax-neonfma-lane-ld128.c",
1010 "src/f32-gemm/gen-inc/4x8inc-minmax-neonfma-lane-ld64.c",
1011 "src/f32-gemm/gen-inc/5x8inc-minmax-neonfma-lane-ld64.c",
1012 "src/f32-gemm/gen-inc/6x8inc-minmax-neonfma-lane-ld64.c",
1013 "src/f32-gemm/gen-inc/6x8inc-minmax-neonfma-lane-ld128.c",
1014 "src/f32-igemm/gen/1x8-minmax-neonfma-lane-ld64.c",
1015 "src/f32-igemm/gen/4x2-minmax-neonfma-lane-ld64.c",
1016 "src/f32-igemm/gen/4x4-minmax-neonfma-lane-ld64.c",
1017 "src/f32-igemm/gen/4x8-minmax-neonfma-lane-ld128.c",
1018 "src/f32-igemm/gen/4x8-minmax-neonfma-lane-ld64.c",
1019 "src/f32-igemm/gen/6x8-minmax-neonfma-lane-ld64.c",
1020 "src/f32-igemm/gen/6x8-minmax-neonfma-lane-ld128.c",
Marat Dukhan56b10cd2020-05-18 09:35:49 -07001021 "src/f32-conv-hwc/gen/3x3s2p0p1c3x4-neonfma-2x1.c",
1022 "src/f32-conv-hwc/gen/3x3s2p0p1c3x8-neonfma-2x1.c",
Marat Dukhance7a3f82020-05-17 21:46:44 -07001023 "src/f32-conv-hwc/gen/3x3s2p0p1c3x4-neonfma-2x2.c",
1024 "src/f32-conv-hwc/gen/3x3s2p0p1c3x8-neonfma-2x2.c",
Marat Dukhan56b10cd2020-05-18 09:35:49 -07001025 "src/f32-conv-hwc/gen/3x3s2p1c3x4-neonfma-2x1.c",
1026 "src/f32-conv-hwc/gen/3x3s2p1c3x8-neonfma-2x1.c",
Marat Dukhance7a3f82020-05-17 21:46:44 -07001027 "src/f32-conv-hwc/gen/3x3s2p1c3x4-neonfma-2x2.c",
1028 "src/f32-conv-hwc/gen/3x3s2p1c3x8-neonfma-2x2.c",
Marat Dukhan1f29b802020-05-15 23:46:39 -07001029 "src/f32-conv-hwc2chw/3x3s2p1c3x4-neonfma-2x2.c",
1030 "src/f32-dwconv-chw/3x3p1-neonfma.c",
1031 "src/f32-dwconv-chw/5x5p2-neonfma.c",
1032 "src/f32-dwconv-chw/3x3s2p1-neonfma.c",
1033 "src/f32-dwconv-chw/5x5s2p2-neonfma.c",
Marat Dukhan4a24a582020-01-06 13:30:00 -08001034 "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x4.c",
1035 "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x8.c",
1036 "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x12.c",
1037 "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x16.c",
1038 "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x20.c",
1039 "src/f32-sigmoid/gen/neonfma-rr1-p5-div-x24.c",
1040 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x4.c",
1041 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x8.c",
1042 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x12.c",
1043 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x16.c",
1044 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x20.c",
1045 "src/f32-sigmoid/gen/neonfma-rr1-lut64-p2-div-x24.c",
1046 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x4.c",
1047 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x8.c",
1048 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x12.c",
1049 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x16.c",
1050 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x20.c",
1051 "src/f32-sigmoid/gen/neonfma-rr1-lut2048-p1-div-x24.c",
Marat Dukhan355ab432020-04-09 19:01:52 -07001052 "src/f32-spmm/gen/12x1-minmax-neonfma.c",
1053 "src/f32-spmm/gen/12x2-minmax-neonfma.c",
1054 "src/f32-spmm/gen/12x4-minmax-neonfma.c",
1055 "src/f32-spmm/gen/16x1-minmax-neonfma-pipelined.c",
1056 "src/f32-spmm/gen/16x1-minmax-neonfma-unroll2.c",
1057 "src/f32-spmm/gen/16x1-minmax-neonfma.c",
1058 "src/f32-spmm/gen/16x2-minmax-neonfma.c",
1059 "src/f32-spmm/gen/16x4-minmax-neonfma.c",
1060 "src/f32-spmm/gen/4x1-minmax-neonfma-pipelined.c",
1061 "src/f32-spmm/gen/4x1-minmax-neonfma-unroll2.c",
1062 "src/f32-spmm/gen/4x1-minmax-neonfma.c",
1063 "src/f32-spmm/gen/4x2-minmax-neonfma.c",
1064 "src/f32-spmm/gen/4x4-minmax-neonfma.c",
1065 "src/f32-spmm/gen/8x1-minmax-neonfma-pipelined.c",
1066 "src/f32-spmm/gen/8x1-minmax-neonfma-unroll2.c",
1067 "src/f32-spmm/gen/8x1-minmax-neonfma.c",
1068 "src/f32-spmm/gen/8x2-minmax-neonfma.c",
1069 "src/f32-spmm/gen/8x4-minmax-neonfma.c",
Marat Dukhan77221d32020-01-06 10:04:39 -08001070 "src/math/sigmoid-neonfma-rr1-lut2048-p1-div.c",
1071 "src/math/sigmoid-neonfma-rr1-lut64-p2-div.c",
1072 "src/math/sigmoid-neonfma-rr1-p5-div.c",
1073 "src/math/sigmoid-neonfma-rr2-lut2048-p1-div.c",
1074 "src/math/sigmoid-neonfma-rr2-lut64-p2-div.c",
1075 "src/math/sigmoid-neonfma-rr2-p5-div.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001076]
1077
# Source files tagged `neonv8`: the f32-vrnd rounding micro-kernels (vrndne,
# vrndz, vrndu and vrndd, each in x4 and x8 variants) and the roundne, roundd,
# roundu and roundz implementations under src/math.
# NOTE(review): presumably these need ARMv8 NEON rounding instructions and so
# get their own compiler flags; confirm against the rule consuming this list.
Marat Dukhan8853b822020-05-07 12:19:01 -07001078NEONV8_UKERNELS = [
Marat Dukhaneecf8fd2020-06-09 08:59:37 -07001079 "src/f32-vrnd/gen/vrndne-neonv8-x4.c",
1080 "src/f32-vrnd/gen/vrndne-neonv8-x8.c",
1081 "src/f32-vrnd/gen/vrndz-neonv8-x4.c",
1082 "src/f32-vrnd/gen/vrndz-neonv8-x8.c",
1083 "src/f32-vrnd/gen/vrndu-neonv8-x4.c",
1084 "src/f32-vrnd/gen/vrndu-neonv8-x8.c",
1085 "src/f32-vrnd/gen/vrndd-neonv8-x4.c",
1086 "src/f32-vrnd/gen/vrndd-neonv8-x8.c",
Marat Dukhan8853b822020-05-07 12:19:01 -07001087 "src/math/roundne-neonv8.c",
Marat Dukhanc9852ba2020-05-13 17:21:29 -07001088 "src/math/roundd-neonv8.c",
1089 "src/math/roundu-neonv8.c",
Marat Dukhan2dbb9442020-05-12 20:43:43 -07001090 "src/math/roundz-neonv8.c",
Marat Dukhan8853b822020-05-07 12:19:01 -07001091]
1092
Marat Dukhan08c4a432019-10-03 09:29:21 -07001093AARCH64_NEONFP16ARITH_UKERNELS = [
Frank Barchardb1966592020-05-12 13:47:06 -07001094 "src/f16-clamp/gen/neonfp16arith-x16.c",
1095 "src/f16-clamp/gen/neonfp16arith-x8.c",
Frank Barchard5a599a62020-06-04 20:12:44 -07001096 "src/f16-dwconv/gen/up16x25-minmax-neonfp16arith-acc2.c",
1097 "src/f16-dwconv/gen/up16x25-minmax-neonfp16arith.c",
1098 "src/f16-dwconv/gen/up16x4-minmax-neonfp16arith-acc2.c",
1099 "src/f16-dwconv/gen/up16x4-minmax-neonfp16arith.c",
1100 "src/f16-dwconv/gen/up16x9-minmax-neonfp16arith-acc2.c",
1101 "src/f16-dwconv/gen/up16x9-minmax-neonfp16arith.c",
1102 "src/f16-dwconv/gen/up8x25-minmax-neonfp16arith-acc2.c",
1103 "src/f16-dwconv/gen/up8x25-minmax-neonfp16arith.c",
1104 "src/f16-dwconv/gen/up8x4-minmax-neonfp16arith-acc2.c",
1105 "src/f16-dwconv/gen/up8x4-minmax-neonfp16arith.c",
1106 "src/f16-dwconv/gen/up8x9-minmax-neonfp16arith-acc2.c",
1107 "src/f16-dwconv/gen/up8x9-minmax-neonfp16arith.c",
Frank Barchard0bb49a72020-06-04 11:35:11 -07001108 "src/f16-gavgpool/7p7x-minmax-neonfp16arith-c8.c",
1109 "src/f16-gavgpool/7x-minmax-neonfp16arith-c8.c",
Frank Barchardb1966592020-05-12 13:47:06 -07001110 "src/f16-hswish/gen/neonfp16arith-x16.c",
1111 "src/f16-hswish/gen/neonfp16arith-x8.c",
1112 "src/f16-prelu/gen/neonfp16arith-2x16.c",
1113 "src/f16-prelu/gen/neonfp16arith-2x8.c",
Frank Barchardd793f6c2020-05-08 13:37:43 -07001114 "src/f16-vbinary/gen/vaddc-minmax-neonfp16arith-x16.c",
1115 "src/f16-vbinary/gen/vaddc-minmax-neonfp16arith-x8.c",
1116 "src/f16-vbinary/gen/vadd-minmax-neonfp16arith-x16.c",
1117 "src/f16-vbinary/gen/vadd-minmax-neonfp16arith-x8.c",
1118 "src/f16-vbinary/gen/vdivc-minmax-neonfp16arith-x16.c",
1119 "src/f16-vbinary/gen/vdivc-minmax-neonfp16arith-x8.c",
1120 "src/f16-vbinary/gen/vdiv-minmax-neonfp16arith-x16.c",
1121 "src/f16-vbinary/gen/vdiv-minmax-neonfp16arith-x8.c",
1122 "src/f16-vbinary/gen/vmaxc-neonfp16arith-x16.c",
1123 "src/f16-vbinary/gen/vmaxc-neonfp16arith-x8.c",
1124 "src/f16-vbinary/gen/vmax-neonfp16arith-x16.c",
1125 "src/f16-vbinary/gen/vmax-neonfp16arith-x8.c",
1126 "src/f16-vbinary/gen/vminc-neonfp16arith-x16.c",
1127 "src/f16-vbinary/gen/vminc-neonfp16arith-x8.c",
1128 "src/f16-vbinary/gen/vmin-neonfp16arith-x16.c",
1129 "src/f16-vbinary/gen/vmin-neonfp16arith-x8.c",
1130 "src/f16-vbinary/gen/vmulc-minmax-neonfp16arith-x16.c",
1131 "src/f16-vbinary/gen/vmulc-minmax-neonfp16arith-x8.c",
1132 "src/f16-vbinary/gen/vmul-minmax-neonfp16arith-x16.c",
1133 "src/f16-vbinary/gen/vmul-minmax-neonfp16arith-x8.c",
1134 "src/f16-vbinary/gen/vrdivc-minmax-neonfp16arith-x16.c",
1135 "src/f16-vbinary/gen/vrdivc-minmax-neonfp16arith-x8.c",
1136 "src/f16-vbinary/gen/vrsubc-minmax-neonfp16arith-x16.c",
1137 "src/f16-vbinary/gen/vrsubc-minmax-neonfp16arith-x8.c",
1138 "src/f16-vbinary/gen/vsubc-minmax-neonfp16arith-x16.c",
1139 "src/f16-vbinary/gen/vsubc-minmax-neonfp16arith-x8.c",
1140 "src/f16-vbinary/gen/vsub-minmax-neonfp16arith-x16.c",
1141 "src/f16-vbinary/gen/vsub-minmax-neonfp16arith-x8.c",
Frank Barchard2a1049c2020-06-03 02:31:27 -07001142 "src/f16-vmulcaddc/gen/c16-minmax-neonfp16arith-2x.c",
1143 "src/f16-vmulcaddc/gen/c8-minmax-neonfp16arith-2x.c",
Frank Barchard83ea9392020-04-20 20:15:14 -07001144 "src/f16-gemm/gen/1x8-minmax-neonfp16arith-ld64.c",
1145 "src/f16-gemm/gen/4x8-minmax-neonfp16arith-ld64.c",
1146 "src/f16-gemm/gen/6x8-minmax-neonfp16arith-ld64.c",
1147 "src/f16-gemm/gen/8x8-minmax-neonfp16arith-ld64.c",
1148 "src/f16-gemm/gen-inc/1x8inc-minmax-neonfp16arith-ld64.c",
1149 "src/f16-gemm/gen-inc/4x8inc-minmax-neonfp16arith-ld64.c",
1150 "src/f16-gemm/gen-inc/6x8inc-minmax-neonfp16arith-ld64.c",
1151 "src/f16-gemm/gen-inc/8x8inc-minmax-neonfp16arith-ld64.c",
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001152 "src/f16-igemm/gen/1x8-minmax-neonfp16arith-ld64.c",
1153 "src/f16-igemm/gen/4x8-minmax-neonfp16arith-ld64.c",
1154 "src/f16-igemm/gen/6x8-minmax-neonfp16arith-ld64.c",
1155 "src/f16-igemm/gen/8x8-minmax-neonfp16arith-ld64.c",
Frank Barchard3f9f99f2020-05-06 01:12:04 -07001156 "src/f16-gemm/gen/1x16-minmax-neonfp16arith-ld64.c",
1157 "src/f16-gemm/gen/4x16-minmax-neonfp16arith-ld64.c",
1158 "src/f16-gemm/gen/6x16-minmax-neonfp16arith-ld64.c",
1159 "src/f16-gemm/gen/8x16-minmax-neonfp16arith-ld64.c",
1160 "src/f16-gemm/gen-inc/1x16inc-minmax-neonfp16arith-ld64.c",
1161 "src/f16-gemm/gen-inc/4x16inc-minmax-neonfp16arith-ld64.c",
1162 "src/f16-gemm/gen-inc/6x16inc-minmax-neonfp16arith-ld64.c",
1163 "src/f16-gemm/gen-inc/8x16inc-minmax-neonfp16arith-ld64.c",
1164 "src/f16-igemm/gen/1x16-minmax-neonfp16arith-ld64.c",
1165 "src/f16-igemm/gen/4x16-minmax-neonfp16arith-ld64.c",
1166 "src/f16-igemm/gen/6x16-minmax-neonfp16arith-ld64.c",
1167 "src/f16-igemm/gen/8x16-minmax-neonfp16arith-ld64.c",
Marat Dukhan355ab432020-04-09 19:01:52 -07001168 "src/f16-spmm/gen/8x1-minmax-neonfp16arith.c",
1169 "src/f16-spmm/gen/8x1-minmax-neonfp16arith-unroll2.c",
1170 "src/f16-spmm/gen/16x1-minmax-neonfp16arith.c",
1171 "src/f16-spmm/gen/16x1-minmax-neonfp16arith-unroll2.c",
1172 "src/f16-spmm/gen/24x1-minmax-neonfp16arith.c",
1173 "src/f16-spmm/gen/24x1-minmax-neonfp16arith-unroll2.c",
1174 "src/f16-spmm/gen/32x1-minmax-neonfp16arith.c",
1175 "src/f16-spmm/gen/32x1-minmax-neonfp16arith-unroll2.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001176]
1177
# C sources grouped under the SSE tag; every path below carries "sse" in its
# file name.
# NOTE(review): presumably the rule consuming this list compiles these files
# with SSE enabled -- confirm against the target that references it.
SSE_UKERNELS = [
    "src/f32-avgpool/9p8x-minmax-sse-c4.c",
    "src/f32-avgpool/9x-minmax-sse-c4.c",
    "src/f32-clamp/gen/sse-x4.c",
    "src/f32-clamp/gen/sse-x8.c",
    "src/f32-conv-hwc2chw/3x3s2p1c3x4-sse-1x1.c",
    "src/f32-conv-hwc2chw/3x3s2p1c3x4-sse-2x2.c",
    "src/f32-dwconv-chw/3x3p1-sse.c",
    "src/f32-dwconv-chw/3x3s2p1-sse.c",
    "src/f32-dwconv/gen/up4x25-minmax-sse-acc2.c",
    "src/f32-dwconv/gen/up4x25-minmax-sse.c",
    "src/f32-dwconv/gen/up4x4-minmax-sse-acc2.c",
    "src/f32-dwconv/gen/up4x4-minmax-sse.c",
    "src/f32-dwconv/gen/up4x9-minmax-sse-acc2.c",
    "src/f32-dwconv/gen/up4x9-minmax-sse.c",
    "src/f32-dwconv/gen/up8x25-minmax-sse-acc2.c",
    "src/f32-dwconv/gen/up8x25-minmax-sse.c",
    "src/f32-dwconv/gen/up8x4-minmax-sse-acc2.c",
    "src/f32-dwconv/gen/up8x4-minmax-sse.c",
    "src/f32-dwconv/gen/up8x9-minmax-sse-acc2.c",
    "src/f32-dwconv/gen/up8x9-minmax-sse.c",
    "src/f32-gavgpool-cw/sse-x4.c",
    "src/f32-gavgpool/7p7x-minmax-sse-c4.c",
    "src/f32-gavgpool/7x-minmax-sse-c4.c",
    "src/f32-gemm/gen/1x8-minmax-sse-dup.c",
    "src/f32-gemm/gen/1x8-minmax-sse-load1.c",
    "src/f32-gemm/gen/1x8s4-minmax-sse.c",
    "src/f32-gemm/gen/4x2c4-minmax-sse.c",
    "src/f32-gemm/gen/4x8-minmax-sse-dup.c",
    "src/f32-gemm/gen/4x8-minmax-sse-load1.c",
    "src/f32-gemm/gen/4x8s4-minmax-sse.c",
    "src/f32-gemm/gen-inc/1x8inc-minmax-sse-dup.c",
    "src/f32-gemm/gen-inc/1x8inc-minmax-sse-load1.c",
    "src/f32-gemm/gen-inc/1x8s4inc-minmax-sse.c",
    "src/f32-gemm/gen-inc/4x8inc-minmax-sse-dup.c",
    "src/f32-gemm/gen-inc/4x8inc-minmax-sse-load1.c",
    "src/f32-gemm/gen-inc/4x8s4inc-minmax-sse.c",
    "src/f32-hswish/gen/sse-x4.c",
    "src/f32-hswish/gen/sse-x8.c",
    "src/f32-ibilinear/gen/sse-c4.c",
    "src/f32-ibilinear/gen/sse-c8.c",
    "src/f32-igemm/gen/1x8-minmax-sse-dup.c",
    "src/f32-igemm/gen/1x8-minmax-sse-load1.c",
    "src/f32-igemm/gen/1x8s4-minmax-sse.c",
    "src/f32-igemm/gen/4x2c4-minmax-sse.c",
    "src/f32-igemm/gen/4x8-minmax-sse-dup.c",
    "src/f32-igemm/gen/4x8-minmax-sse-load1.c",
    "src/f32-igemm/gen/4x8s4-minmax-sse.c",
    "src/f32-maxpool/9p8x-minmax-sse-c4.c",
    "src/f32-pavgpool/9p8x-minmax-sse-c4.c",
    "src/f32-pavgpool/9x-minmax-sse-c4.c",
    "src/f32-ppmm/gen/4x8-minmax-sse.c",
    "src/f32-rmax/sse.c",
    "src/f32-spmm/gen/4x1-minmax-sse.c",
    "src/f32-spmm/gen/8x1-minmax-sse.c",
    "src/f32-spmm/gen/16x1-minmax-sse.c",
    "src/f32-vbinary/gen/vadd-minmax-sse-x4.c",
    "src/f32-vbinary/gen/vadd-minmax-sse-x8.c",
    "src/f32-vbinary/gen/vaddc-minmax-sse-x4.c",
    "src/f32-vbinary/gen/vaddc-minmax-sse-x8.c",
    "src/f32-vbinary/gen/vdiv-minmax-sse-x4.c",
    "src/f32-vbinary/gen/vdiv-minmax-sse-x8.c",
    "src/f32-vbinary/gen/vdivc-minmax-sse-x4.c",
    "src/f32-vbinary/gen/vdivc-minmax-sse-x8.c",
    "src/f32-vbinary/gen/vmax-sse-x4.c",
    "src/f32-vbinary/gen/vmax-sse-x8.c",
    "src/f32-vbinary/gen/vmaxc-sse-x4.c",
    "src/f32-vbinary/gen/vmaxc-sse-x8.c",
    "src/f32-vbinary/gen/vmin-sse-x4.c",
    "src/f32-vbinary/gen/vmin-sse-x8.c",
    "src/f32-vbinary/gen/vminc-sse-x4.c",
    "src/f32-vbinary/gen/vminc-sse-x8.c",
    "src/f32-vbinary/gen/vmul-minmax-sse-x4.c",
    "src/f32-vbinary/gen/vmul-minmax-sse-x8.c",
    "src/f32-vbinary/gen/vmulc-minmax-sse-x4.c",
    "src/f32-vbinary/gen/vmulc-minmax-sse-x8.c",
    "src/f32-vbinary/gen/vrdivc-minmax-sse-x4.c",
    "src/f32-vbinary/gen/vrdivc-minmax-sse-x8.c",
    "src/f32-vbinary/gen/vrsqrdiffc-sse-x4.c",
    "src/f32-vbinary/gen/vrsqrdiffc-sse-x8.c",
    "src/f32-vbinary/gen/vrsubc-minmax-sse-x4.c",
    "src/f32-vbinary/gen/vrsubc-minmax-sse-x8.c",
    "src/f32-vbinary/gen/vsqrdiff-sse-x4.c",
    "src/f32-vbinary/gen/vsqrdiff-sse-x8.c",
    "src/f32-vbinary/gen/vsqrdiffc-sse-x4.c",
    "src/f32-vbinary/gen/vsqrdiffc-sse-x8.c",
    "src/f32-vbinary/gen/vsub-minmax-sse-x4.c",
    "src/f32-vbinary/gen/vsub-minmax-sse-x8.c",
    "src/f32-vbinary/gen/vsubc-minmax-sse-x4.c",
    "src/f32-vbinary/gen/vsubc-minmax-sse-x8.c",
    "src/f32-vmulcaddc/gen/c4-minmax-sse-2x.c",
    "src/f32-vmulcaddc/gen/c8-minmax-sse-2x.c",
    "src/f32-vunary/gen/vabs-sse-x4.c",
    "src/f32-vunary/gen/vabs-sse-x8.c",
    "src/f32-vunary/gen/vneg-sse-x4.c",
    "src/f32-vunary/gen/vneg-sse-x8.c",
    "src/f32-vunary/gen/vsqr-sse-x4.c",
    "src/f32-vunary/gen/vsqr-sse-x8.c",
    "src/x32-fill/sse.c",
    "src/x32-packx/x4-sse.c",
    "src/x32-pad/sse.c",
    "src/math/roundne-sse-addsub.c",
    "src/math/roundd-sse-addsub.c",
    "src/math/roundu-sse-addsub.c",
    "src/math/roundz-sse-addsub.c",
]
1284
# C sources grouped under the SSE2 tag; every path below carries "sse2" in its
# file name.
# NOTE(review): presumably the rule consuming this list compiles these files
# with SSE2 enabled -- confirm against the target that references it.
SSE2_UKERNELS = [
    "src/f32-argmaxpool/9p8x-sse2-c4.c",
    "src/f32-argmaxpool/4x-sse2-c4.c",
    "src/f32-argmaxpool/9x-sse2-c4.c",
    "src/f32-prelu/gen/sse2-2x4.c",
    "src/f32-prelu/gen/sse2-2x8.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x4.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x8.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x8-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x12.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x12-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x12-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x16.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x16-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x16-acc4.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x20.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x20-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/sse2-p5-x20-acc5.c",
    "src/f32-sigmoid/gen/sse2-p5-div-x4.c",
    "src/f32-sigmoid/gen/sse2-p5-div-x8.c",
    "src/f32-sigmoid/gen/sse2-p5-div-x12.c",
    "src/f32-sigmoid/gen/sse2-p5-div-x16.c",
    "src/f32-sigmoid/gen/sse2-p5-div-x20.c",
    "src/f32-sigmoid/gen/sse2-p5-div-x24.c",
    "src/f32-vrnd/gen/vrndne-sse2-x4.c",
    "src/f32-vrnd/gen/vrndne-sse2-x8.c",
    "src/f32-vrnd/gen/vrndz-sse2-x4.c",
    "src/f32-vrnd/gen/vrndz-sse2-x8.c",
    "src/f32-vrnd/gen/vrndu-sse2-x4.c",
    "src/f32-vrnd/gen/vrndu-sse2-x8.c",
    "src/f32-vrnd/gen/vrndd-sse2-x4.c",
    "src/f32-vrnd/gen/vrndd-sse2-x8.c",
    "src/q8-avgpool/9p8x-minmax-sse2-c8.c",
    "src/q8-avgpool/9x-minmax-sse2-c8.c",
    "src/q8-igemm/4x4c2-minmax-sse2.c",
    "src/q8-dwconv/up8x9-minmax-sse2.c",
    "src/q8-gavgpool/7p7x-minmax-sse2-c8.c",
    "src/q8-gavgpool/7x-minmax-sse2-c8.c",
    "src/q8-gemm/2x4c8-minmax-sse2.c",
    "src/q8-gemm/4x4c2-minmax-sse2.c",
    "src/q8-vadd/minmax-sse2.c",
    "src/u8-clamp/sse2-x64.c",
    "src/u8-maxpool/9p8x-minmax-sse2-c16.c",
    "src/u8-rmax/sse2.c",
    "src/x32-unpool/sse2.c",
    "src/x32-zip/x2-sse2.c",
    "src/x32-zip/x3-sse2.c",
    "src/x32-zip/x4-sse2.c",
    "src/x32-zip/xm-sse2.c",
    "src/x8-zip/x2-sse2.c",
    "src/x8-zip/x3-sse2.c",
    "src/x8-zip/x4-sse2.c",
    "src/x8-zip/xm-sse2.c",
    "src/math/exp-sse2-p5.c",
    "src/math/expminus-sse2-p5.c",
    "src/math/roundne-sse2-cvt.c",
    "src/math/roundd-sse2-cvt.c",
    "src/math/roundu-sse2-cvt.c",
    "src/math/roundz-sse2-cvt.c",
    "src/math/sigmoid-sse2-p5-div.c",
    "src/requantization/precise-sse2.c",
    "src/requantization/fp32-sse2.c",
    "src/requantization/q31-sse2.c",
    "src/requantization/gemmlowp-sse2.c",
]
1350
# C sources grouped under the SSSE3 tag; all three are requantization sources
# with "ssse3" in the file name.
# NOTE(review): presumably the rule consuming this list compiles these files
# with SSSE3 enabled -- confirm against the target that references it.
SSSE3_UKERNELS = [
    "src/requantization/precise-ssse3.c",
    "src/requantization/q31-ssse3.c",
    "src/requantization/gemmlowp-ssse3.c",
]
1356
# C sources grouped under the SSE4.1 tag. File names use "sse41", except the
# requantization sources, which are named with a plain "sse4" suffix.
# NOTE(review): presumably the rule consuming this list compiles these files
# with SSE4.1 enabled -- confirm against the target that references it.
SSE41_UKERNELS = [
    "src/f32-prelu/gen/sse41-2x4.c",
    "src/f32-prelu/gen/sse41-2x8.c",
    "src/f32-sigmoid/gen/sse41-p5-div-x4.c",
    "src/f32-sigmoid/gen/sse41-p5-div-x8.c",
    "src/f32-sigmoid/gen/sse41-p5-div-x12.c",
    "src/f32-sigmoid/gen/sse41-p5-div-x16.c",
    "src/f32-sigmoid/gen/sse41-p5-div-x20.c",
    "src/f32-sigmoid/gen/sse41-p5-div-x24.c",
    "src/f32-vrnd/gen/vrndne-sse41-x4.c",
    "src/f32-vrnd/gen/vrndne-sse41-x8.c",
    "src/f32-vrnd/gen/vrndz-sse41-x4.c",
    "src/f32-vrnd/gen/vrndz-sse41-x8.c",
    "src/f32-vrnd/gen/vrndu-sse41-x4.c",
    "src/f32-vrnd/gen/vrndu-sse41-x8.c",
    "src/f32-vrnd/gen/vrndd-sse41-x4.c",
    "src/f32-vrnd/gen/vrndd-sse41-x8.c",
    "src/math/roundne-sse41.c",
    "src/math/roundd-sse41.c",
    "src/math/roundu-sse41.c",
    "src/math/roundz-sse41.c",
    "src/requantization/precise-sse4.c",
    "src/requantization/q31-sse4.c",
    "src/requantization/gemmlowp-sse4.c",
]
1382
# C sources grouped under the AVX tag; every path below carries "avx" in its
# file name.
# NOTE(review): presumably the rule consuming this list compiles these files
# with AVX enabled -- confirm against the target that references it.
AVX_UKERNELS = [
    "src/f32-clamp/gen/avx-x8.c",
    "src/f32-clamp/gen/avx-x16.c",
    "src/f32-dwconv/gen/up16x4-minmax-avx-acc2.c",
    "src/f32-dwconv/gen/up16x4-minmax-avx.c",
    "src/f32-dwconv/gen/up8x4-minmax-avx-acc2.c",
    "src/f32-dwconv/gen/up8x4-minmax-avx.c",
    "src/f32-dwconv/gen/up16x9-minmax-avx-acc2.c",
    "src/f32-dwconv/gen/up16x9-minmax-avx.c",
    "src/f32-dwconv/gen/up8x9-minmax-avx-acc2.c",
    "src/f32-dwconv/gen/up8x9-minmax-avx.c",
    "src/f32-dwconv/gen/up16x25-minmax-avx-acc2.c",
    "src/f32-dwconv/gen/up16x25-minmax-avx.c",
    "src/f32-dwconv/gen/up8x25-minmax-avx-acc2.c",
    "src/f32-dwconv/gen/up8x25-minmax-avx.c",
    "src/f32-gemm/gen/1x8-minmax-avx-broadcast.c",
    "src/f32-gemm/gen/4x8-minmax-avx-broadcast.c",
    "src/f32-gemm/gen/5x8-minmax-avx-broadcast.c",
    "src/f32-gemm/gen/6x8-minmax-avx-broadcast.c",
    "src/f32-gemm/gen/7x8-minmax-avx-broadcast.c",
    "src/f32-gemm/gen/1x16-minmax-avx-broadcast.c",
    "src/f32-gemm/gen/3x16-minmax-avx-broadcast.c",
    "src/f32-gemm/gen/4x16-minmax-avx-broadcast.c",
    "src/f32-gemm/gen/5x16-minmax-avx-broadcast.c",
    "src/f32-gemm/gen-inc/1x8inc-minmax-avx-broadcast.c",
    "src/f32-gemm/gen-inc/4x8inc-minmax-avx-broadcast.c",
    "src/f32-gemm/gen-inc/5x8inc-minmax-avx-broadcast.c",
    "src/f32-gemm/gen-inc/6x8inc-minmax-avx-broadcast.c",
    "src/f32-gemm/gen-inc/7x8inc-minmax-avx-broadcast.c",
    "src/f32-gemm/gen-inc/1x16inc-minmax-avx-broadcast.c",
    "src/f32-gemm/gen-inc/3x16inc-minmax-avx-broadcast.c",
    "src/f32-gemm/gen-inc/4x16inc-minmax-avx-broadcast.c",
    "src/f32-gemm/gen-inc/5x16inc-minmax-avx-broadcast.c",
    "src/f32-hswish/gen/avx-x8.c",
    "src/f32-hswish/gen/avx-x16.c",
    "src/f32-igemm/gen/1x8-minmax-avx-broadcast.c",
    "src/f32-igemm/gen/4x8-minmax-avx-broadcast.c",
    "src/f32-igemm/gen/5x8-minmax-avx-broadcast.c",
    "src/f32-igemm/gen/6x8-minmax-avx-broadcast.c",
    "src/f32-igemm/gen/7x8-minmax-avx-broadcast.c",
    "src/f32-igemm/gen/1x16-minmax-avx-broadcast.c",
    "src/f32-igemm/gen/3x16-minmax-avx-broadcast.c",
    "src/f32-igemm/gen/4x16-minmax-avx-broadcast.c",
    "src/f32-igemm/gen/5x16-minmax-avx-broadcast.c",
    "src/f32-prelu/gen/avx-2x8.c",
    "src/f32-prelu/gen/avx-2x16.c",
    "src/f32-rmax/avx.c",
    "src/f32-vbinary/gen/vadd-minmax-avx-x8.c",
    "src/f32-vbinary/gen/vadd-minmax-avx-x16.c",
    "src/f32-vbinary/gen/vaddc-minmax-avx-x8.c",
    "src/f32-vbinary/gen/vaddc-minmax-avx-x16.c",
    "src/f32-vbinary/gen/vdiv-minmax-avx-x8.c",
    "src/f32-vbinary/gen/vdiv-minmax-avx-x16.c",
    "src/f32-vbinary/gen/vdivc-minmax-avx-x8.c",
    "src/f32-vbinary/gen/vdivc-minmax-avx-x16.c",
    "src/f32-vbinary/gen/vmax-avx-x8.c",
    "src/f32-vbinary/gen/vmax-avx-x16.c",
    "src/f32-vbinary/gen/vmaxc-avx-x8.c",
    "src/f32-vbinary/gen/vmaxc-avx-x16.c",
    "src/f32-vbinary/gen/vmin-avx-x8.c",
    "src/f32-vbinary/gen/vmin-avx-x16.c",
    "src/f32-vbinary/gen/vminc-avx-x8.c",
    "src/f32-vbinary/gen/vminc-avx-x16.c",
    "src/f32-vbinary/gen/vmul-minmax-avx-x8.c",
    "src/f32-vbinary/gen/vmul-minmax-avx-x16.c",
    "src/f32-vbinary/gen/vmulc-minmax-avx-x8.c",
    "src/f32-vbinary/gen/vmulc-minmax-avx-x16.c",
    "src/f32-vbinary/gen/vrdivc-minmax-avx-x8.c",
    "src/f32-vbinary/gen/vrdivc-minmax-avx-x16.c",
    "src/f32-vbinary/gen/vrsqrdiffc-avx-x8.c",
    "src/f32-vbinary/gen/vrsqrdiffc-avx-x16.c",
    "src/f32-vbinary/gen/vrsubc-minmax-avx-x8.c",
    "src/f32-vbinary/gen/vrsubc-minmax-avx-x16.c",
    "src/f32-vbinary/gen/vsqrdiff-avx-x8.c",
    "src/f32-vbinary/gen/vsqrdiff-avx-x16.c",
    "src/f32-vbinary/gen/vsqrdiffc-avx-x8.c",
    "src/f32-vbinary/gen/vsqrdiffc-avx-x16.c",
    "src/f32-vbinary/gen/vsub-minmax-avx-x8.c",
    "src/f32-vbinary/gen/vsub-minmax-avx-x16.c",
    "src/f32-vbinary/gen/vsubc-minmax-avx-x8.c",
    "src/f32-vbinary/gen/vsubc-minmax-avx-x16.c",
    "src/f32-vscale/avx-unroll32.c",
    "src/f32-vrnd/gen/vrndne-avx-x8.c",
    "src/f32-vrnd/gen/vrndne-avx-x16.c",
    "src/f32-vrnd/gen/vrndz-avx-x8.c",
    "src/f32-vrnd/gen/vrndz-avx-x16.c",
    "src/f32-vrnd/gen/vrndu-avx-x8.c",
    "src/f32-vrnd/gen/vrndu-avx-x16.c",
    "src/f32-vrnd/gen/vrndd-avx-x8.c",
    "src/f32-vrnd/gen/vrndd-avx-x16.c",
    "src/f32-vunary/gen/vabs-avx-x8.c",
    "src/f32-vunary/gen/vabs-avx-x16.c",
    "src/f32-vunary/gen/vneg-avx-x8.c",
    "src/f32-vunary/gen/vneg-avx-x16.c",
    "src/f32-vunary/gen/vsqr-avx-x8.c",
    "src/f32-vunary/gen/vsqr-avx-x16.c",
]
1480
# C sources grouped under the FMA3 tag; every path below carries "fma3" in its
# file name.
# NOTE(review): presumably the rule consuming this list compiles these files
# with FMA3 enabled -- confirm against the target that references it.
FMA3_UKERNELS = [
    "src/f32-dwconv/gen/up16x4-minmax-fma3-acc2.c",
    "src/f32-dwconv/gen/up16x4-minmax-fma3.c",
    "src/f32-dwconv/gen/up8x4-minmax-fma3-acc2.c",
    "src/f32-dwconv/gen/up8x4-minmax-fma3.c",
    "src/f32-dwconv/gen/up16x9-minmax-fma3-acc2.c",
    "src/f32-dwconv/gen/up16x9-minmax-fma3.c",
    "src/f32-dwconv/gen/up8x9-minmax-fma3-acc2.c",
    "src/f32-dwconv/gen/up8x9-minmax-fma3.c",
    "src/f32-dwconv/gen/up16x25-minmax-fma3-acc2.c",
    "src/f32-dwconv/gen/up16x25-minmax-fma3.c",
    "src/f32-dwconv/gen/up8x25-minmax-fma3-acc2.c",
    "src/f32-dwconv/gen/up8x25-minmax-fma3.c",
    "src/f32-gemm/gen/1x8-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen/4x8-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen/5x8-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen/6x8-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen/7x8-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen/8x8-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen/1x16-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen/3x16-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen/4x16-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen/5x16-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen/1x16s4-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen/3x16s4-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen/4x16s4-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen/5x16s4-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/1x8inc-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/4x8inc-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/5x8inc-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/6x8inc-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/7x8inc-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/8x8inc-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/1x16inc-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/3x16inc-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/4x16inc-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/5x16inc-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/1x16s4inc-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/3x16s4inc-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/4x16s4inc-minmax-fma3-broadcast.c",
    "src/f32-gemm/gen-inc/5x16s4inc-minmax-fma3-broadcast.c",
    "src/f32-hswish/gen/fma3-x8.c",
    "src/f32-hswish/gen/fma3-x16.c",
    "src/f32-igemm/gen/1x8-minmax-fma3-broadcast.c",
    "src/f32-igemm/gen/4x8-minmax-fma3-broadcast.c",
    "src/f32-igemm/gen/5x8-minmax-fma3-broadcast.c",
    "src/f32-igemm/gen/6x8-minmax-fma3-broadcast.c",
    "src/f32-igemm/gen/7x8-minmax-fma3-broadcast.c",
    "src/f32-igemm/gen/8x8-minmax-fma3-broadcast.c",
    "src/f32-igemm/gen/1x16-minmax-fma3-broadcast.c",
    "src/f32-igemm/gen/3x16-minmax-fma3-broadcast.c",
    "src/f32-igemm/gen/4x16-minmax-fma3-broadcast.c",
    "src/f32-igemm/gen/5x16-minmax-fma3-broadcast.c",
    "src/f32-igemm/gen/1x16s4-minmax-fma3-broadcast.c",
    "src/f32-igemm/gen/3x16s4-minmax-fma3-broadcast.c",
    "src/f32-igemm/gen/4x16s4-minmax-fma3-broadcast.c",
    "src/f32-igemm/gen/5x16s4-minmax-fma3-broadcast.c",
]
1539
# C sources grouped under the AVX2 tag; every path below carries "avx2" in its
# file name.
# NOTE(review): presumably the rule consuming this list compiles these files
# with AVX2 enabled -- confirm against the target that references it.
AVX2_UKERNELS = [
    "src/f32-raddexpminusmax/gen/avx2-p5-x64.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x64-acc2.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x64-acc4.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x72.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x72-acc3.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x80.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x80-acc2.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x80-acc5.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x96.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc2.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc3.c",
    "src/f32-raddexpminusmax/gen/avx2-p5-x96-acc6.c",
    "src/f32-raddextexp/gen/avx2-p5-x64.c",
    "src/f32-raddextexp/gen/avx2-p5-x64-acc2.c",
    "src/f32-raddextexp/gen/avx2-p5-x64-acc4.c",
    "src/f32-raddextexp/gen/avx2-p5-x72.c",
    "src/f32-raddextexp/gen/avx2-p5-x72-acc3.c",
    "src/f32-raddextexp/gen/avx2-p5-x80.c",
    "src/f32-raddextexp/gen/avx2-p5-x80-acc2.c",
    "src/f32-raddextexp/gen/avx2-p5-x80-acc5.c",
    "src/f32-raddextexp/gen/avx2-p5-x96.c",
    "src/f32-raddextexp/gen/avx2-p5-x96-acc2.c",
    "src/f32-raddextexp/gen/avx2-p5-x96-acc3.c",
    "src/f32-raddextexp/gen/avx2-p5-x96-acc6.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x64-acc4.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x72.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x72-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x80-acc5.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc2.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc3.c",
    "src/f32-raddstoreexpminusmax/gen/avx2-p5-x96-acc6.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x8.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x16.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x24.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x32.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x40.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x48.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x56.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x64.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x72.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-div-x80.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x8.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x16.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x24.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x32.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x40.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x48.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x56.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x64.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x72.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr1fma-x80.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x8.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x16.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x24.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x32.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x40.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x48.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x56.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x64.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x72.c",
    "src/f32-sigmoid/gen/avx2-rr1-p5-nr2fma-x80.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x8.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x16.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x24.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x32.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x40.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x48.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x56.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x64.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x72.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x80.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x88.c",
    "src/f32-vscaleexpminusmax/gen/avx2-p5-x96.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x8.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x16.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x24.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x32.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x40.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x48.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x56.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x64.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x72.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x80.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x88.c",
    "src/f32-vscaleextexp/gen/avx2-p5-x96.c",
    "src/math/exp-avx2-p5.c",
    "src/math/exp-avx2-perm-p3.c",
    "src/math/exp-avx2-perm-p4.c",
    "src/math/expminus-avx2-p5.c",
    "src/math/extexp-avx2-p5.c",
    "src/math/sigmoid-avx2-rr2-p5-div.c",
    "src/math/sigmoid-avx2-rr1-p5-div.c",
    "src/math/sigmoid-avx2-rr2-p5-nr2fma.c",
    "src/math/sigmoid-avx2-rr1-p5-nr2fma.c",
    "src/math/sigmoid-avx2-rr2-p5-nr1fma.c",
    "src/math/sigmoid-avx2-rr1-p5-nr1fma.c",
]
1643
Marat Dukhan08c4a432019-10-03 09:29:21 -07001644AVX512F_UKERNELS = [
Marat Dukhan5c5fa962020-03-10 18:38:33 -07001645 "src/f32-clamp/gen/avx512f-x16.c",
1646 "src/f32-clamp/gen/avx512f-x32.c",
Marat Dukhan1c587112020-04-08 20:04:28 -07001647 "src/f32-dwconv/gen/up32x4-minmax-avx512f-acc2.c",
1648 "src/f32-dwconv/gen/up32x4-minmax-avx512f.c",
1649 "src/f32-dwconv/gen/up16x4-minmax-avx512f-acc2.c",
1650 "src/f32-dwconv/gen/up16x4-minmax-avx512f.c",
1651 "src/f32-dwconv/gen/up32x9-minmax-avx512f-acc2.c",
1652 "src/f32-dwconv/gen/up32x9-minmax-avx512f.c",
1653 "src/f32-dwconv/gen/up16x9-minmax-avx512f-acc2.c",
1654 "src/f32-dwconv/gen/up16x9-minmax-avx512f.c",
1655 "src/f32-dwconv/gen/up32x25-minmax-avx512f-acc2.c",
1656 "src/f32-dwconv/gen/up32x25-minmax-avx512f.c",
1657 "src/f32-dwconv/gen/up16x25-minmax-avx512f-acc2.c",
1658 "src/f32-dwconv/gen/up16x25-minmax-avx512f.c",
1659 "src/f32-gemm/gen/1x16-minmax-avx512f-broadcast.c",
1660 "src/f32-gemm/gen/4x16-minmax-avx512f-broadcast.c",
1661 "src/f32-gemm/gen/5x16-minmax-avx512f-broadcast.c",
1662 "src/f32-gemm/gen/6x16-minmax-avx512f-broadcast.c",
1663 "src/f32-gemm/gen/7x16-minmax-avx512f-broadcast.c",
1664 "src/f32-gemm/gen/8x16-minmax-avx512f-broadcast.c",
1665 "src/f32-gemm/gen-inc/1x16inc-minmax-avx512f-broadcast.c",
1666 "src/f32-gemm/gen-inc/4x16inc-minmax-avx512f-broadcast.c",
1667 "src/f32-gemm/gen-inc/5x16inc-minmax-avx512f-broadcast.c",
1668 "src/f32-gemm/gen-inc/6x16inc-minmax-avx512f-broadcast.c",
1669 "src/f32-gemm/gen-inc/7x16inc-minmax-avx512f-broadcast.c",
1670 "src/f32-gemm/gen-inc/8x16inc-minmax-avx512f-broadcast.c",
Marat Dukhan662faa02019-12-09 22:48:16 -08001671 "src/f32-hswish/gen/avx512f-x16.c",
1672 "src/f32-hswish/gen/avx512f-x32.c",
Marat Dukhan1c587112020-04-08 20:04:28 -07001673 "src/f32-igemm/gen/1x16-minmax-avx512f-broadcast.c",
1674 "src/f32-igemm/gen/4x16-minmax-avx512f-broadcast.c",
1675 "src/f32-igemm/gen/5x16-minmax-avx512f-broadcast.c",
1676 "src/f32-igemm/gen/6x16-minmax-avx512f-broadcast.c",
1677 "src/f32-igemm/gen/7x16-minmax-avx512f-broadcast.c",
1678 "src/f32-igemm/gen/8x16-minmax-avx512f-broadcast.c",
Marat Dukhan90eca0a2020-03-11 00:52:23 -07001679 "src/f32-prelu/gen/avx512f-2x16.c",
1680 "src/f32-prelu/gen/avx512f-2x32.c",
Marat Dukhan4c4eb002019-12-08 21:27:49 -08001681 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128.c",
1682 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128-acc2.c",
1683 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x128-acc4.c",
1684 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x144.c",
1685 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x144-acc3.c",
1686 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160.c",
1687 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160-acc2.c",
1688 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x160-acc5.c",
1689 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192.c",
1690 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc2.c",
1691 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc3.c",
1692 "src/f32-raddexpminusmax/gen/avx512f-p5-scalef-x192-acc6.c",
1693 "src/f32-raddextexp/gen/avx512f-p5-scalef-x128.c",
1694 "src/f32-raddextexp/gen/avx512f-p5-scalef-x128-acc2.c",
1695 "src/f32-raddextexp/gen/avx512f-p5-scalef-x128-acc4.c",
1696 "src/f32-raddextexp/gen/avx512f-p5-scalef-x144.c",
1697 "src/f32-raddextexp/gen/avx512f-p5-scalef-x144-acc3.c",
1698 "src/f32-raddextexp/gen/avx512f-p5-scalef-x160.c",
1699 "src/f32-raddextexp/gen/avx512f-p5-scalef-x160-acc2.c",
1700 "src/f32-raddextexp/gen/avx512f-p5-scalef-x160-acc5.c",
1701 "src/f32-raddextexp/gen/avx512f-p5-scalef-x192.c",
1702 "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc2.c",
1703 "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc3.c",
1704 "src/f32-raddextexp/gen/avx512f-p5-scalef-x192-acc6.c",
1705 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128.c",
1706 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128-acc2.c",
1707 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x128-acc4.c",
1708 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x144.c",
1709 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x144-acc3.c",
1710 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160.c",
1711 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160-acc2.c",
1712 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x160-acc5.c",
1713 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192.c",
1714 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc2.c",
1715 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc3.c",
1716 "src/f32-raddstoreexpminusmax/gen/avx512f-p5-scalef-x192-acc6.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001717 "src/f32-rmax/avx512f.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -07001718 "src/f32-vbinary/gen/vadd-minmax-avx512f-x16.c",
1719 "src/f32-vbinary/gen/vadd-minmax-avx512f-x32.c",
1720 "src/f32-vbinary/gen/vaddc-minmax-avx512f-x16.c",
1721 "src/f32-vbinary/gen/vaddc-minmax-avx512f-x32.c",
1722 "src/f32-vbinary/gen/vdiv-minmax-avx512f-x16.c",
1723 "src/f32-vbinary/gen/vdiv-minmax-avx512f-x32.c",
1724 "src/f32-vbinary/gen/vdivc-minmax-avx512f-x16.c",
1725 "src/f32-vbinary/gen/vdivc-minmax-avx512f-x32.c",
Marat Dukhan9a88efe2019-12-10 15:54:24 -08001726 "src/f32-vbinary/gen/vmax-avx512f-x16.c",
1727 "src/f32-vbinary/gen/vmax-avx512f-x32.c",
1728 "src/f32-vbinary/gen/vmaxc-avx512f-x16.c",
1729 "src/f32-vbinary/gen/vmaxc-avx512f-x32.c",
1730 "src/f32-vbinary/gen/vmin-avx512f-x16.c",
1731 "src/f32-vbinary/gen/vmin-avx512f-x32.c",
1732 "src/f32-vbinary/gen/vminc-avx512f-x16.c",
1733 "src/f32-vbinary/gen/vminc-avx512f-x32.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -07001734 "src/f32-vbinary/gen/vmul-minmax-avx512f-x16.c",
1735 "src/f32-vbinary/gen/vmul-minmax-avx512f-x32.c",
1736 "src/f32-vbinary/gen/vmulc-minmax-avx512f-x16.c",
1737 "src/f32-vbinary/gen/vmulc-minmax-avx512f-x32.c",
1738 "src/f32-vbinary/gen/vrdivc-minmax-avx512f-x16.c",
1739 "src/f32-vbinary/gen/vrdivc-minmax-avx512f-x32.c",
Marat Dukhan13bafb02020-06-05 00:43:11 -07001740 "src/f32-vbinary/gen/vrsqrdiffc-avx512f-x16.c",
1741 "src/f32-vbinary/gen/vrsqrdiffc-avx512f-x32.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -07001742 "src/f32-vbinary/gen/vrsubc-minmax-avx512f-x16.c",
1743 "src/f32-vbinary/gen/vrsubc-minmax-avx512f-x32.c",
Marat Dukhan13bafb02020-06-05 00:43:11 -07001744 "src/f32-vbinary/gen/vsqrdiff-avx512f-x16.c",
1745 "src/f32-vbinary/gen/vsqrdiff-avx512f-x32.c",
1746 "src/f32-vbinary/gen/vsqrdiffc-avx512f-x16.c",
1747 "src/f32-vbinary/gen/vsqrdiffc-avx512f-x32.c",
Marat Dukhan91cd2b72020-04-09 23:57:31 -07001748 "src/f32-vbinary/gen/vsub-minmax-avx512f-x16.c",
1749 "src/f32-vbinary/gen/vsub-minmax-avx512f-x32.c",
1750 "src/f32-vbinary/gen/vsubc-minmax-avx512f-x16.c",
1751 "src/f32-vbinary/gen/vsubc-minmax-avx512f-x32.c",
Marat Dukhan05ac8e32019-10-21 15:39:33 -07001752 "src/f32-vscale/avx512f-unroll64.c",
Marat Dukhan4c4eb002019-12-08 21:27:49 -08001753 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x16.c",
1754 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x32.c",
1755 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x48.c",
1756 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x64.c",
1757 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x80.c",
1758 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x96.c",
1759 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x112.c",
1760 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x128.c",
1761 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x144.c",
1762 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x160.c",
1763 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x176.c",
1764 "src/f32-vscaleexpminusmax/gen/avx512f-p5-scalef-x192.c",
1765 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x16.c",
1766 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x32.c",
1767 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x48.c",
1768 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x64.c",
1769 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x80.c",
1770 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x96.c",
1771 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x112.c",
1772 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x128.c",
1773 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x144.c",
1774 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x160.c",
1775 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x176.c",
1776 "src/f32-vscaleextexp/gen/avx512f-p5-scalef-x192.c",
Marat Dukhaneecf8fd2020-06-09 08:59:37 -07001777 "src/f32-vrnd/gen/vrndne-avx512f-x16.c",
1778 "src/f32-vrnd/gen/vrndne-avx512f-x32.c",
1779 "src/f32-vrnd/gen/vrndz-avx512f-x16.c",
1780 "src/f32-vrnd/gen/vrndz-avx512f-x32.c",
1781 "src/f32-vrnd/gen/vrndu-avx512f-x16.c",
1782 "src/f32-vrnd/gen/vrndu-avx512f-x32.c",
1783 "src/f32-vrnd/gen/vrndd-avx512f-x16.c",
1784 "src/f32-vrnd/gen/vrndd-avx512f-x32.c",
Marat Dukhan5020b962020-06-08 13:30:10 -07001785 "src/f32-vunary/gen/vabs-avx512f-x16.c",
1786 "src/f32-vunary/gen/vabs-avx512f-x32.c",
1787 "src/f32-vunary/gen/vneg-avx512f-x16.c",
1788 "src/f32-vunary/gen/vneg-avx512f-x32.c",
1789 "src/f32-vunary/gen/vsqr-avx512f-x16.c",
1790 "src/f32-vunary/gen/vsqr-avx512f-x32.c",
Marat Dukhan6adff4e2019-10-14 18:32:07 -07001791 "src/math/exp-avx512f-p5-scalef.c",
1792 "src/math/exp-avx512f-p5.c",
1793 "src/math/exp-avx512f-perm-p3.c",
Marat Dukhanfeb49232019-10-28 11:03:31 -07001794 "src/math/exp-avx512f-perm2-p2.c",
Marat Dukhan98ba4412019-10-23 02:14:28 -07001795 "src/math/extexp-avx512f-p5.c",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001796]
1797
# Assembly (.S) micro-kernel sources for AArch32 (32-bit ARM): f32 GEMM and
# IGEMM kernels, some checked in directly and some located under gen/.
AARCH32_ASM_UKERNELS = [
    "src/f32-gemm/4x4-aarch32-vfp-ld64.S",
    "src/f32-gemm/4x4-minmax-aarch32-vfp-ld64.S",
    "src/f32-gemm/4x8-minmax-aarch32-neon-cortex-a53.S",
    "src/f32-gemm/4x8-minmax-aarch32-neon-cortex-a55.S",
    "src/f32-gemm/gen/4x8-minmax-aarch32-neon-cortex-a75.S",
    "src/f32-gemm/gen/4x8-minmax-aarch32-neon-pld-cortex-a75.S",
    "src/f32-gemm/4x8-minmax-aarch32-neon-ld64.S",
    "src/f32-igemm/4x8-minmax-aarch32-neon-ld64.S",
    "src/f32-igemm/gen/4x8-minmax-aarch32-neon-cortex-a75.S",
    "src/f32-igemm/gen/4x8-minmax-aarch32-neon-pld-cortex-a75.S",
    "src/f32-igemm/4x8-minmax-aarch32-neon-cortex-a53.S",
    "src/f32-igemm/4x8-minmax-aarch32-neon-cortex-a55.S",
]
1812
# Assembly (.S) micro-kernel sources for AArch64 (64-bit ARM): f16 GEMM
# (neonfp16arith), f32 depthwise convolution, and f32 GEMM / GEMMINC / IGEMM
# (neonfma) kernels, several of them tuned for a specific Cortex-A core.
AARCH64_ASM_UKERNELS = [
    "src/f16-gemm/gen/1x16-minmax-aarch64-neonfp16arith-ld32.S",
    "src/f16-gemm/gen/4x16-minmax-aarch64-neonfp16arith-ld32.S",
    "src/f16-gemm/gen/6x16-minmax-aarch64-neonfp16arith-ld32.S",
    "src/f16-gemm/gen-inc/1x16inc-minmax-aarch64-neonfp16arith-ld32.S",
    "src/f16-gemm/gen-inc/4x16inc-minmax-aarch64-neonfp16arith-ld32.S",
    "src/f16-gemm/gen-inc/6x16inc-minmax-aarch64-neonfp16arith-ld32.S",
    "src/f16-gemm/gen/1x8-minmax-aarch64-neonfp16arith-ld64.S",
    "src/f16-gemm/gen/4x8-minmax-aarch64-neonfp16arith-ld64.S",
    "src/f16-gemm/gen/6x8-minmax-aarch64-neonfp16arith-ld64.S",
    "src/f16-gemm/gen/8x8-minmax-aarch64-neonfp16arith-ld64.S",
    "src/f16-gemm/gen-inc/1x8inc-minmax-aarch64-neonfp16arith-ld64.S",
    "src/f16-gemm/gen-inc/4x8inc-minmax-aarch64-neonfp16arith-ld64.S",
    "src/f16-gemm/gen-inc/6x8inc-minmax-aarch64-neonfp16arith-ld64.S",
    "src/f16-gemm/gen-inc/8x8inc-minmax-aarch64-neonfp16arith-ld64.S",
    "src/f32-dwconv/up4x9-minmax-aarch64-neonfma-cortex-a55.S",
    "src/f32-dwconv/up4x9-minmax-aarch64-neonfma.S",
    "src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-ld64.S",
    "src/f32-gemm/gen/1x12-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen/4x12-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-cortex-a55.S",
    "src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-ld128.S",
    "src/f32-gemm/gen/4x8-minmax-aarch64-neonfma-ld64.S",
    "src/f32-gemm/gen/5x8-minmax-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen/5x8-minmax-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a55.S",
    "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a73.S",
    "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-ios.S",
    "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-ld128.S",
    "src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-ld64.S",
    "src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-ld64.S",
    "src/f32-gemm/gen-inc/1x12inc-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen-inc/4x12inc-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-cortex-a55.S",
    "src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-ld128.S",
    "src/f32-gemm/gen-inc/4x8inc-minmax-aarch64-neonfma-ld64.S",
    "src/f32-gemm/gen-inc/5x8inc-minmax-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen-inc/5x8inc-minmax-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a55.S",
    "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a73.S",
    "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-ios.S",
    "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-ld128.S",
    "src/f32-gemm/gen-inc/6x8inc-minmax-aarch64-neonfma-ld64.S",
    "src/f32-igemm/1x12-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/1x8-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/gen/1x8-minmax-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/gen/1x8-minmax-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/4x12-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/4x8-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/4x8-minmax-aarch64-neonfma-cortex-a55.S",
    "src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/gen/5x8-minmax-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/gen/5x8-minmax-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/6x8-minmax-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/6x8-minmax-aarch64-neonfma-cortex-a55.S",
    "src/f32-igemm/6x8-minmax-aarch64-neonfma-cortex-a73.S",
    "src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-ios.S",
]
1892
# Internal headers shared by the micro-kernel targets. This list is the base
# that INTERNAL_HDRS, ACCURACY_EVAL_HDRS, MICROKERNEL_BENCHMARK_HDRS and
# MICROKERNEL_TEST_HDRS extend.
INTERNAL_MICROKERNEL_HDRS = [
    "src/requantization/gemmlowp-requantization.h",
    "src/xnnpack/argmaxpool.h",
    "src/xnnpack/avgpool.h",
    "src/xnnpack/clamp.h",
    "src/xnnpack/common.h",
    "src/xnnpack/conv.h",
    "src/xnnpack/dwconv.h",
    "src/xnnpack/gavgpool.h",
    "src/xnnpack/gemm.h",
    "src/xnnpack/fill.h",
    "src/xnnpack/hswish.h",
    "src/xnnpack/ibilinear.h",
    "src/xnnpack/igemm.h",
    "src/xnnpack/intrinsics-polyfill.h",
    "src/xnnpack/lut.h",
    "src/xnnpack/math.h",
    "src/xnnpack/maxpool.h",
    "src/xnnpack/memory.h",
    "src/xnnpack/packx.h",
    "src/xnnpack/pad.h",
    "src/xnnpack/params.h",
    "src/xnnpack/pavgpool.h",
    "src/xnnpack/ppmm.h",
    "src/xnnpack/prelu.h",
    "src/xnnpack/raddexpminusmax.h",
    "src/xnnpack/raddextexp.h",
    "src/xnnpack/raddstoreexpminusmax.h",
    "src/xnnpack/rmax.h",
    "src/xnnpack/scalar-utils.h",
    "src/xnnpack/spmm.h",
    "src/xnnpack/unpool.h",
    "src/xnnpack/vadd.h",
    "src/xnnpack/vbinary.h",
    "src/xnnpack/vmulcaddc.h",
    "src/xnnpack/vscale.h",
    "src/xnnpack/vscaleexpminusmax.h",
    "src/xnnpack/vscaleextexp.h",
    "src/xnnpack/vunary.h",
    "src/xnnpack/zip.h",
]
1934
# Full internal header set: the micro-kernel headers plus the public API
# header and the operator/subgraph-level internal headers. Used as `hdrs` by
# the library targets below.
INTERNAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "include/xnnpack.h",
    "src/xnnpack/allocator.h",
    "src/xnnpack/compute.h",
    "src/xnnpack/im2col.h",
    "src/xnnpack/indirection.h",
    "src/xnnpack/math-stubs.h",
    "src/xnnpack/memory-planner.h",
    "src/xnnpack/operator.h",
    "src/xnnpack/pack.h",
    "src/xnnpack/params-init.h",
    "src/xnnpack/requantization-stubs.h",
    "src/xnnpack/requantization.h",
    "src/xnnpack/subgraph.h",
]
1950
# Micro-kernel headers plus the math stubs header.
# NOTE(review): presumably consumed by the accuracy-evaluation targets
# (cf. ACCURACY_EVAL_DEPS); the consumers are not visible in this part of the file.
ACCURACY_EVAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "src/xnnpack/math-stubs.h",
]
1954
# Micro-kernel headers plus parameter-initialization helpers and the public
# API header.
# NOTE(review): presumably consumed by the micro-kernel benchmark targets
# (cf. MICROKERNEL_BENCHMARK_DEPS); the consumers are not visible here.
MICROKERNEL_BENCHMARK_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "src/xnnpack/params-init.h",
    "include/xnnpack.h",
]
1959
# Micro-kernel headers plus ISA checks, parameter-initialization helpers,
# requantization helpers and the public API header.
# NOTE(review): presumably consumed by the micro-kernel unit tests
# (cf. MICROKERNEL_TEST_DEPS); the consumers are not visible here.
MICROKERNEL_TEST_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "src/xnnpack/isa-checks.h",
    "src/xnnpack/params-init.h",
    "src/xnnpack/requantization.h",
    "include/xnnpack.h",
]
1966
# Minimal header pair (parameter structures and common definitions).
# NOTE(review): presumably for operator tests that need kernel parameters
# only; the consumers are not visible in this part of the file.
OPERATOR_TEST_PARAMS_HDRS = [
    "src/xnnpack/params.h",
    "src/xnnpack/common.h",
]
1971
# Headers for weight packing: pack.h plus the operator and compute headers.
# NOTE(review): consumers are not visible in this part of the file.
WEIGHTS_PACK_HDRS = [
    "src/xnnpack/pack.h",
    "src/xnnpack/operator.h",
    "src/xnnpack/compute.h",
]
1977
# Compile-time log verbosity: defines XNN_LOG_LEVEL according to the active
# build configuration (:optimized_build / :debug_build / everything else).
LOGGING_COPTS = select({
    # No logging in optimized mode
    ":optimized_build": ["-DXNN_LOG_LEVEL=0"],
    # Full logging in debug mode
    ":debug_build": ["-DXNN_LOG_LEVEL=5"],
    # Error-only logging in default (fastbuild) mode
    "//conditions:default": ["-DXNN_LOG_LEVEL=2"],
})
1986
# Extra sources that are only needed when logging is compiled in: the
# operator/subgraph *-strings.c files are omitted from optimized builds,
# matching LOGGING_COPTS, which sets XNN_LOG_LEVEL=0 there.
LOGGING_SRCS = select({
    # No logging in optimized mode
    ":optimized_build": [],
    "//conditions:default": [
        "src/operator-strings.c",
        "src/subgraph-strings.c",
    ],
})
1995
# Header declaring the logging interface.
LOGGING_HDRS = [
    "src/xnnpack/log.h",
]
1999
# Library built from TABLE_SRCS (defined elsewhere in this file). Every
# micro-kernel library below lists it in `deps`.
xnnpack_cc_library(
    name = "tables",
    srcs = TABLE_SRCS,
    hdrs = INTERNAL_HDRS,
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
)
2007
# Scalar micro-kernels (SCALAR_UKERNELS). They are passed as plain `srcs`
# rather than through an architecture-specific *_srcs attribute.
xnnpack_cc_library(
    name = "scalar_ukernels",
    srcs = SCALAR_UKERNELS,
    hdrs = INTERNAL_HDRS,
    # Generate ARM (not Thumb) instructions on 32-bit ARM.
    aarch32_copts = ["-marm"],
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [
        ":tables",
        "@FP16",
        "@FXdiv",
        "@pthreadpool",
    ],
)
2022
# Test-mode twin of :scalar_ukernels: same sources, headers and deps, but
# additionally compiled with -UNDEBUG (NDEBUG undefined, so assert() stays
# active) and -DXNN_TEST_MODE=1.
xnnpack_cc_library(
    name = "scalar_ukernels_test_mode",
    srcs = SCALAR_UKERNELS,
    hdrs = INTERNAL_HDRS,
    aarch32_copts = ["-marm"],
    copts = [
        "-UNDEBUG",
        "-DXNN_TEST_MODE=1",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [
        ":tables",
        "@FP16",
        "@FXdiv",
        "@pthreadpool",
    ],
)
2041
# WebAssembly micro-kernels: WASM_UKERNELS via `wasm_srcs` and
# WASMSIMD_UKERNELS via `wasmsimd_srcs`. No unconditional `srcs` are given.
xnnpack_cc_library(
    name = "wasm_ukernels",
    hdrs = INTERNAL_HDRS,
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    wasm_srcs = WASM_UKERNELS,
    wasmsimd_srcs = WASMSIMD_UKERNELS,
    deps = [
        ":tables",
        "@FP16",
        "@FXdiv",
        "@pthreadpool",
    ],
)
2056
# Test-mode twin of :wasm_ukernels: same sources, headers and deps, but
# additionally compiled with -UNDEBUG (NDEBUG undefined, so assert() stays
# active) and -DXNN_TEST_MODE=1.
xnnpack_cc_library(
    name = "wasm_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    copts = [
        "-UNDEBUG",
        "-DXNN_TEST_MODE=1",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    wasm_srcs = WASM_UKERNELS,
    wasmsimd_srcs = WASMSIMD_UKERNELS,
    deps = [
        ":tables",
        "@FP16",
        "@FXdiv",
        "@pthreadpool",
    ],
)
2075
# PSIMD micro-kernels that are built with -ffast-math in optimized builds
# (PSIMD_FASTMATH_UKERNELS). Compare :psimd_accmath_ukernels, which omits
# -ffast-math.
xnnpack_cc_library(
    name = "psimd_fastmath_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-msse2"],
    msvc_copts = xnnpack_msvc_std_copts(),
    optimized_copts = [
        "-O3",
        "-ffast-math",
    ],
    psimd_srcs = PSIMD_FASTMATH_UKERNELS,
    deps = [
        ":tables",
        "@FP16",
        "@psimd",
        "@pthreadpool",
    ],
)
2098
# Test-mode twin of :psimd_fastmath_ukernels: same sources, flags and deps,
# but additionally compiled with -UNDEBUG (NDEBUG undefined, so assert()
# stays active) and -DXNN_TEST_MODE=1.
xnnpack_cc_library(
    name = "psimd_fastmath_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon",
    ],
    copts = [
        "-UNDEBUG",
        "-DXNN_TEST_MODE=1",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-msse2"],
    msvc_copts = xnnpack_msvc_std_copts(),
    optimized_copts = [
        "-O3",
        "-ffast-math",
    ],
    psimd_srcs = PSIMD_FASTMATH_UKERNELS,
    deps = [
        ":tables",
        "@FP16",
        "@psimd",
        "@pthreadpool",
    ],
)
2125
# PSIMD micro-kernels built with -O3 only (PSIMD_ACCMATH_UKERNELS). Unlike
# :psimd_fastmath_ukernels, -ffast-math is not enabled for these sources.
xnnpack_cc_library(
    name = "psimd_accmath_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-msse2"],
    msvc_copts = xnnpack_msvc_std_copts(),
    optimized_copts = [
        "-O3",
    ],
    psimd_srcs = PSIMD_ACCMATH_UKERNELS,
    deps = [
        ":tables",
        "@FP16",
        "@psimd",
        "@pthreadpool",
    ],
)
2147
# Test-mode twin of :psimd_accmath_ukernels: same sources, flags and deps,
# but additionally compiled with -UNDEBUG (NDEBUG undefined, so assert()
# stays active) and -DXNN_TEST_MODE=1.
xnnpack_cc_library(
    name = "psimd_accmath_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon",
    ],
    copts = [
        "-UNDEBUG",
        "-DXNN_TEST_MODE=1",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-msse2"],
    msvc_copts = xnnpack_msvc_std_copts(),
    optimized_copts = [
        "-O3",
    ],
    psimd_srcs = PSIMD_ACCMATH_UKERNELS,
    deps = [
        ":tables",
        "@FP16",
        "@psimd",
        "@pthreadpool",
    ],
)
2173
# ARM NEON micro-kernels (NEON_UKERNELS); the same source list is used for
# both the AArch32 and the AArch64 builds. On AArch32 the sources are
# compiled as ARMv7-A ARM-mode code with the NEON FPU enabled.
xnnpack_cc_library(
    name = "neon_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-march=armv7-a",
        "-mfpu=neon",
    ],
    aarch32_srcs = NEON_UKERNELS,
    aarch64_srcs = NEON_UKERNELS,
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2192
# Test-mode twin of :neon_ukernels: same sources, flags and deps, but
# additionally compiled with -UNDEBUG (NDEBUG undefined, so assert() stays
# active) and -DXNN_TEST_MODE=1.
xnnpack_cc_library(
    name = "neon_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-march=armv7-a",
        "-mfpu=neon",
    ],
    aarch32_srcs = NEON_UKERNELS,
    aarch64_srcs = NEON_UKERNELS,
    copts = [
        "-UNDEBUG",
        "-DXNN_TEST_MODE=1",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2215
# ARM NEON micro-kernels that use fused multiply-add. AArch32 builds compile
# NEONFMA_UKERNELS with the neon-vfpv4 FPU; AArch64 builds additionally
# include AARCH64_NEONFMA_UKERNELS.
xnnpack_cc_library(
    name = "neonfma_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-march=armv7-a",
        "-mfpu=neon-vfpv4",
    ],
    aarch32_srcs = NEONFMA_UKERNELS,
    aarch64_srcs = NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS,
    # NOTE(review): presumably replaces or augments the -march/-mfpu flags
    # on Apple 32-bit ARM toolchains; confirm against build_defs.bzl.
    apple_aarch32_copts = [
        "-mcpu=swift",
        "-mtune=generic",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2238
# Test-mode twin of :neonfma_ukernels: same sources, flags and deps, but
# additionally compiled with -UNDEBUG (NDEBUG undefined, so assert() stays
# active) and -DXNN_TEST_MODE=1.
xnnpack_cc_library(
    name = "neonfma_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-march=armv7-a",
        "-mfpu=neon-vfpv4",
    ],
    aarch32_srcs = NEONFMA_UKERNELS,
    aarch64_srcs = NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS,
    apple_aarch32_copts = [
        "-mcpu=swift",
        "-mtune=generic",
    ],
    copts = [
        "-UNDEBUG",
        "-DXNN_TEST_MODE=1",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2265
# ARMv8 NEON micro-kernels (NEONV8_UKERNELS); the same source list is used
# for both AArch32 and AArch64. On AArch32 the sources are compiled as
# ARMv8-A ARM-mode code with the neon-fp-armv8 FPU.
xnnpack_cc_library(
    name = "neonv8_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-march=armv8-a",
        "-mfpu=neon-fp-armv8",
    ],
    aarch32_srcs = NEONV8_UKERNELS,
    aarch64_srcs = NEONV8_UKERNELS,
    # NOTE(review): presumably replaces or augments the -march/-mfpu flags
    # on Apple 32-bit ARM toolchains; confirm against build_defs.bzl.
    apple_aarch32_copts = [
        "-mcpu=cyclone",
        "-mtune=generic",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2288
# Test-mode twin of :neonv8_ukernels: same sources, flags and deps, but
# additionally compiled with -UNDEBUG (NDEBUG undefined, so assert() stays
# active) and -DXNN_TEST_MODE=1.
xnnpack_cc_library(
    name = "neonv8_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-march=armv8-a",
        "-mfpu=neon-fp-armv8",
    ],
    aarch32_srcs = NEONV8_UKERNELS,
    aarch64_srcs = NEONV8_UKERNELS,
    apple_aarch32_copts = [
        "-mcpu=cyclone",
        "-mtune=generic",
    ],
    copts = [
        "-UNDEBUG",
        "-DXNN_TEST_MODE=1",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2315
# Half-precision arithmetic NEON micro-kernels. Sources are provided for
# AArch64 only (AARCH64_NEONFP16ARITH_UKERNELS) and are compiled for
# ARMv8.2-A with the fp16 extension.
xnnpack_cc_library(
    name = "neonfp16arith_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch64_copts = ["-march=armv8.2-a+fp16"],
    aarch64_srcs = AARCH64_NEONFP16ARITH_UKERNELS,
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2329
# Test-mode twin of :neonfp16arith_ukernels: same sources, flags and deps,
# but additionally compiled with -UNDEBUG (NDEBUG undefined, so assert()
# stays active) and -DXNN_TEST_MODE=1.
xnnpack_cc_library(
    name = "neonfp16arith_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    aarch64_copts = ["-march=armv8.2-a+fp16"],
    aarch64_srcs = AARCH64_NEONFP16ARITH_UKERNELS,
    copts = [
        "-UNDEBUG",
        "-DXNN_TEST_MODE=1",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2347
# x86 SSE and SSE2 micro-kernels (SSE_UKERNELS + SSE2_UKERNELS), compiled
# with -msse2 on GCC-style compilers and /arch:SSE2 on 32-bit MSVC.
xnnpack_cc_library(
    name = "sse2_ukernels",
    hdrs = INTERNAL_HDRS,
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-msse2"],
    msvc_copts = xnnpack_msvc_std_copts(),
    msvc_x86_32_copts = ["/arch:SSE2"],
    x86_srcs = SSE_UKERNELS + SSE2_UKERNELS,
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2362
# Test-mode twin of :sse2_ukernels: same sources, flags and deps, but
# additionally compiled with -UNDEBUG (NDEBUG undefined, so assert() stays
# active) and -DXNN_TEST_MODE=1.
xnnpack_cc_library(
    name = "sse2_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    copts = [
        "-UNDEBUG",
        "-DXNN_TEST_MODE=1",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-msse2"],
    msvc_copts = xnnpack_msvc_std_copts(),
    msvc_x86_32_copts = ["/arch:SSE2"],
    x86_srcs = SSE_UKERNELS + SSE2_UKERNELS,
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2381
# x86 SSSE3 micro-kernels (SSSE3_UKERNELS), compiled with -mssse3 on
# GCC-style compilers.
xnnpack_cc_library(
    name = "ssse3_ukernels",
    hdrs = INTERNAL_HDRS,
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-mssse3"],
    msvc_copts = xnnpack_msvc_std_copts(),
    # NOTE(review): MSVC offers no /arch value between SSE2 and AVX, which is
    # presumably why /arch:SSE2 is used for an SSSE3 target.
    msvc_x86_32_copts = ["/arch:SSE2"],
    x86_srcs = SSSE3_UKERNELS,
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2396
# Test-mode twin of :ssse3_ukernels: same sources, flags and deps, but
# additionally compiled with -UNDEBUG (NDEBUG undefined, so assert() stays
# active) and -DXNN_TEST_MODE=1.
xnnpack_cc_library(
    name = "ssse3_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    copts = [
        "-UNDEBUG",
        "-DXNN_TEST_MODE=1",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-mssse3"],
    msvc_copts = xnnpack_msvc_std_copts(),
    msvc_x86_32_copts = ["/arch:SSE2"],
    x86_srcs = SSSE3_UKERNELS,
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2415
# x86 SSE4.1 micro-kernels (SSE41_UKERNELS), compiled with -msse4.1 on
# GCC-style compilers.
xnnpack_cc_library(
    name = "sse41_ukernels",
    hdrs = INTERNAL_HDRS,
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-msse4.1"],
    msvc_copts = xnnpack_msvc_std_copts(),
    # NOTE(review): MSVC offers no /arch value between SSE2 and AVX, which is
    # presumably why /arch:SSE2 is used for an SSE4.1 target.
    msvc_x86_32_copts = ["/arch:SSE2"],
    x86_srcs = SSE41_UKERNELS,
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2430
# Test-mode twin of :sse41_ukernels: same sources, flags and deps, but
# additionally compiled with -UNDEBUG (NDEBUG undefined, so assert() stays
# active) and -DXNN_TEST_MODE=1.
xnnpack_cc_library(
    name = "sse41_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    copts = [
        "-UNDEBUG",
        "-DXNN_TEST_MODE=1",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-msse4.1"],
    msvc_copts = xnnpack_msvc_std_copts(),
    msvc_x86_32_copts = ["/arch:SSE2"],
    x86_srcs = SSE41_UKERNELS,
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2449
# x86 AVX micro-kernels (AVX_UKERNELS), compiled with -mavx on GCC-style
# compilers and /arch:AVX on both 32-bit and 64-bit MSVC.
xnnpack_cc_library(
    name = "avx_ukernels",
    hdrs = INTERNAL_HDRS,
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-mavx"],
    msvc_copts = xnnpack_msvc_std_copts(),
    msvc_x86_32_copts = ["/arch:AVX"],
    msvc_x86_64_copts = ["/arch:AVX"],
    x86_srcs = AVX_UKERNELS,
    deps = [
        ":tables",
        "@FP16",
        "@pthreadpool",
    ],
)
2465
# Test-mode counterpart of :avx_ukernels: identical attributes plus
# -UNDEBUG and -DXNN_TEST_MODE=1 in copts.
xnnpack_cc_library(
    name = "avx_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    copts = ["-UNDEBUG", "-DXNN_TEST_MODE=1"],
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-mavx"],
    msvc_copts = xnnpack_msvc_std_copts(),
    msvc_x86_32_copts = ["/arch:AVX"],
    msvc_x86_64_copts = ["/arch:AVX"],
    x86_srcs = AVX_UKERNELS,
    deps = [":tables", "@FP16", "@pthreadpool"],
)
2485
# FMA3 micro-kernels: FMA3_UKERNELS passed as x86_srcs, with -mfma in
# gcc_x86_copts; the MSVC options use /arch:AVX.
xnnpack_cc_library(
    name = "fma3_ukernels",
    hdrs = INTERNAL_HDRS,
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-mfma"],
    msvc_copts = xnnpack_msvc_std_copts(),
    msvc_x86_32_copts = ["/arch:AVX"],
    msvc_x86_64_copts = ["/arch:AVX"],
    x86_srcs = FMA3_UKERNELS,
    deps = [":tables", "@FP16", "@pthreadpool"],
)
2501
# Test-mode counterpart of :fma3_ukernels: identical attributes plus
# -UNDEBUG and -DXNN_TEST_MODE=1 in copts.
xnnpack_cc_library(
    name = "fma3_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    copts = ["-UNDEBUG", "-DXNN_TEST_MODE=1"],
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-mfma"],
    msvc_copts = xnnpack_msvc_std_copts(),
    msvc_x86_32_copts = ["/arch:AVX"],
    msvc_x86_64_copts = ["/arch:AVX"],
    x86_srcs = FMA3_UKERNELS,
    deps = [":tables", "@FP16", "@pthreadpool"],
)
2521
# AVX2 micro-kernels: AVX2_UKERNELS passed as x86_srcs, with -mfma and -mavx2
# in gcc_x86_copts and /arch:AVX2 for MSVC.
xnnpack_cc_library(
    name = "avx2_ukernels",
    hdrs = INTERNAL_HDRS,
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-mfma", "-mavx2"],
    msvc_copts = xnnpack_msvc_std_copts(),
    msvc_x86_32_copts = ["/arch:AVX2"],
    msvc_x86_64_copts = ["/arch:AVX2"],
    x86_srcs = AVX2_UKERNELS,
    deps = [":tables", "@FP16", "@pthreadpool"],
)
2540
# Test-mode counterpart of :avx2_ukernels: identical attributes plus
# -UNDEBUG and -DXNN_TEST_MODE=1 in copts.
xnnpack_cc_library(
    name = "avx2_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    copts = ["-UNDEBUG", "-DXNN_TEST_MODE=1"],
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-mfma", "-mavx2"],
    msvc_copts = xnnpack_msvc_std_copts(),
    msvc_x86_32_copts = ["/arch:AVX2"],
    msvc_x86_64_copts = ["/arch:AVX2"],
    x86_srcs = AVX2_UKERNELS,
    deps = [":tables", "@FP16", "@pthreadpool"],
)
2563
# AVX512F micro-kernels: AVX512F_UKERNELS passed as x86_srcs, with -mavx512f
# in gcc_x86_copts and /arch:AVX512 for MSVC. The mingw and msys option lists
# additionally pass -fno-asynchronous-unwind-tables.
xnnpack_cc_library(
    name = "avx512f_ukernels",
    hdrs = INTERNAL_HDRS,
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-mavx512f"],
    mingw_copts = ["-fno-asynchronous-unwind-tables"],
    msvc_copts = xnnpack_msvc_std_copts(),
    msvc_x86_32_copts = ["/arch:AVX512"],
    msvc_x86_64_copts = ["/arch:AVX512"],
    msys_copts = ["-fno-asynchronous-unwind-tables"],
    x86_srcs = AVX512F_UKERNELS,
    deps = [":tables", "@FP16", "@pthreadpool"],
)
2581
# Test-mode counterpart of :avx512f_ukernels: identical attributes plus
# -UNDEBUG and -DXNN_TEST_MODE=1 in copts.
xnnpack_cc_library(
    name = "avx512f_ukernels_test_mode",
    hdrs = INTERNAL_HDRS,
    copts = ["-UNDEBUG", "-DXNN_TEST_MODE=1"],
    gcc_copts = xnnpack_gcc_std_copts(),
    gcc_x86_copts = ["-mavx512f"],
    mingw_copts = ["-fno-asynchronous-unwind-tables"],
    msvc_copts = xnnpack_msvc_std_copts(),
    msvc_x86_32_copts = ["/arch:AVX512"],
    msvc_x86_64_copts = ["/arch:AVX512"],
    msys_copts = ["-fno-asynchronous-unwind-tables"],
    x86_srcs = AVX512F_UKERNELS,
    deps = [":tables", "@FP16", "@pthreadpool"],
)
2603
# Assembly micro-kernels: AARCH32_ASM_UKERNELS and AARCH64_ASM_UKERNELS.
# The AArch64 option list passes -march=armv8.2-a+fp16.
xnnpack_cc_library(
    name = "asm_ukernels",
    hdrs = ["src/xnnpack/assembly.h"],
    aarch32_srcs = AARCH32_ASM_UKERNELS,
    aarch64_copts = ["-march=armv8.2-a+fp16"],
    aarch64_srcs = AARCH64_ASM_UKERNELS,
)
2611
# Logging helpers: LOGGING_SRCS built as their own library.
# xnnpack_min_size_copts() is appended unless :debug_build matches.
xnnpack_cc_library(
    name = "logging_utils",
    srcs = LOGGING_SRCS,
    hdrs = INTERNAL_HDRS + LOGGING_HDRS,
    copts = LOGGING_COPTS + ["-Isrc", "-Iinclude"] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }),
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    visibility = xnnpack_visibility(),
    deps = ["@FP16", "@clog", "@pthreadpool"],
)
2632
# Aggregate of the per-ISA micro-kernel libraries. Each *_deps attribute lists
# the libraries for one target family; wasm_deps and wasmsimd_deps currently
# name the same single library.
xnnpack_aggregate_library(
    name = "ukernels",
    aarch32_deps = [
        ":neon_ukernels",
        ":neonfma_ukernels",
        ":neonv8_ukernels",
        ":asm_ukernels",
    ],
    aarch64_deps = [
        ":neon_ukernels",
        ":neonfma_ukernels",
        ":neonv8_ukernels",
        ":neonfp16arith_ukernels",
        ":asm_ukernels",
    ],
    generic_deps = [":scalar_ukernels"],
    psimd_deps = [":psimd_fastmath_ukernels", ":psimd_accmath_ukernels"],
    wasm_deps = [":wasm_ukernels"],
    wasmsimd_deps = [":wasm_ukernels"],
    x86_deps = [
        ":sse2_ukernels",
        ":ssse3_ukernels",
        ":sse41_ukernels",
        ":avx_ukernels",
        ":fma3_ukernels",
        ":avx2_ukernels",
        ":avx512f_ukernels",
    ],
)
2671
# Test-mode aggregate: mirrors :ukernels but points at the *_test_mode
# libraries. :asm_ukernels is shared with the regular aggregate (no test-mode
# variant of it is referenced here).
xnnpack_aggregate_library(
    name = "ukernels_test_mode",
    aarch32_deps = [
        ":neon_ukernels_test_mode",
        ":neonfma_ukernels_test_mode",
        ":neonv8_ukernels_test_mode",
        ":asm_ukernels",
    ],
    aarch64_deps = [
        ":neon_ukernels_test_mode",
        ":neonfma_ukernels_test_mode",
        ":neonv8_ukernels_test_mode",
        ":neonfp16arith_ukernels_test_mode",
        ":asm_ukernels",
    ],
    generic_deps = [":scalar_ukernels_test_mode"],
    psimd_deps = [
        ":psimd_fastmath_ukernels_test_mode",
        ":psimd_accmath_ukernels_test_mode",
    ],
    wasm_deps = [":wasm_ukernels_test_mode"],
    wasmsimd_deps = [":wasm_ukernels_test_mode"],
    x86_deps = [
        ":sse2_ukernels_test_mode",
        ":ssse3_ukernels_test_mode",
        ":sse41_ukernels_test_mode",
        ":avx_ukernels_test_mode",
        ":fma3_ukernels_test_mode",
        ":avx2_ukernels_test_mode",
        ":avx512f_ukernels_test_mode",
    ],
)
2710
# im2col helper: src/im2col.c with its two headers.
xnnpack_cc_library(
    name = "im2col",
    srcs = ["src/im2col.c"],
    hdrs = ["src/xnnpack/common.h", "src/xnnpack/im2col.h"],
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
)
2721
# Indirection-buffer helper: src/indirection.c.
xnnpack_cc_library(
    name = "indirection",
    srcs = ["src/indirection.c"],
    hdrs = INTERNAL_HDRS,
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = ["@FP16", "@FXdiv", "@pthreadpool"],
)
2734
# Test-mode counterpart of :indirection: same source, plus -UNDEBUG and
# -DXNN_TEST_MODE=1 in copts.
xnnpack_cc_library(
    name = "indirection_test_mode",
    srcs = ["src/indirection.c"],
    hdrs = INTERNAL_HDRS,
    copts = ["-UNDEBUG", "-DXNN_TEST_MODE=1"],
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = ["@FP16", "@FXdiv", "@pthreadpool"],
)
2751
# Operator execution: src/operator-run.c. -DXNN_MAX_UARCH_TYPES=1 is added
# only when :xnn_enable_hmp_explicit_false matches.
xnnpack_cc_library(
    name = "operator_run",
    srcs = ["src/operator-run.c"],
    hdrs = INTERNAL_HDRS + LOGGING_HDRS,
    copts = LOGGING_COPTS + select({
        ":xnn_enable_hmp_explicit_false": ["-DXNN_MAX_UARCH_TYPES=1"],
        "//conditions:default": [],
    }),
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [":logging_utils", "@FP16", "@FXdiv", "@clog", "@pthreadpool"],
)
2770
# Test-mode counterpart of :operator_run: same source and select(), plus
# -UNDEBUG and -DXNN_TEST_MODE=1 in copts.
xnnpack_cc_library(
    name = "operator_run_test_mode",
    srcs = ["src/operator-run.c"],
    hdrs = INTERNAL_HDRS + LOGGING_HDRS,
    copts = LOGGING_COPTS + ["-UNDEBUG", "-DXNN_TEST_MODE=1"] + select({
        ":xnn_enable_hmp_explicit_false": ["-DXNN_MAX_UARCH_TYPES=1"],
        "//conditions:default": [],
    }),
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [":logging_utils", "@FP16", "@FXdiv", "@clog", "@pthreadpool"],
)
2792
# Memory planner: src/memory-planner.c. XNN_ENABLE_MEMOPT is defined as 0 only
# when :xnn_enable_memopt_explicit_false matches; otherwise it is 1.
xnnpack_cc_library(
    name = "memory_planner",
    srcs = ["src/memory-planner.c"],
    hdrs = INTERNAL_HDRS,
    defines = select({
        ":xnn_enable_memopt_explicit_true": ["XNN_ENABLE_MEMOPT=1"],
        ":xnn_enable_memopt_explicit_false": ["XNN_ENABLE_MEMOPT=0"],
        "//conditions:default": ["XNN_ENABLE_MEMOPT=1"],
    }),
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [":logging_utils", "@pthreadpool"],
)
2809
# Test-mode counterpart of :memory_planner: same source and defines, plus
# -UNDEBUG and -DXNN_TEST_MODE=1 in copts.
xnnpack_cc_library(
    name = "memory_planner_test_mode",
    srcs = ["src/memory-planner.c"],
    hdrs = INTERNAL_HDRS,
    copts = ["-UNDEBUG", "-DXNN_TEST_MODE=1"],
    defines = select({
        ":xnn_enable_memopt_explicit_true": ["XNN_ENABLE_MEMOPT=1"],
        ":xnn_enable_memopt_explicit_false": ["XNN_ENABLE_MEMOPT=0"],
        "//conditions:default": ["XNN_ENABLE_MEMOPT=1"],
    }),
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [":logging_utils", "@pthreadpool"],
)
2830
# Source-less library whose only content is the XNN_ENABLE_ASSEMBLY define.
# The value is 0 only when :xnn_enable_assembly_explicit_false matches;
# the explicit-true and default branches both yield 1.
cc_library(
    name = "enable_assembly",
    defines = select({
        ":xnn_enable_assembly_explicit_true": ["XNN_ENABLE_ASSEMBLY=1"],
        ":xnn_enable_assembly_explicit_false": ["XNN_ENABLE_ASSEMBLY=0"],
        "//conditions:default": ["XNN_ENABLE_ASSEMBLY=1"],
    }),
)
2839
# Operator implementations: OPERATOR_SRCS plus src/memory.c and
# src/operator-delete.c. The first select() appends xnnpack_min_size_copts()
# outside :debug_build; the second adds -DXNN_MAX_UARCH_TYPES=1 when
# :xnn_enable_hmp_explicit_false matches.
xnnpack_cc_library(
    name = "operators",
    srcs = OPERATOR_SRCS + ["src/memory.c", "src/operator-delete.c"],
    hdrs = INTERNAL_HDRS + LOGGING_HDRS,
    copts = LOGGING_COPTS + ["-Isrc", "-Iinclude"] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }) + select({
        ":xnn_enable_hmp_explicit_false": ["-DXNN_MAX_UARCH_TYPES=1"],
        "//conditions:default": [],
    }),
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [
        ":indirection",
        ":logging_utils",
        "@FP16",
        "@FXdiv",
        "@clog",
        "@pthreadpool",
    ],
)
2868
# Test-mode counterpart of :operators: same sources and select() branches,
# plus -UNDEBUG and -DXNN_TEST_MODE=1, and it links :indirection_test_mode
# instead of :indirection.
xnnpack_cc_library(
    name = "operators_test_mode",
    srcs = OPERATOR_SRCS + ["src/memory.c", "src/operator-delete.c"],
    hdrs = INTERNAL_HDRS + LOGGING_HDRS,
    copts = LOGGING_COPTS + [
        "-Isrc",
        "-Iinclude",
        "-UNDEBUG",
        "-DXNN_TEST_MODE=1",
    ] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }) + select({
        ":xnn_enable_hmp_explicit_false": ["-DXNN_MAX_UARCH_TYPES=1"],
        "//conditions:default": [],
    }),
    gcc_copts = xnnpack_gcc_std_copts(),
    msvc_copts = xnnpack_msvc_std_copts(),
    deps = [
        ":indirection_test_mode",
        ":logging_utils",
        "@FP16",
        "@FXdiv",
        "@clog",
        "@pthreadpool",
    ],
)
2899
# Main library target exposing include/xnnpack.h. @cpuinfo is a dependency
# everywhere except when :emscripten matches.
#
# NOTE(review): LOGGING_SRCS is compiled here and is also the srcs of
# :logging_utils, which this target depends on -- confirm that building those
# sources twice is intentional.
xnnpack_cc_library(
    name = "XNNPACK",
    srcs = [
        "src/init.c",
        "src/runtime.c",
        "src/subgraph.c",
        "src/tensor.c",
    ] + SUBGRAPH_SRCS + LOGGING_SRCS,
    hdrs = ["include/xnnpack.h"],
    copts = LOGGING_COPTS + ["-Isrc", "-Iinclude"] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }) + select({
        ":xnn_enable_hmp_explicit_false": ["-DXNN_MAX_UARCH_TYPES=1"],
        "//conditions:default": [],
    }),
    gcc_copts = xnnpack_gcc_std_copts(),
    includes = ["include"],
    msvc_copts = xnnpack_msvc_std_copts(),
    visibility = xnnpack_visibility(),
    deps = [
        ":enable_assembly",
        ":logging_utils",
        ":memory_planner",
        ":operator_run",
        ":operators",
        ":ukernels",
        "@clog",
        "@FP16",
        "@pthreadpool",
    ] + select({
        ":emscripten": [],
        "//conditions:default": ["@cpuinfo"],
    }),
)
2938
# Test-mode counterpart of :XNNPACK: same sources, built with -UNDEBUG and
# -DXNN_TEST_MODE=1 and linked against the *_test_mode variants of the memory
# planner, operator runner, operators and micro-kernels.
#
# NOTE(review): LOGGING_SRCS is compiled here and is also the srcs of
# :logging_utils, which this target depends on -- confirm that building those
# sources twice is intentional.
xnnpack_cc_library(
    name = "XNNPACK_test_mode",
    srcs = [
        "src/init.c",
        "src/runtime.c",
        "src/subgraph.c",
        "src/tensor.c",
    ] + SUBGRAPH_SRCS + LOGGING_SRCS,
    hdrs = ["include/xnnpack.h"],
    copts = LOGGING_COPTS + [
        "-Isrc",
        "-Iinclude",
        "-UNDEBUG",
        "-DXNN_TEST_MODE=1",
    ] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }) + select({
        ":xnn_enable_hmp_explicit_false": ["-DXNN_MAX_UARCH_TYPES=1"],
        "//conditions:default": [],
    }),
    gcc_copts = xnnpack_gcc_std_copts(),
    includes = ["include"],
    msvc_copts = xnnpack_msvc_std_copts(),
    visibility = xnnpack_visibility(),
    deps = [
        ":enable_assembly",
        ":logging_utils",
        ":memory_planner_test_mode",
        ":operator_run_test_mode",
        ":operators_test_mode",
        ":ukernels_test_mode",
        "@clog",
        "@FP16",
        "@pthreadpool",
    ] + select({
        ":emscripten": [],
        "//conditions:default": ["@cpuinfo"],
    }),
)
2979
# Reduced variant of the main library: builds only src/init.c and defines
# XNN_NO_Q8_OPERATORS, XNN_NO_U8_OPERATORS, XNN_NO_X8_OPERATORS and
# XNN_NO_NCHW_OPERATORS. @cpuinfo is a dependency everywhere except when
# :emscripten matches.
#
# "@FP16" is listed directly, matching :XNNPACK (which compiles the same
# src/init.c), instead of being picked up only transitively via :operators.
xnnpack_cc_library(
    name = "xnnpack_operators_nhwc_f32",
    srcs = ["src/init.c"],
    hdrs = ["include/xnnpack.h"],
    copts = LOGGING_COPTS + ["-Isrc", "-Iinclude"] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }) + select({
        ":xnn_enable_hmp_explicit_false": ["-DXNN_MAX_UARCH_TYPES=1"],
        "//conditions:default": [],
    }),
    defines = [
        "XNN_NO_Q8_OPERATORS",
        "XNN_NO_U8_OPERATORS",
        "XNN_NO_X8_OPERATORS",
        "XNN_NO_NCHW_OPERATORS",
    ],
    gcc_copts = xnnpack_gcc_std_copts(),
    includes = ["include"],
    msvc_copts = xnnpack_msvc_std_copts(),
    visibility = xnnpack_visibility(),
    deps = [
        ":enable_assembly",
        ":logging_utils",
        ":operator_run",
        ":operators",
        ":ukernels",
        "@clog",
        "@FP16",
        "@pthreadpool",
    ] + select({
        ":emscripten": [],
        "//conditions:default": ["@cpuinfo"],
    }),
)
3019
# Shared benchmark helpers: bench/utils.cc and bench/utils.h.
xnnpack_cc_library(
    name = "bench_utils",
    srcs = ["bench/utils.cc"],
    hdrs = ["bench/utils.h"],
    deps = ["@com_google_benchmark//:benchmark", "@cpuinfo"],
)
3029
Frank Barchard7e955972019-10-11 10:34:25 -07003030######################### Benchmarks for micro-kernels #########################
Marat Dukhan08c4a432019-10-03 09:29:21 -07003031
# Micro-kernel benchmark from bench/q8-gemm.cc; the xnnpack_optional_ruy_* and
# xnnpack_optional_gemmlowp_* copts and deps are appended.
xnnpack_benchmark(
    name = "q8_gemm_bench",
    srcs = [
        "bench/gemm.h",
        "bench/q8-gemm.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = xnnpack_optional_ruy_copts() + xnnpack_optional_gemmlowp_copts(),
    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps() + xnnpack_optional_gemmlowp_deps(),
)
3042
# Micro-kernel benchmark from bench/f16-igemm.cc; also links :indirection.
xnnpack_benchmark(
    name = "f16_igemm_bench",
    srcs = [
        "bench/f16-igemm.cc",
        "bench/conv.h",
        "bench/google/conv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
3053
# Micro-kernel benchmark from bench/f16-gemm.cc.
xnnpack_benchmark(
    name = "f16_gemm_bench",
    srcs = [
        "bench/f16-gemm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3063
# Micro-kernel benchmark from bench/f16-spmm.cc.
xnnpack_benchmark(
    name = "f16_spmm_bench",
    srcs = [
        "bench/f16-spmm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3073
# Micro-kernel benchmark from bench/f32-igemm.cc; also links :indirection.
xnnpack_benchmark(
    name = "f32_igemm_bench",
    srcs = [
        "bench/f32-igemm.cc",
        "bench/conv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
3083
# Micro-kernel benchmark from bench/f32-conv-hwc.cc.
xnnpack_benchmark(
    name = "f32_conv_hwc_bench",
    srcs = [
        "bench/f32-conv-hwc.cc",
        "bench/dconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3093
# Micro-kernel benchmark from bench/f32-conv-hwc2chw.cc.
xnnpack_benchmark(
    name = "f32_conv_hwc2chw_bench",
    srcs = [
        "bench/f32-conv-hwc2chw.cc",
        "bench/dconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3103
# Micro-kernel benchmark from bench/f16-dwconv.cc; also links :indirection.
xnnpack_benchmark(
    name = "f16_dwconv_bench",
    srcs = [
        "bench/f16-dwconv.cc",
        "bench/dwconv.h",
        "bench/google/dwconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
3114
# Micro-kernel benchmark from bench/f32-dwconv.cc; also links :indirection.
xnnpack_benchmark(
    name = "f32_dwconv_bench",
    srcs = [
        "bench/f32-dwconv.cc",
        "bench/dwconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
3124
# Micro-kernel benchmark from bench/f32-dwconv-chw.cc; also links :indirection.
xnnpack_benchmark(
    name = "f32_dwconv_chw_bench",
    srcs = [
        "bench/f32-dwconv-chw.cc",
        "bench/dwconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
3134
# Micro-kernel benchmark from bench/f32-gemm.cc; the xnnpack_optional_ruy_*
# copts and deps are appended.
xnnpack_benchmark(
    name = "f32_gemm_bench",
    srcs = [
        "bench/f32-gemm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = xnnpack_optional_ruy_copts(),
    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps(),
)
3145
# Micro-kernel benchmark from bench/f32-raddexpminusmax.cc.
xnnpack_benchmark(
    name = "f32_raddexpminusmax_bench",
    srcs = ["bench/f32-raddexpminusmax.cc", "src/xnnpack/AlignedAllocator.h"] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3154
# Micro-kernel benchmark from bench/f32-raddextexp.cc.
xnnpack_benchmark(
    name = "f32_raddextexp_bench",
    srcs = ["bench/f32-raddextexp.cc", "src/xnnpack/AlignedAllocator.h"] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3163
# Micro-kernel benchmark from bench/f32-raddstoreexpminusmax.cc.
xnnpack_benchmark(
    name = "f32_raddstoreexpminusmax_bench",
    srcs = ["bench/f32-raddstoreexpminusmax.cc", "src/xnnpack/AlignedAllocator.h"] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3172
# Micro-kernel benchmark from bench/f32-rmax.cc.
xnnpack_benchmark(
    name = "f32_rmax_bench",
    srcs = ["bench/f32-rmax.cc", "src/xnnpack/AlignedAllocator.h"] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3181
# Micro-kernel benchmark from bench/f32-sigmoid.cc.
xnnpack_benchmark(
    name = "f32_sigmoid_bench",
    srcs = ["bench/f32-sigmoid.cc", "src/xnnpack/AlignedAllocator.h"] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3190
# Micro-kernel benchmark from bench/f32-spmm.cc.
xnnpack_benchmark(
    name = "f32_spmm_bench",
    srcs = [
        "bench/f32-spmm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3200
# Micro-kernel benchmark from bench/f32-softmax.cc; the xnnpack_optional_dnnl_*
# copts and deps are appended.
xnnpack_benchmark(
    name = "f32_softmax_bench",
    srcs = ["bench/f32-softmax.cc"] + MICROKERNEL_BENCHMARK_HDRS,
    copts = xnnpack_optional_dnnl_copts(),
    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_dnnl_deps(),
)
3209
# Micro-kernel benchmark from bench/f32-vscaleexpminusmax.cc.
xnnpack_benchmark(
    name = "f32_vscaleexpminusmax_bench",
    srcs = ["bench/f32-vscaleexpminusmax.cc", "src/xnnpack/AlignedAllocator.h"] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3218
# Micro-kernel benchmark from bench/f32-vscaleextexp.cc.
xnnpack_benchmark(
    name = "f32_vscaleextexp_bench",
    srcs = ["bench/f32-vscaleextexp.cc", "src/xnnpack/AlignedAllocator.h"] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3227
# Micro-kernel benchmark from bench/f32-im2col-gemm.cc; also links :im2col.
xnnpack_benchmark(
    name = "f32_im2col_gemm_bench",
    srcs = [
        "bench/f32-im2col-gemm.cc",
        "bench/conv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":im2col"],
)
3237
# Benchmark from bench/requantization.cc, built with
# src/xnnpack/requantization-stubs.h.
xnnpack_benchmark(
    name = "requantization_bench",
    srcs = [
        "bench/requantization.cc",
        "src/xnnpack/requantization-stubs.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3247
# Benchmark from bench/rounding.cc, built with src/xnnpack/math-stubs.h.
xnnpack_benchmark(
    name = "rounding_bench",
    srcs = [
        "bench/rounding.cc",
        "src/xnnpack/math-stubs.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
3257
Marat Dukhan08c4a432019-10-03 09:29:21 -07003258########################### Benchmarks for operators ###########################
3259
# Operator benchmark from bench/average-pooling.cc; the
# xnnpack_optional_tflite_* copts and deps are appended. Tagged "nowin32".
xnnpack_benchmark(
    name = "average_pooling_bench",
    srcs = ["bench/average-pooling.cc"],
    copts = xnnpack_optional_tflite_copts(),
    tags = ["nowin32"],
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)
3267
# Operator benchmark from bench/channel-shuffle.cc.
xnnpack_benchmark(
    name = "channel_shuffle_bench",
    srcs = ["bench/channel-shuffle.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
3273
# Operator benchmark from bench/convolution.cc; the xnnpack_optional_tflite_*
# and xnnpack_optional_armcl_* copts and deps are appended. Tagged "nowin32".
xnnpack_benchmark(
    name = "convolution_bench",
    srcs = ["bench/convolution.cc"],
    copts = xnnpack_optional_tflite_copts() + xnnpack_optional_armcl_copts(),
    tags = ["nowin32"],
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps() + xnnpack_optional_armcl_deps(),
)
3281
# Operator benchmark from bench/deconvolution.cc; the
# xnnpack_optional_tflite_* copts and deps are appended. Tagged "nowin32".
xnnpack_benchmark(
    name = "deconvolution_bench",
    srcs = ["bench/deconvolution.cc"],
    copts = xnnpack_optional_tflite_copts(),
    tags = ["nowin32"],
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)
3289
# Operator benchmark from bench/global-average-pooling.cc.
xnnpack_benchmark(
    name = "global_average_pooling_bench",
    srcs = ["bench/global-average-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
3295
# Operator benchmark from bench/max-pooling.cc.
xnnpack_benchmark(
    name = "max_pooling_bench",
    srcs = ["bench/max-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
3301
# Operator benchmark from bench/sigmoid.cc; the xnnpack_optional_tflite_*
# copts and deps are appended. Tagged "nowin32".
xnnpack_benchmark(
    name = "sigmoid_bench",
    srcs = ["bench/sigmoid.cc"],
    copts = xnnpack_optional_tflite_copts(),
    tags = ["nowin32"],
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)
3309
# Operator benchmark from bench/prelu.cc; the xnnpack_optional_tflite_*
# copts and deps are appended. Tagged "nowin32".
xnnpack_benchmark(
    name = "prelu_bench",
    srcs = ["bench/prelu.cc"],
    copts = xnnpack_optional_tflite_copts(),
    tags = ["nowin32"],
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)
3317
3318xnnpack_benchmark(
Marat Dukhanfd8e6892020-01-27 15:25:25 -08003319 name = "softmax_bench",
3320 srcs = ["bench/softmax.cc"],
Marat Dukhan9c0db962020-01-28 12:30:14 -08003321 copts = xnnpack_optional_tflite_copts(),
Marat Dukhanca2ba702020-04-24 01:31:47 -07003322 tags = ["nowin32"],
Marat Dukhan1b354632020-03-23 12:50:22 -07003323 deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
Marat Dukhan08c4a432019-10-03 09:29:21 -07003324)
3325
Marat Dukhanc068bb62019-10-04 13:24:39 -07003326############################# End-to-end benchmarks ############################
3327
3328cc_library(
3329 name = "mobilenet_v1",
3330 srcs = ["models/mobilenet-v1.cc"],
3331 hdrs = ["models/models.h"],
Marat Dukhana84e40b2019-12-11 15:38:03 -08003332 copts = xnnpack_std_cxxopts(),
Marat Dukhanc068bb62019-10-04 13:24:39 -07003333 linkstatic = True,
3334 deps = [
3335 ":XNNPACK",
3336 "@pthreadpool",
3337 ],
3338)
3339
3340cc_library(
3341 name = "mobilenet_v2",
3342 srcs = ["models/mobilenet-v2.cc"],
3343 hdrs = ["models/models.h"],
Marat Dukhana84e40b2019-12-11 15:38:03 -08003344 copts = xnnpack_std_cxxopts(),
Marat Dukhanc068bb62019-10-04 13:24:39 -07003345 linkstatic = True,
3346 deps = [
3347 ":XNNPACK",
3348 "@pthreadpool",
3349 ],
3350)
3351
Marat Dukhanc08cdf52019-12-09 09:17:51 -08003352cc_library(
3353 name = "mobilenet_v3_large",
3354 srcs = ["models/mobilenet-v3-large.cc"],
3355 hdrs = ["models/models.h"],
Marat Dukhana84e40b2019-12-11 15:38:03 -08003356 copts = xnnpack_std_cxxopts(),
Marat Dukhanc08cdf52019-12-09 09:17:51 -08003357 linkstatic = True,
3358 deps = [
3359 ":XNNPACK",
3360 "@pthreadpool",
3361 ],
3362)
3363
3364cc_library(
3365 name = "mobilenet_v3_small",
3366 srcs = ["models/mobilenet-v3-small.cc"],
3367 hdrs = ["models/models.h"],
Marat Dukhana84e40b2019-12-11 15:38:03 -08003368 copts = xnnpack_std_cxxopts(),
Marat Dukhanc08cdf52019-12-09 09:17:51 -08003369 linkstatic = True,
3370 deps = [
3371 ":XNNPACK",
3372 "@pthreadpool",
3373 ],
3374)
3375
Marat Dukhanc068bb62019-10-04 13:24:39 -07003376xnnpack_benchmark(
Marat Dukhanef4416e2019-10-31 13:44:40 -07003377 name = "f32_dwconv_e2e_bench",
Marat Dukhanc08cdf52019-12-09 09:17:51 -08003378 srcs = [
3379 "bench/f32-dwconv-e2e.cc",
3380 "bench/end2end.h",
3381 ] + MICROKERNEL_BENCHMARK_HDRS,
Marat Dukhanef4416e2019-10-31 13:44:40 -07003382 deps = MICROKERNEL_BENCHMARK_DEPS + [
3383 ":XNNPACK",
3384 ":mobilenet_v1",
3385 ":mobilenet_v2",
Marat Dukhanc08cdf52019-12-09 09:17:51 -08003386 ":mobilenet_v3_large",
3387 ":mobilenet_v3_small",
Marat Dukhanef4416e2019-10-31 13:44:40 -07003388 ],
3389)
3390
3391xnnpack_benchmark(
Marat Dukhan5f18d262019-10-31 10:24:14 -07003392 name = "f32_gemm_e2e_bench",
Marat Dukhanc08cdf52019-12-09 09:17:51 -08003393 srcs = [
3394 "bench/f32-gemm-e2e.cc",
3395 "bench/end2end.h",
3396 ] + MICROKERNEL_BENCHMARK_HDRS,
Marat Dukhan5f18d262019-10-31 10:24:14 -07003397 deps = MICROKERNEL_BENCHMARK_DEPS + [
3398 ":XNNPACK",
3399 ":mobilenet_v1",
3400 ":mobilenet_v2",
Marat Dukhanc08cdf52019-12-09 09:17:51 -08003401 ":mobilenet_v3_large",
3402 ":mobilenet_v3_small",
Marat Dukhan5f18d262019-10-31 10:24:14 -07003403 ],
3404)
3405
3406xnnpack_benchmark(
Marat Dukhanc068bb62019-10-04 13:24:39 -07003407 name = "end2end_bench",
3408 srcs = ["bench/end2end.cc"],
3409 deps = [
3410 ":XNNPACK",
Frank Barchardc712fa42019-10-31 14:00:21 -07003411 ":bench_utils",
Marat Dukhanc068bb62019-10-04 13:24:39 -07003412 ":mobilenet_v1",
3413 ":mobilenet_v2",
Marat Dukhanc08cdf52019-12-09 09:17:51 -08003414 ":mobilenet_v3_large",
3415 ":mobilenet_v3_small",
Marat Dukhanc068bb62019-10-04 13:24:39 -07003416 "@pthreadpool",
3417 ],
3418)
3419
Marat Dukhan6adff4e2019-10-14 18:32:07 -07003420#################### Accuracy evaluation for math functions ####################
3421
3422xnnpack_benchmark(
3423 name = "f32_exp_eval",
3424 srcs = [
3425 "eval/f32-exp.cc",
3426 "src/xnnpack/AlignedAllocator.h",
3427 ] + ACCURACY_EVAL_HDRS,
3428 deps = ACCURACY_EVAL_DEPS,
3429)
3430
Marat Dukhan515c9772019-10-17 18:07:57 -07003431xnnpack_benchmark(
3432 name = "f32_expminus_eval",
3433 srcs = [
3434 "eval/f32-expminus.cc",
3435 "src/xnnpack/AlignedAllocator.h",
3436 ] + ACCURACY_EVAL_HDRS,
3437 deps = ACCURACY_EVAL_DEPS,
3438)
3439
Marat Dukhan98ba4412019-10-23 02:14:28 -07003440xnnpack_benchmark(
3441 name = "f32_extexp_eval",
3442 srcs = [
3443 "eval/f32-extexp.cc",
3444 "src/xnnpack/AlignedAllocator.h",
3445 ] + ACCURACY_EVAL_HDRS,
3446 deps = ACCURACY_EVAL_DEPS,
3447)
3448
Marat Dukhan8853b822020-05-07 12:19:01 -07003449xnnpack_unit_test(
3450 name = "f32_roundne_eval",
3451 srcs = [
3452 "eval/f32-roundne.cc",
3453 "src/xnnpack/AlignedAllocator.h",
3454 "src/xnnpack/math-stubs.h",
3455 ] + MICROKERNEL_TEST_HDRS,
Marat Dukhan22eed3d2020-05-11 20:13:37 -07003456 automatic = False,
Marat Dukhan8853b822020-05-07 12:19:01 -07003457 deps = MICROKERNEL_TEST_DEPS,
3458)
3459
Marat Dukhan2dbb9442020-05-12 20:43:43 -07003460xnnpack_unit_test(
Marat Dukhanc9852ba2020-05-13 17:21:29 -07003461 name = "f32_roundd_eval",
3462 srcs = [
3463 "eval/f32-roundd.cc",
3464 "src/xnnpack/AlignedAllocator.h",
3465 "src/xnnpack/math-stubs.h",
3466 ] + MICROKERNEL_TEST_HDRS,
3467 automatic = False,
3468 deps = MICROKERNEL_TEST_DEPS,
3469)
3470
3471xnnpack_unit_test(
3472 name = "f32_roundu_eval",
3473 srcs = [
3474 "eval/f32-roundu.cc",
3475 "src/xnnpack/AlignedAllocator.h",
3476 "src/xnnpack/math-stubs.h",
3477 ] + MICROKERNEL_TEST_HDRS,
3478 automatic = False,
3479 deps = MICROKERNEL_TEST_DEPS,
3480)
3481
3482xnnpack_unit_test(
Marat Dukhan2dbb9442020-05-12 20:43:43 -07003483 name = "f32_roundz_eval",
3484 srcs = [
3485 "eval/f32-roundz.cc",
3486 "src/xnnpack/AlignedAllocator.h",
3487 "src/xnnpack/math-stubs.h",
3488 ] + MICROKERNEL_TEST_HDRS,
Marat Dukhanc9852ba2020-05-13 17:21:29 -07003489 automatic = False,
Marat Dukhan2dbb9442020-05-12 20:43:43 -07003490 deps = MICROKERNEL_TEST_DEPS,
3491)
3492
Marat Dukhan346a9e52019-11-15 09:06:30 -08003493xnnpack_benchmark(
3494 name = "f32_sigmoid_eval",
3495 srcs = [
3496 "eval/f32-sigmoid.cc",
3497 "src/xnnpack/AlignedAllocator.h",
3498 ] + ACCURACY_EVAL_HDRS,
3499 deps = ACCURACY_EVAL_DEPS,
3500)
3501
Marat Dukhan08c4a432019-10-03 09:29:21 -07003502######################### Unit tests for micro-kernels #########################
3503
3504xnnpack_unit_test(
Marat Dukhande06f492020-04-09 00:19:31 -07003505 name = "f16_gemm_minmax_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003506 srcs = [
Marat Dukhande06f492020-04-09 00:19:31 -07003507 "test/f16-gemm-minmax.cc",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003508 "test/gemm-microkernel-tester.h",
3509 "src/xnnpack/AlignedAllocator.h",
3510 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3511 deps = MICROKERNEL_TEST_DEPS,
3512)
3513
3514xnnpack_unit_test(
Marat Dukhan355ab432020-04-09 19:01:52 -07003515 name = "f16_spmm_minmax_test",
Marat Dukhanbdb56f52020-02-05 21:42:49 -08003516 srcs = [
Marat Dukhan355ab432020-04-09 19:01:52 -07003517 "test/f16-spmm-minmax.cc",
Marat Dukhanbdb56f52020-02-05 21:42:49 -08003518 "test/spmm-microkernel-tester.h",
3519 "src/xnnpack/AlignedAllocator.h",
3520 ] + MICROKERNEL_TEST_HDRS,
3521 deps = MICROKERNEL_TEST_DEPS,
3522)
3523
3524xnnpack_unit_test(
Marat Dukhan08c4a432019-10-03 09:29:21 -07003525 name = "f32_argmaxpool_test",
3526 srcs = [
3527 "test/f32-argmaxpool.cc",
3528 "test/argmaxpool-microkernel-tester.h",
3529 "src/xnnpack/AlignedAllocator.h",
3530 ] + MICROKERNEL_TEST_HDRS,
3531 deps = MICROKERNEL_TEST_DEPS,
3532)
3533
3534xnnpack_unit_test(
Marat Dukhan99936602020-04-11 16:47:01 -07003535 name = "f32_avgpool_minmax_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003536 srcs = [
Marat Dukhan99936602020-04-11 16:47:01 -07003537 "test/f32-avgpool-minmax.cc",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003538 "test/avgpool-microkernel-tester.h",
3539 "src/xnnpack/AlignedAllocator.h",
3540 ] + MICROKERNEL_TEST_HDRS,
3541 deps = MICROKERNEL_TEST_DEPS,
3542)
3543
3544xnnpack_unit_test(
Marat Dukhan660fd192020-03-10 04:55:30 -07003545 name = "f32_ibilinear_test",
Marat Dukhan35dacfb2019-11-07 19:18:16 -08003546 srcs = [
Marat Dukhan660fd192020-03-10 04:55:30 -07003547 "test/f32-ibilinear.cc",
3548 "test/ibilinear-microkernel-tester.h",
Marat Dukhan35dacfb2019-11-07 19:18:16 -08003549 "src/xnnpack/AlignedAllocator.h",
3550 ] + MICROKERNEL_TEST_HDRS,
3551 deps = MICROKERNEL_TEST_DEPS,
3552)
3553
3554xnnpack_unit_test(
Frank Barchardb1966592020-05-12 13:47:06 -07003555 name = "f16_clamp_test",
3556 srcs = [
3557 "test/f16-clamp.cc",
3558 "test/clamp-microkernel-tester.h",
3559 ] + MICROKERNEL_TEST_HDRS,
3560 deps = MICROKERNEL_TEST_DEPS,
3561)
3562
3563xnnpack_unit_test(
Marat Dukhan08c4a432019-10-03 09:29:21 -07003564 name = "f32_clamp_test",
3565 srcs = [
3566 "test/f32-clamp.cc",
3567 "test/clamp-microkernel-tester.h",
3568 ] + MICROKERNEL_TEST_HDRS,
3569 deps = MICROKERNEL_TEST_DEPS,
3570)
3571
3572xnnpack_unit_test(
Marat Dukhan163a7e62020-04-09 04:19:26 -07003573 name = "f32_igemm_test",
3574 srcs = [
3575 "test/f32-igemm.cc",
3576 "test/gemm-microkernel-tester.h",
3577 "src/xnnpack/AlignedAllocator.h",
3578 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3579 deps = MICROKERNEL_TEST_DEPS,
3580)
3581
3582xnnpack_unit_test(
Marat Dukhan467f6362020-05-22 23:21:55 -07003583 name = "f32_igemm_relu_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003584 srcs = [
Marat Dukhan467f6362020-05-22 23:21:55 -07003585 "test/f32-igemm-relu.cc",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003586 "test/gemm-microkernel-tester.h",
3587 "src/xnnpack/AlignedAllocator.h",
3588 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3589 deps = MICROKERNEL_TEST_DEPS,
3590)
3591
3592xnnpack_unit_test(
Marat Dukhane207b7b2020-05-28 16:27:42 -07003593 name = "f32_igemm_minmax_test",
3594 srcs = [
3595 "test/f32-igemm-minmax.cc",
3596 "test/gemm-microkernel-tester.h",
3597 "src/xnnpack/AlignedAllocator.h",
3598 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3599 deps = MICROKERNEL_TEST_DEPS,
3600)
3601
3602xnnpack_unit_test(
Frank Barchardb0e4fae2020-05-04 15:27:51 -07003603 name = "f16_igemm_minmax_test",
3604 srcs = [
3605 "test/f16-igemm-minmax.cc",
3606 "test/gemm-microkernel-tester.h",
3607 "src/xnnpack/AlignedAllocator.h",
3608 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3609 deps = MICROKERNEL_TEST_DEPS,
3610)
3611
3612xnnpack_unit_test(
Marat Dukhan08c4a432019-10-03 09:29:21 -07003613 name = "f32_conv_hwc_test",
3614 srcs = [
3615 "test/f32-conv-hwc.cc",
3616 "test/conv-hwc-microkernel-tester.h",
3617 "src/xnnpack/AlignedAllocator.h",
3618 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3619 deps = MICROKERNEL_TEST_DEPS,
3620)
3621
3622xnnpack_unit_test(
Marat Dukhan1f29b802020-05-15 23:46:39 -07003623 name = "f32_conv_hwc2chw_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003624 srcs = [
Marat Dukhan1f29b802020-05-15 23:46:39 -07003625 "test/f32-conv-hwc2chw.cc",
3626 "test/conv-hwc2chw-microkernel-tester.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003627 "src/xnnpack/AlignedAllocator.h",
3628 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3629 deps = MICROKERNEL_TEST_DEPS,
3630)
3631
3632xnnpack_unit_test(
Marat Dukhan163a7e62020-04-09 04:19:26 -07003633 name = "f32_dwconv_test",
3634 srcs = [
3635 "test/f32-dwconv.cc",
3636 "test/dwconv-microkernel-tester.h",
3637 "src/xnnpack/AlignedAllocator.h",
3638 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3639 deps = MICROKERNEL_TEST_DEPS,
3640)
3641
3642xnnpack_unit_test(
Frank Barchard5a599a62020-06-04 20:12:44 -07003643 name = "f16_dwconv_minmax_test",
3644 srcs = [
3645 "test/f16-dwconv-minmax.cc",
3646 "test/dwconv-microkernel-tester.h",
3647 "src/xnnpack/AlignedAllocator.h",
3648 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3649 deps = MICROKERNEL_TEST_DEPS,
3650)
3651
3652xnnpack_unit_test(
Marat Dukhan1c587112020-04-08 20:04:28 -07003653 name = "f32_dwconv_minmax_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003654 srcs = [
Marat Dukhan1c587112020-04-08 20:04:28 -07003655 "test/f32-dwconv-minmax.cc",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003656 "test/dwconv-microkernel-tester.h",
3657 "src/xnnpack/AlignedAllocator.h",
3658 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3659 deps = MICROKERNEL_TEST_DEPS,
3660)
3661
3662xnnpack_unit_test(
Marat Dukhan1f29b802020-05-15 23:46:39 -07003663 name = "f32_dwconv_chw_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003664 srcs = [
Marat Dukhan1f29b802020-05-15 23:46:39 -07003665 "test/f32-dwconv-chw.cc",
3666 "test/dwconv-chw-microkernel-tester.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003667 "src/xnnpack/AlignedAllocator.h",
3668 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3669 deps = MICROKERNEL_TEST_DEPS,
3670)
3671
3672xnnpack_unit_test(
Frank Barchard0bb49a72020-06-04 11:35:11 -07003673 name = "f16_gavgpool_minmax_test",
3674 srcs = [
3675 "test/f16-gavgpool-minmax.cc",
3676 "test/gavgpool-microkernel-tester.h",
3677 "src/xnnpack/AlignedAllocator.h",
3678 ] + MICROKERNEL_TEST_HDRS,
3679 deps = MICROKERNEL_TEST_DEPS,
3680)
3681
3682xnnpack_unit_test(
Marat Dukhan99936602020-04-11 16:47:01 -07003683 name = "f32_gavgpool_minmax_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003684 srcs = [
Marat Dukhan99936602020-04-11 16:47:01 -07003685 "test/f32-gavgpool-minmax.cc",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003686 "test/gavgpool-microkernel-tester.h",
3687 "src/xnnpack/AlignedAllocator.h",
3688 ] + MICROKERNEL_TEST_HDRS,
3689 deps = MICROKERNEL_TEST_DEPS,
3690)
3691
3692xnnpack_unit_test(
Marat Dukhan1f29b802020-05-15 23:46:39 -07003693 name = "f32_gavgpool_cw_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003694 srcs = [
Marat Dukhan1f29b802020-05-15 23:46:39 -07003695 "test/f32-gavgpool-cw.cc",
3696 "test/gavgpool-cw-microkernel-tester.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003697 "src/xnnpack/AlignedAllocator.h",
3698 ] + MICROKERNEL_TEST_HDRS,
3699 deps = MICROKERNEL_TEST_DEPS,
3700)
3701
3702xnnpack_unit_test(
Marat Dukhan163a7e62020-04-09 04:19:26 -07003703 name = "f32_gemm_test",
3704 srcs = [
3705 "test/f32-gemm.cc",
3706 "test/gemm-microkernel-tester.h",
3707 "src/xnnpack/AlignedAllocator.h",
3708 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3709 deps = MICROKERNEL_TEST_DEPS,
3710)
3711
3712xnnpack_unit_test(
Marat Dukhan467f6362020-05-22 23:21:55 -07003713 name = "f32_gemm_relu_test",
3714 srcs = [
3715 "test/f32-gemm-relu.cc",
3716 "test/gemm-microkernel-tester.h",
3717 "src/xnnpack/AlignedAllocator.h",
3718 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3719 deps = MICROKERNEL_TEST_DEPS,
3720)
3721
3722xnnpack_unit_test(
Marat Dukhan1c587112020-04-08 20:04:28 -07003723 name = "f32_gemm_minmax_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003724 srcs = [
Marat Dukhan1c587112020-04-08 20:04:28 -07003725 "test/f32-gemm-minmax.cc",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003726 "test/gemm-microkernel-tester.h",
3727 "src/xnnpack/AlignedAllocator.h",
3728 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3729 deps = MICROKERNEL_TEST_DEPS,
3730)
3731
3732xnnpack_unit_test(
Marat Dukhan1c587112020-04-08 20:04:28 -07003733 name = "f32_gemminc_minmax_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003734 srcs = [
Marat Dukhan1c587112020-04-08 20:04:28 -07003735 "test/f32-gemminc-minmax.cc",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003736 "test/gemm-microkernel-tester.h",
3737 "src/xnnpack/AlignedAllocator.h",
3738 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3739 deps = MICROKERNEL_TEST_DEPS,
3740)
3741
3742xnnpack_unit_test(
Frank Barchardb1966592020-05-12 13:47:06 -07003743 name = "f16_hswish_test",
3744 srcs = [
3745 "test/f16-hswish.cc",
3746 "test/hswish-microkernel-tester.h",
3747 ] + MICROKERNEL_TEST_HDRS,
3748 deps = MICROKERNEL_TEST_DEPS,
3749)
3750
3751xnnpack_unit_test(
Marat Dukhan08c4a432019-10-03 09:29:21 -07003752 name = "f32_hswish_test",
3753 srcs = [
3754 "test/f32-hswish.cc",
3755 "test/hswish-microkernel-tester.h",
3756 ] + MICROKERNEL_TEST_HDRS,
3757 deps = MICROKERNEL_TEST_DEPS,
3758)
3759
3760xnnpack_unit_test(
Marat Dukhan99936602020-04-11 16:47:01 -07003761 name = "f32_maxpool_minmax_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003762 srcs = [
Marat Dukhan99936602020-04-11 16:47:01 -07003763 "test/f32-maxpool-minmax.cc",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003764 "test/maxpool-microkernel-tester.h",
3765 ] + MICROKERNEL_TEST_HDRS,
3766 deps = MICROKERNEL_TEST_DEPS,
3767)
3768
3769xnnpack_unit_test(
Marat Dukhan99936602020-04-11 16:47:01 -07003770 name = "f32_pavgpool_minmax_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003771 srcs = [
Marat Dukhan99936602020-04-11 16:47:01 -07003772 "test/f32-pavgpool-minmax.cc",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003773 "test/avgpool-microkernel-tester.h",
3774 "src/xnnpack/AlignedAllocator.h",
3775 ] + MICROKERNEL_TEST_HDRS,
3776 deps = MICROKERNEL_TEST_DEPS,
3777)
3778
3779xnnpack_unit_test(
Marat Dukhan1c587112020-04-08 20:04:28 -07003780 name = "f32_ppmm_minmax_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003781 srcs = [
Marat Dukhan1c587112020-04-08 20:04:28 -07003782 "test/f32-ppmm-minmax.cc",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003783 "test/gemm-microkernel-tester.h",
3784 "src/xnnpack/AlignedAllocator.h",
3785 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
3786 deps = MICROKERNEL_TEST_DEPS,
3787)
3788
3789xnnpack_unit_test(
Frank Barchardb1966592020-05-12 13:47:06 -07003790 name = "f16_prelu_test",
3791 srcs = [
3792 "test/f16-prelu.cc",
3793 "test/prelu-microkernel-tester.h",
3794 "src/xnnpack/AlignedAllocator.h",
3795 ] + MICROKERNEL_TEST_HDRS,
3796 deps = MICROKERNEL_TEST_DEPS,
3797)
3798
3799xnnpack_unit_test(
Marat Dukhan08c4a432019-10-03 09:29:21 -07003800 name = "f32_prelu_test",
3801 srcs = [
3802 "test/f32-prelu.cc",
3803 "test/prelu-microkernel-tester.h",
3804 "src/xnnpack/AlignedAllocator.h",
3805 ] + MICROKERNEL_TEST_HDRS,
3806 deps = MICROKERNEL_TEST_DEPS,
3807)
3808
3809xnnpack_unit_test(
Marat Dukhan97579532019-10-18 16:40:39 -07003810 name = "f32_raddexpminusmax_test",
3811 srcs = [
3812 "test/f32-raddexpminusmax.cc",
3813 "test/raddexpminusmax-microkernel-tester.h",
3814 ] + MICROKERNEL_TEST_HDRS,
3815 deps = MICROKERNEL_TEST_DEPS,
3816)
3817
3818xnnpack_unit_test(
Marat Dukhan6f8d4d32019-10-25 17:07:09 -07003819 name = "f32_raddextexp_test",
3820 srcs = [
3821 "test/f32-raddextexp.cc",
3822 "test/raddextexp-microkernel-tester.h",
3823 ] + MICROKERNEL_TEST_HDRS,
3824 deps = MICROKERNEL_TEST_DEPS,
3825)
3826
3827xnnpack_unit_test(
Marat Dukhan97579532019-10-18 16:40:39 -07003828 name = "f32_raddstoreexpminusmax_test",
3829 srcs = [
3830 "test/f32-raddstoreexpminusmax.cc",
3831 "test/raddstoreexpminusmax-microkernel-tester.h",
3832 ] + MICROKERNEL_TEST_HDRS,
3833 deps = MICROKERNEL_TEST_DEPS,
3834)
3835
3836xnnpack_unit_test(
Marat Dukhan08c4a432019-10-03 09:29:21 -07003837 name = "f32_rmax_test",
3838 srcs = [
3839 "test/f32-rmax.cc",
3840 "test/rmax-microkernel-tester.h",
3841 ] + MICROKERNEL_TEST_HDRS,
3842 deps = MICROKERNEL_TEST_DEPS,
3843)
3844
3845xnnpack_unit_test(
Marat Dukhan346a9e52019-11-15 09:06:30 -08003846 name = "f32_sigmoid_test",
3847 srcs = [
3848 "test/f32-sigmoid.cc",
Marat Dukhan1e782c42019-11-21 17:02:40 -08003849 "test/vunary-microkernel-tester.h",
Marat Dukhan346a9e52019-11-15 09:06:30 -08003850 ] + MICROKERNEL_TEST_HDRS,
3851 deps = MICROKERNEL_TEST_DEPS,
3852)
3853
3854xnnpack_unit_test(
Marat Dukhan355ab432020-04-09 19:01:52 -07003855 name = "f32_spmm_minmax_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003856 srcs = [
Marat Dukhan355ab432020-04-09 19:01:52 -07003857 "test/f32-spmm-minmax.cc",
Marat Dukhan08c4a432019-10-03 09:29:21 -07003858 "test/spmm-microkernel-tester.h",
3859 "src/xnnpack/AlignedAllocator.h",
3860 ] + MICROKERNEL_TEST_HDRS,
3861 deps = MICROKERNEL_TEST_DEPS,
3862)
3863
3864xnnpack_unit_test(
Frank Barchardd793f6c2020-05-08 13:37:43 -07003865 name = "f16_vadd_minmax_test",
3866 srcs = [
3867 "test/f16-vadd-minmax.cc",
3868 "test/vbinary-microkernel-tester.h",
3869 ] + MICROKERNEL_TEST_HDRS,
3870 deps = MICROKERNEL_TEST_DEPS,
3871)
3872
3873xnnpack_unit_test(
3874 name = "f16_vaddc_minmax_test",
3875 srcs = [
3876 "test/f16-vaddc-minmax.cc",
3877 "test/vbinaryc-microkernel-tester.h",
3878 ] + MICROKERNEL_TEST_HDRS,
3879 deps = MICROKERNEL_TEST_DEPS,
3880)
3881
3882xnnpack_unit_test(
3883 name = "f16_vdiv_minmax_test",
3884 srcs = [
3885 "test/f16-vdiv-minmax.cc",
3886 "test/vbinary-microkernel-tester.h",
3887 ] + MICROKERNEL_TEST_HDRS,
3888 deps = MICROKERNEL_TEST_DEPS,
3889)
3890
3891xnnpack_unit_test(
3892 name = "f16_vdivc_minmax_test",
3893 srcs = [
3894 "test/f16-vdivc-minmax.cc",
3895 "test/vbinaryc-microkernel-tester.h",
3896 ] + MICROKERNEL_TEST_HDRS,
3897 deps = MICROKERNEL_TEST_DEPS,
3898)
3899
3900xnnpack_unit_test(
3901 name = "f16_vrdivc_minmax_test",
3902 srcs = [
3903 "test/f16-vrdivc-minmax.cc",
3904 "test/vbinaryc-microkernel-tester.h",
3905 ] + MICROKERNEL_TEST_HDRS,
3906 deps = MICROKERNEL_TEST_DEPS,
3907)
3908
3909xnnpack_unit_test(
3910 name = "f16_vmax_test",
3911 srcs = [
3912 "test/f16-vmax.cc",
3913 "test/vbinary-microkernel-tester.h",
3914 ] + MICROKERNEL_TEST_HDRS,
3915 deps = MICROKERNEL_TEST_DEPS,
3916)
3917
3918xnnpack_unit_test(
3919 name = "f16_vmaxc_test",
3920 srcs = [
3921 "test/f16-vmaxc.cc",
3922 "test/vbinaryc-microkernel-tester.h",
3923 ] + MICROKERNEL_TEST_HDRS,
3924 deps = MICROKERNEL_TEST_DEPS,
3925)
3926
3927xnnpack_unit_test(
3928 name = "f16_vmin_test",
3929 srcs = [
3930 "test/f16-vmin.cc",
3931 "test/vbinary-microkernel-tester.h",
3932 ] + MICROKERNEL_TEST_HDRS,
3933 deps = MICROKERNEL_TEST_DEPS,
3934)
3935
3936xnnpack_unit_test(
3937 name = "f16_vminc_test",
3938 srcs = [
3939 "test/f16-vminc.cc",
3940 "test/vbinaryc-microkernel-tester.h",
3941 ] + MICROKERNEL_TEST_HDRS,
3942 deps = MICROKERNEL_TEST_DEPS,
3943)
3944
3945xnnpack_unit_test(
3946 name = "f16_vmul_minmax_test",
3947 srcs = [
3948 "test/f16-vmul-minmax.cc",
3949 "test/vbinary-microkernel-tester.h",
3950 ] + MICROKERNEL_TEST_HDRS,
3951 deps = MICROKERNEL_TEST_DEPS,
3952)
3953
3954xnnpack_unit_test(
3955 name = "f16_vmulc_minmax_test",
3956 srcs = [
3957 "test/f16-vmulc-minmax.cc",
3958 "test/vbinaryc-microkernel-tester.h",
3959 ] + MICROKERNEL_TEST_HDRS,
3960 deps = MICROKERNEL_TEST_DEPS,
3961)
3962
3963xnnpack_unit_test(
3964 name = "f16_vsub_minmax_test",
3965 srcs = [
3966 "test/f16-vsub-minmax.cc",
3967 "test/vbinary-microkernel-tester.h",
3968 ] + MICROKERNEL_TEST_HDRS,
3969 deps = MICROKERNEL_TEST_DEPS,
3970)
3971
3972xnnpack_unit_test(
3973 name = "f16_vsubc_minmax_test",
3974 srcs = [
3975 "test/f16-vsubc-minmax.cc",
3976 "test/vbinaryc-microkernel-tester.h",
3977 ] + MICROKERNEL_TEST_HDRS,
3978 deps = MICROKERNEL_TEST_DEPS,
3979)
3980
3981xnnpack_unit_test(
3982 name = "f16_vrsubc_minmax_test",
3983 srcs = [
3984 "test/f16-vrsubc-minmax.cc",
3985 "test/vbinaryc-microkernel-tester.h",
3986 ] + MICROKERNEL_TEST_HDRS,
3987 deps = MICROKERNEL_TEST_DEPS,
3988)
3989
3990xnnpack_unit_test(
Marat Dukhan5020b962020-06-08 13:30:10 -07003991 name = "f32_vabs_test",
3992 srcs = [
3993 "test/f32-vabs.cc",
3994 "test/vunary-microkernel-tester.h",
3995 ] + MICROKERNEL_TEST_HDRS,
3996 deps = MICROKERNEL_TEST_DEPS,
3997)
3998
3999xnnpack_unit_test(
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004000 name = "f32_vadd_minmax_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07004001 srcs = [
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004002 "test/f32-vadd-minmax.cc",
Marat Dukhan1e782c42019-11-21 17:02:40 -08004003 "test/vbinary-microkernel-tester.h",
Marat Dukhanc07cb7f2019-11-14 15:32:05 -08004004 ] + MICROKERNEL_TEST_HDRS,
4005 deps = MICROKERNEL_TEST_DEPS,
4006)
4007
4008xnnpack_unit_test(
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004009 name = "f32_vaddc_minmax_test",
Marat Dukhanc07cb7f2019-11-14 15:32:05 -08004010 srcs = [
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004011 "test/f32-vaddc-minmax.cc",
Marat Dukhan1e782c42019-11-21 17:02:40 -08004012 "test/vbinaryc-microkernel-tester.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07004013 ] + MICROKERNEL_TEST_HDRS,
4014 deps = MICROKERNEL_TEST_DEPS,
4015)
4016
4017xnnpack_unit_test(
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004018 name = "f32_vdiv_minmax_test",
Marat Dukhan77ca6302019-12-06 12:48:15 -08004019 srcs = [
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004020 "test/f32-vdiv-minmax.cc",
Marat Dukhan77ca6302019-12-06 12:48:15 -08004021 "test/vbinary-microkernel-tester.h",
4022 ] + MICROKERNEL_TEST_HDRS,
4023 deps = MICROKERNEL_TEST_DEPS,
4024)
4025
4026xnnpack_unit_test(
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004027 name = "f32_vdivc_minmax_test",
Marat Dukhan77ca6302019-12-06 12:48:15 -08004028 srcs = [
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004029 "test/f32-vdivc-minmax.cc",
Marat Dukhan77ca6302019-12-06 12:48:15 -08004030 "test/vbinaryc-microkernel-tester.h",
4031 ] + MICROKERNEL_TEST_HDRS,
4032 deps = MICROKERNEL_TEST_DEPS,
4033)
4034
4035xnnpack_unit_test(
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004036 name = "f32_vrdivc_minmax_test",
Marat Dukhan77ca6302019-12-06 12:48:15 -08004037 srcs = [
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004038 "test/f32-vrdivc-minmax.cc",
Marat Dukhan77ca6302019-12-06 12:48:15 -08004039 "test/vbinaryc-microkernel-tester.h",
4040 ] + MICROKERNEL_TEST_HDRS,
4041 deps = MICROKERNEL_TEST_DEPS,
4042)
4043
4044xnnpack_unit_test(
Marat Dukhan403b7d42019-12-05 12:49:11 -08004045 name = "f32_vmax_test",
4046 srcs = [
4047 "test/f32-vmax.cc",
4048 "test/vbinary-microkernel-tester.h",
4049 ] + MICROKERNEL_TEST_HDRS,
4050 deps = MICROKERNEL_TEST_DEPS,
4051)
4052
4053xnnpack_unit_test(
4054 name = "f32_vmaxc_test",
4055 srcs = [
4056 "test/f32-vmaxc.cc",
4057 "test/vbinaryc-microkernel-tester.h",
4058 ] + MICROKERNEL_TEST_HDRS,
4059 deps = MICROKERNEL_TEST_DEPS,
4060)
4061
4062xnnpack_unit_test(
4063 name = "f32_vmin_test",
4064 srcs = [
4065 "test/f32-vmin.cc",
4066 "test/vbinary-microkernel-tester.h",
4067 ] + MICROKERNEL_TEST_HDRS,
4068 deps = MICROKERNEL_TEST_DEPS,
4069)
4070
4071xnnpack_unit_test(
4072 name = "f32_vminc_test",
4073 srcs = [
4074 "test/f32-vminc.cc",
4075 "test/vbinaryc-microkernel-tester.h",
4076 ] + MICROKERNEL_TEST_HDRS,
4077 deps = MICROKERNEL_TEST_DEPS,
4078)
4079
4080xnnpack_unit_test(
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004081 name = "f32_vmul_minmax_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07004082 srcs = [
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004083 "test/f32-vmul-minmax.cc",
Marat Dukhan1e782c42019-11-21 17:02:40 -08004084 "test/vbinary-microkernel-tester.h",
Marat Dukhanc07cb7f2019-11-14 15:32:05 -08004085 ] + MICROKERNEL_TEST_HDRS,
4086 deps = MICROKERNEL_TEST_DEPS,
4087)
4088
4089xnnpack_unit_test(
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004090 name = "f32_vmulc_minmax_test",
Marat Dukhanc07cb7f2019-11-14 15:32:05 -08004091 srcs = [
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004092 "test/f32-vmulc-minmax.cc",
Marat Dukhan1e782c42019-11-21 17:02:40 -08004093 "test/vbinaryc-microkernel-tester.h",
Marat Dukhan08c4a432019-10-03 09:29:21 -07004094 ] + MICROKERNEL_TEST_HDRS,
4095 deps = MICROKERNEL_TEST_DEPS,
4096)
4097
4098xnnpack_unit_test(
Frank Barchard2a1049c2020-06-03 02:31:27 -07004099 name = "f16_vmulcaddc_minmax_test",
4100 srcs = [
4101 "test/f16-vmulcaddc-minmax.cc",
4102 "test/vmulcaddc-microkernel-tester.h",
4103 "src/xnnpack/AlignedAllocator.h",
4104 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
4105 deps = MICROKERNEL_TEST_DEPS,
4106)
4107
4108xnnpack_unit_test(
Marat Dukhan99936602020-04-11 16:47:01 -07004109 name = "f32_vmulcaddc_minmax_test",
Marat Dukhan08c4a432019-10-03 09:29:21 -07004110 srcs = [
Marat Dukhan99936602020-04-11 16:47:01 -07004111 "test/f32-vmulcaddc-minmax.cc",
Marat Dukhan08c4a432019-10-03 09:29:21 -07004112 "test/vmulcaddc-microkernel-tester.h",
4113 "src/xnnpack/AlignedAllocator.h",
4114 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
4115 deps = MICROKERNEL_TEST_DEPS,
4116)
4117
4118xnnpack_unit_test(
Marat Dukhan5020b962020-06-08 13:30:10 -07004119 name = "f32_vneg_test",
4120 srcs = [
4121 "test/f32-vneg.cc",
4122 "test/vunary-microkernel-tester.h",
4123 ] + MICROKERNEL_TEST_HDRS,
4124 deps = MICROKERNEL_TEST_DEPS,
4125)
4126
4127xnnpack_unit_test(
Marat Dukhaneecf8fd2020-06-09 08:59:37 -07004128 name = "f32_vrndne_test",
4129 srcs = [
4130 "test/f32-vrndne.cc",
4131 "test/vunary-microkernel-tester.h",
4132 ] + MICROKERNEL_TEST_HDRS,
4133 deps = MICROKERNEL_TEST_DEPS,
4134)
4135
4136xnnpack_unit_test(
4137 name = "f32_vrndz_test",
4138 srcs = [
4139 "test/f32-vrndz.cc",
4140 "test/vunary-microkernel-tester.h",
4141 ] + MICROKERNEL_TEST_HDRS,
4142 deps = MICROKERNEL_TEST_DEPS,
4143)
4144
4145xnnpack_unit_test(
4146 name = "f32_vrndu_test",
4147 srcs = [
4148 "test/f32-vrndu.cc",
4149 "test/vunary-microkernel-tester.h",
4150 ] + MICROKERNEL_TEST_HDRS,
4151 deps = MICROKERNEL_TEST_DEPS,
4152)
4153
4154xnnpack_unit_test(
4155 name = "f32_vrndd_test",
4156 srcs = [
4157 "test/f32-vrndd.cc",
4158 "test/vunary-microkernel-tester.h",
4159 ] + MICROKERNEL_TEST_HDRS,
4160 deps = MICROKERNEL_TEST_DEPS,
4161)
4162
4163xnnpack_unit_test(
Marat Dukhan05ac8e32019-10-21 15:39:33 -07004164 name = "f32_vscale_test",
4165 srcs = [
4166 "test/f32-vscale.cc",
4167 "test/vscale-microkernel-tester.h",
4168 ] + MICROKERNEL_TEST_HDRS,
4169 deps = MICROKERNEL_TEST_DEPS,
4170)
4171
4172xnnpack_unit_test(
Marat Dukhan97579532019-10-18 16:40:39 -07004173 name = "f32_vscaleexpminusmax_test",
4174 srcs = [
4175 "test/f32-vscaleexpminusmax.cc",
4176 "test/vscaleexpminusmax-microkernel-tester.h",
4177 ] + MICROKERNEL_TEST_HDRS,
4178 deps = MICROKERNEL_TEST_DEPS,
4179)
4180
4181xnnpack_unit_test(
Marat Dukhan6f8d4d32019-10-25 17:07:09 -07004182 name = "f32_vscaleextexp_test",
4183 srcs = [
4184 "test/f32-vscaleextexp.cc",
4185 "test/vscaleextexp-microkernel-tester.h",
4186 ] + MICROKERNEL_TEST_HDRS,
4187 deps = MICROKERNEL_TEST_DEPS,
4188)
4189
4190xnnpack_unit_test(
Marat Dukhan5020b962020-06-08 13:30:10 -07004191 name = "f32_vsqr_test",
4192 srcs = [
4193 "test/f32-vsqr.cc",
4194 "test/vunary-microkernel-tester.h",
4195 ] + MICROKERNEL_TEST_HDRS,
4196 deps = MICROKERNEL_TEST_DEPS,
4197)
4198
4199xnnpack_unit_test(
Marat Dukhan13bafb02020-06-05 00:43:11 -07004200 name = "f32_vsqrdiff_test",
4201 srcs = [
4202 "test/f32-vsqrdiff.cc",
4203 "test/vbinary-microkernel-tester.h",
4204 ] + MICROKERNEL_TEST_HDRS,
4205 deps = MICROKERNEL_TEST_DEPS,
4206)
4207
4208xnnpack_unit_test(
4209 name = "f32_vsqrdiffc_test",
4210 srcs = [
4211 "test/f32-vsqrdiffc.cc",
4212 "test/vbinaryc-microkernel-tester.h",
4213 ] + MICROKERNEL_TEST_HDRS,
4214 deps = MICROKERNEL_TEST_DEPS,
4215)
4216
4217xnnpack_unit_test(
4218 name = "f32_vrsqrdiffc_test",
4219 srcs = [
4220 "test/f32-vrsqrdiffc.cc",
4221 "test/vbinaryc-microkernel-tester.h",
4222 ] + MICROKERNEL_TEST_HDRS,
4223 deps = MICROKERNEL_TEST_DEPS,
4224)
4225
4226xnnpack_unit_test(
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004227 name = "f32_vsub_minmax_test",
Marat Dukhan97579532019-10-18 16:40:39 -07004228 srcs = [
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004229 "test/f32-vsub-minmax.cc",
Marat Dukhan1e782c42019-11-21 17:02:40 -08004230 "test/vbinary-microkernel-tester.h",
Marat Dukhanc07cb7f2019-11-14 15:32:05 -08004231 ] + MICROKERNEL_TEST_HDRS,
4232 deps = MICROKERNEL_TEST_DEPS,
4233)
4234
4235xnnpack_unit_test(
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004236 name = "f32_vsubc_minmax_test",
Marat Dukhanc07cb7f2019-11-14 15:32:05 -08004237 srcs = [
Marat Dukhan91cd2b72020-04-09 23:57:31 -07004238 "test/f32-vsubc-minmax.cc",
Marat Dukhan1e782c42019-11-21 17:02:40 -08004239 "test/vbinaryc-microkernel-tester.h",
Marat Dukhanc07cb7f2019-11-14 15:32:05 -08004240 ] + MICROKERNEL_TEST_HDRS,
4241 deps = MICROKERNEL_TEST_DEPS,
4242)
4243
# Test target compiling test/f32-vrsubc-minmax.cc with vbinaryc-microkernel-tester.h.
xnnpack_unit_test(
    name = "f32_vrsubc_minmax_test",
    srcs = [
        "test/f32-vrsubc-minmax.cc",
        "test/vbinaryc-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4252
# Test target compiling test/q8-avgpool-minmax.cc with avgpool-microkernel-tester.h;
# src/xnnpack/AlignedAllocator.h is listed explicitly in srcs.
xnnpack_unit_test(
    name = "q8_avgpool_minmax_test",
    srcs = [
        "test/q8-avgpool-minmax.cc",
        "test/avgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4262
# Test target compiling test/q8-igemm-minmax.cc with gemm-microkernel-tester.h;
# srcs additionally pull in AlignedAllocator.h and WEIGHTS_PACK_HDRS.
xnnpack_unit_test(
    name = "q8_igemm_minmax_test",
    srcs = [
        "test/q8-igemm-minmax.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4272
# Test target compiling test/q8-dwconv-minmax.cc with dwconv-microkernel-tester.h;
# srcs additionally pull in AlignedAllocator.h and WEIGHTS_PACK_HDRS.
xnnpack_unit_test(
    name = "q8_dwconv_minmax_test",
    srcs = [
        "test/q8-dwconv-minmax.cc",
        "test/dwconv-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4282
# Test target compiling test/q8-gavgpool-minmax.cc with gavgpool-microkernel-tester.h;
# src/xnnpack/AlignedAllocator.h is listed explicitly in srcs.
xnnpack_unit_test(
    name = "q8_gavgpool_minmax_test",
    srcs = [
        "test/q8-gavgpool-minmax.cc",
        "test/gavgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4292
# Test target compiling test/q8-gemm-minmax.cc with gemm-microkernel-tester.h;
# srcs additionally pull in AlignedAllocator.h and WEIGHTS_PACK_HDRS.
xnnpack_unit_test(
    name = "q8_gemm_minmax_test",
    srcs = [
        "test/q8-gemm-minmax.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4302
# Test target compiling test/q8-vadd-minmax.cc with vadd-microkernel-tester.h.
xnnpack_unit_test(
    name = "q8_vadd_minmax_test",
    srcs = [
        "test/q8-vadd-minmax.cc",
        "test/vadd-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4311
# Test target compiling test/u8-clamp.cc with clamp-microkernel-tester.h.
xnnpack_unit_test(
    name = "u8_clamp_test",
    srcs = [
        "test/u8-clamp.cc",
        "test/clamp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4320
# Test target compiling test/u8-lut32norm.cc with lut-norm-microkernel-tester.h.
xnnpack_unit_test(
    name = "u8_lut32norm_test",
    srcs = [
        "test/u8-lut32norm.cc",
        "test/lut-norm-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4329
# Test target compiling test/u8-maxpool-minmax.cc with maxpool-microkernel-tester.h.
xnnpack_unit_test(
    name = "u8_maxpool_minmax_test",
    srcs = [
        "test/u8-maxpool-minmax.cc",
        "test/maxpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4338
# Test target compiling test/u8-rmax.cc with rmax-microkernel-tester.h.
xnnpack_unit_test(
    name = "u8_rmax_test",
    srcs = [
        "test/u8-rmax.cc",
        "test/rmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4347
# Test target compiling test/x32-fill.cc with fill-microkernel-tester.h.
xnnpack_unit_test(
    name = "x32_fill_test",
    srcs = [
        "test/x32-fill.cc",
        "test/fill-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4356
# Test target compiling test/x32-packx.cc with pack-microkernel-tester.h;
# src/xnnpack/AlignedAllocator.h is listed explicitly in srcs.
xnnpack_unit_test(
    name = "x32_packx_test",
    srcs = [
        "test/x32-packx.cc",
        "test/pack-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4366
# Test target compiling test/x32-pad.cc with pad-microkernel-tester.h.
xnnpack_unit_test(
    name = "x32_pad_test",
    srcs = [
        "test/x32-pad.cc",
        "test/pad-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4375
# Test target compiling test/x32-unpool.cc with unpool-microkernel-tester.h.
xnnpack_unit_test(
    name = "x32_unpool_test",
    srcs = [
        "test/x32-unpool.cc",
        "test/unpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4384
# Test target compiling test/x32-zip.cc with zip-microkernel-tester.h.
xnnpack_unit_test(
    name = "x32_zip_test",
    srcs = [
        "test/x32-zip.cc",
        "test/zip-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4393
# Test target compiling test/x8-lut.cc with lut-microkernel-tester.h.
xnnpack_unit_test(
    name = "x8_lut_test",
    srcs = [
        "test/x8-lut.cc",
        "test/lut-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4402
# Test target compiling test/x8-zip.cc with zip-microkernel-tester.h.
xnnpack_unit_test(
    name = "x8_zip_test",
    srcs = [
        "test/x8-zip.cc",
        "test/zip-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4411
# Test target compiling test/requantization.cc with requantization-tester.h
# and the src/xnnpack/requantization-stubs.h header.
xnnpack_unit_test(
    name = "requantization_test",
    srcs = [
        "src/xnnpack/requantization-stubs.h",
        "test/requantization.cc",
        "test/requantization-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
4421
########################## Size tests for the library #########################

# Binary built from test/operator-size.c, linked only against
# :xnnpack_operators_nhwc_f32.
xnnpack_binary(
    name = "operator_size_test",
    srcs = ["test/operator-size.c"],
    deps = [":xnnpack_operators_nhwc_f32"],
)
4429
# Binary built from test/subgraph-size.c, linked against the full :XNNPACK target.
xnnpack_binary(
    name = "subgraph_size_test",
    srcs = ["test/subgraph-size.c"],
    deps = [":XNNPACK"],
)
4435
########################### Unit tests for operators ##########################

# Operator test target: test/abs-nc.cc with abs-operator-tester.h.
xnnpack_unit_test(
    name = "abs_nc_test",
    srcs = [
        "test/abs-nc.cc",
        "test/abs-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4446
# Operator test target: test/add-nd.cc with binary-elementwise-operator-tester.h.
xnnpack_unit_test(
    name = "add_nd_test",
    srcs = [
        "test/add-nd.cc",
        "test/binary-elementwise-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4455
# Operator test target: test/argmax-pooling-nhwc.cc with its tester header,
# plus OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "argmax_pooling_nhwc_test",
    srcs = [
        "test/argmax-pooling-nhwc.cc",
        "test/argmax-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
4464
# Operator test target: test/average-pooling-nhwc.cc with its tester header,
# plus OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "average_pooling_nhwc_test",
    srcs = [
        "test/average-pooling-nhwc.cc",
        "test/average-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
4473
# Operator test target: test/bankers-rounding-nc.cc with bankers-rounding-operator-tester.h.
xnnpack_unit_test(
    name = "bankers_rounding_nc_test",
    srcs = [
        "test/bankers-rounding-nc.cc",
        "test/bankers-rounding-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4482
# Operator test target: test/ceiling-nc.cc with ceiling-operator-tester.h.
xnnpack_unit_test(
    name = "ceiling_nc_test",
    srcs = [
        "test/ceiling-nc.cc",
        "test/ceiling-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4491
# Operator test target: test/channel-shuffle-nc.cc with channel-shuffle-operator-tester.h.
xnnpack_unit_test(
    name = "channel_shuffle_nc_test",
    srcs = [
        "test/channel-shuffle-nc.cc",
        "test/channel-shuffle-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4500
# Operator test target: test/clamp-nc.cc with clamp-operator-tester.h.
xnnpack_unit_test(
    name = "clamp_nc_test",
    srcs = [
        "test/clamp-nc.cc",
        "test/clamp-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4509
# Operator test target: test/constant-pad-nd.cc with constant-pad-operator-tester.h.
xnnpack_unit_test(
    name = "constant_pad_nd_test",
    srcs = [
        "test/constant-pad-nd.cc",
        "test/constant-pad-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4518
# Operator test target: test/convolution-nhwc.cc with convolution-operator-tester.h.
xnnpack_unit_test(
    name = "convolution_nhwc_test",
    srcs = [
        "test/convolution-nhwc.cc",
        "test/convolution-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4527
# Operator test target: test/convolution-nchw.cc with convolution-operator-tester.h.
xnnpack_unit_test(
    name = "convolution_nchw_test",
    srcs = [
        "test/convolution-nchw.cc",
        "test/convolution-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4536
# Operator test target: test/copy-nc.cc with copy-operator-tester.h.
xnnpack_unit_test(
    name = "copy_nc_test",
    srcs = [
        "test/copy-nc.cc",
        "test/copy-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4545
# Operator test target: test/deconvolution-nhwc.cc with its tester header,
# plus OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "deconvolution_nhwc_test",
    srcs = [
        "test/deconvolution-nhwc.cc",
        "test/deconvolution-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
4554
# Operator test target: test/divide-nd.cc with binary-elementwise-operator-tester.h.
xnnpack_unit_test(
    name = "divide_nd_test",
    srcs = [
        "test/binary-elementwise-operator-tester.h",
        "test/divide-nd.cc",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4563
# Operator test target: test/fully-connected-nc.cc with fully-connected-operator-tester.h.
xnnpack_unit_test(
    name = "fully_connected_nc_test",
    srcs = [
        "test/fully-connected-nc.cc",
        "test/fully-connected-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4572
# Operator test target: test/floor-nc.cc with floor-operator-tester.h.
xnnpack_unit_test(
    name = "floor_nc_test",
    srcs = [
        "test/floor-nc.cc",
        "test/floor-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4581
# Operator test target: test/global-average-pooling-nwc.cc with its tester header,
# plus OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "global_average_pooling_nwc_test",
    srcs = [
        "test/global-average-pooling-nwc.cc",
        "test/global-average-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
4590
# Operator test target: test/global-average-pooling-ncw.cc with
# global-average-pooling-operator-tester.h.
xnnpack_unit_test(
    name = "global_average_pooling_ncw_test",
    srcs = [
        "test/global-average-pooling-ncw.cc",
        "test/global-average-pooling-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4599
# Operator test target: test/hardswish-nc.cc with hardswish-operator-tester.h.
xnnpack_unit_test(
    name = "hardswish_nc_test",
    srcs = [
        "test/hardswish-nc.cc",
        "test/hardswish-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4608
# Operator test target: test/leaky-relu-nc.cc with leaky-relu-operator-tester.h.
xnnpack_unit_test(
    name = "leaky_relu_nc_test",
    srcs = [
        "test/leaky-relu-nc.cc",
        "test/leaky-relu-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4617
# Operator test target: test/max-pooling-nhwc.cc with its tester header,
# plus OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "max_pooling_nhwc_test",
    srcs = [
        "test/max-pooling-nhwc.cc",
        "test/max-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
4626
# Operator test target: test/maximum-nd.cc with binary-elementwise-operator-tester.h.
xnnpack_unit_test(
    name = "maximum_nd_test",
    srcs = [
        "test/binary-elementwise-operator-tester.h",
        "test/maximum-nd.cc",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4635
# Operator test target: test/minimum-nd.cc with binary-elementwise-operator-tester.h.
xnnpack_unit_test(
    name = "minimum_nd_test",
    srcs = [
        "test/binary-elementwise-operator-tester.h",
        "test/minimum-nd.cc",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4644
# Operator test target: test/multiply-nd.cc with binary-elementwise-operator-tester.h.
xnnpack_unit_test(
    name = "multiply_nd_test",
    srcs = [
        "test/binary-elementwise-operator-tester.h",
        "test/multiply-nd.cc",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4653
# Operator test target: test/negate-nc.cc with negate-operator-tester.h.
xnnpack_unit_test(
    name = "negate_nc_test",
    srcs = [
        "test/negate-nc.cc",
        "test/negate-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4662
# Operator test target: test/prelu-nc.cc with its tester header,
# plus OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "prelu_nc_test",
    srcs = [
        "test/prelu-nc.cc",
        "test/prelu-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
4671
# Operator test target: test/resize-bilinear-nhwc.cc with its tester header,
# plus OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "resize_bilinear_nhwc_test",
    srcs = [
        "test/resize-bilinear-nhwc.cc",
        "test/resize-bilinear-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
4680
# Operator test target: test/sigmoid-nc.cc with sigmoid-operator-tester.h.
xnnpack_unit_test(
    name = "sigmoid_nc_test",
    srcs = [
        "test/sigmoid-nc.cc",
        "test/sigmoid-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4689
# Operator test target: test/softmax-nc.cc with softmax-operator-tester.h.
xnnpack_unit_test(
    name = "softmax_nc_test",
    srcs = [
        "test/softmax-nc.cc",
        "test/softmax-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4698
# Operator test target: test/square-nc.cc with square-operator-tester.h.
xnnpack_unit_test(
    name = "square_nc_test",
    srcs = [
        "test/square-nc.cc",
        "test/square-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4707
# Operator test target: test/squared-difference-nd.cc with
# binary-elementwise-operator-tester.h.
xnnpack_unit_test(
    name = "squared_difference_nd_test",
    srcs = [
        "test/binary-elementwise-operator-tester.h",
        "test/squared-difference-nd.cc",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4716
# Operator test target: test/subtract-nd.cc with binary-elementwise-operator-tester.h.
xnnpack_unit_test(
    name = "subtract_nd_test",
    srcs = [
        "test/binary-elementwise-operator-tester.h",
        "test/subtract-nd.cc",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4725
# Operator test target: test/truncation-nc.cc with truncation-operator-tester.h.
xnnpack_unit_test(
    name = "truncation_nc_test",
    srcs = [
        "test/truncation-nc.cc",
        "test/truncation-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4734
# Operator test target: test/unpooling-nhwc.cc with unpooling-operator-tester.h.
xnnpack_unit_test(
    name = "unpooling_nhwc_test",
    srcs = [
        "test/unpooling-nhwc.cc",
        "test/unpooling-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
4743
############################### Misc unit tests ###############################
4745
# Test target built from test/memory-planner-test.cc; depends on :XNNPACK
# and :memory_planner rather than a shared deps list.
xnnpack_unit_test(
    name = "memory_planner_test",
    srcs = [
        "test/memory-planner-test.cc",
    ],
    deps = [
        ":XNNPACK",
        ":memory_planner",
    ],
)
4756
############################# Build configurations #############################
4758
# Enables usage of assembly kernels.
# Matches builds invoked with --define xnn_enable_assembly=true.
config_setting(
    name = "xnn_enable_assembly_explicit_true",
    define_values = {"xnn_enable_assembly": "true"},
)
4764
# Disables usage of assembly kernels.
# Matches builds invoked with --define xnn_enable_assembly=false.
config_setting(
    name = "xnn_enable_assembly_explicit_false",
    define_values = {"xnn_enable_assembly": "false"},
)
4770
# Disables usage of HMP-aware optimizations.
# Matches builds invoked with --define xnn_enable_hmp=false.
config_setting(
    name = "xnn_enable_hmp_explicit_false",
    define_values = {"xnn_enable_hmp": "false"},
)
4776
# Enables usage of optimized memory allocation.
# Matches builds invoked with --define xnnpack_enable_memopt=true.
# NOTE(review): the define key is spelled `xnnpack_enable_memopt`, while the
# target name and the other define-based settings in this file use the
# `xnn_enable_*` prefix -- confirm the spelling is intentional before renaming.
config_setting(
    name = "xnn_enable_memopt_explicit_true",
    define_values = {"xnnpack_enable_memopt": "true"},
)
4782
# Disables usage of optimized memory allocation.
# Matches builds invoked with --define xnnpack_enable_memopt=false.
# NOTE(review): the define key is spelled `xnnpack_enable_memopt`, while the
# target name and the other define-based settings in this file use the
# `xnn_enable_*` prefix -- confirm the spelling is intentional before renaming.
config_setting(
    name = "xnn_enable_memopt_explicit_false",
    define_values = {"xnnpack_enable_memopt": "false"},
)
4788
# Builds with -c dbg
config_setting(
    name = "debug_build",
    values = {
        "compilation_mode": "dbg",
    },
)
4796
# Builds with -c opt
config_setting(
    name = "optimized_build",
    values = {
        "compilation_mode": "opt",
    },
)
4804
# Matches builds configured with --cpu=k8.
config_setting(
    name = "linux_k8",
    values = {"cpu": "k8"},
)
4809
# Matches builds configured with --cpu=arm.
config_setting(
    name = "linux_arm",
    values = {"cpu": "arm"},
)
4814
# Matches builds configured with --cpu=armhf.
config_setting(
    name = "linux_armhf",
    values = {"cpu": "armhf"},
)
4819
# Matches builds configured with --cpu=aarch64.
config_setting(
    name = "linux_aarch64",
    values = {"cpu": "aarch64"},
)
4824
# Matches any build whose crosstool_top is //external:android/crosstool,
# regardless of CPU.
config_setting(
    name = "android",
    values = {"crosstool_top": "//external:android/crosstool"},
)
4829
# Matches the Android crosstool combined with --cpu=armeabi-v7a.
config_setting(
    name = "android_armv7",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "armeabi-v7a",
    },
)
4837
# Matches the Android crosstool combined with --cpu=arm64-v8a.
config_setting(
    name = "android_arm64",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "arm64-v8a",
    },
)
4845
# Matches the Android crosstool combined with --cpu=x86.
config_setting(
    name = "android_x86",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "x86",
    },
)
4853
# Matches the Android crosstool combined with --cpu=x86_64.
config_setting(
    name = "android_x86_64",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "x86_64",
    },
)
4861
# Matches builds configured with --cpu=x64_windows, whatever the compiler.
config_setting(
    name = "windows_x86_64",
    values = {"cpu": "x64_windows"},
)
4866
# Matches --cpu=x64_windows together with --compiler=clang-cl.
config_setting(
    name = "windows_x86_64_clang",
    values = {
        "compiler": "clang-cl",
        "cpu": "x64_windows",
    },
)
4874
# Matches --cpu=x64_windows together with --compiler=mingw-gcc.
config_setting(
    name = "windows_x86_64_mingw",
    values = {
        "compiler": "mingw-gcc",
        "cpu": "x64_windows",
    },
)
4882
# Matches --cpu=x64_windows together with --compiler=msys-gcc.
config_setting(
    name = "windows_x86_64_msys",
    values = {
        "compiler": "msys-gcc",
        "cpu": "x64_windows",
    },
)
4890
# Matches apple_platform_type=macos together with --cpu=darwin.
config_setting(
    name = "macos_x86_64",
    values = {
        "apple_platform_type": "macos",
        "cpu": "darwin",
    },
)
4898
# Matches any build whose crosstool_top is //toolchain:emscripten,
# regardless of CPU.
config_setting(
    name = "emscripten",
    values = {"crosstool_top": "//toolchain:emscripten"},
)
4903
# Matches the Emscripten crosstool combined with --cpu=wasm.
config_setting(
    name = "emscripten_wasm",
    values = {
        "crosstool_top": "//toolchain:emscripten",
        "cpu": "wasm",
    },
)
4911
# Matches the Emscripten crosstool with --cpu=wasm and --copt=-msimd128.
config_setting(
    name = "emscripten_wasmsimd",
    values = {
        "crosstool_top": "//toolchain:emscripten",
        "cpu": "wasm",
        "copt": "-msimd128",
    },
)
4920
# Matches the Emscripten crosstool combined with --cpu=asmjs.
config_setting(
    name = "emscripten_asmjs",
    values = {
        "crosstool_top": "//toolchain:emscripten",
        "cpu": "asmjs",
    },
)

# Matches apple_platform_type=ios together with --cpu=ios_armv7.
config_setting(
    name = "ios_armv7",
    values = {
        "apple_platform_type": "ios",
        "cpu": "ios_armv7",
    },
)
4936
# Matches apple_platform_type=ios together with --cpu=ios_arm64.
config_setting(
    name = "ios_arm64",
    values = {
        "apple_platform_type": "ios",
        "cpu": "ios_arm64",
    },
)
4944
# Matches apple_platform_type=ios together with --cpu=ios_arm64e.
config_setting(
    name = "ios_arm64e",
    values = {
        "apple_platform_type": "ios",
        "cpu": "ios_arm64e",
    },
)
4952
# Matches apple_platform_type=ios together with --cpu=ios_i386.
config_setting(
    name = "ios_x86",
    values = {
        "apple_platform_type": "ios",
        "cpu": "ios_i386",
    },
)
4960
# Matches apple_platform_type=ios together with --cpu=ios_x86_64.
config_setting(
    name = "ios_x86_64",
    values = {
        "apple_platform_type": "ios",
        "cpu": "ios_x86_64",
    },
)
4968
# Matches apple_platform_type=watchos together with --cpu=watchos_armv7k.
config_setting(
    name = "watchos_armv7k",
    values = {
        "apple_platform_type": "watchos",
        "cpu": "watchos_armv7k",
    },
)
4976
# Matches apple_platform_type=watchos together with --cpu=watchos_arm64_32.
config_setting(
    name = "watchos_arm64_32",
    values = {
        "apple_platform_type": "watchos",
        "cpu": "watchos_arm64_32",
    },
)
4984
# Matches apple_platform_type=watchos together with --cpu=watchos_i386.
config_setting(
    name = "watchos_x86",
    values = {
        "apple_platform_type": "watchos",
        "cpu": "watchos_i386",
    },
)
4992
# Matches apple_platform_type=watchos together with --cpu=watchos_x86_64.
config_setting(
    name = "watchos_x86_64",
    values = {
        "apple_platform_type": "watchos",
        "cpu": "watchos_x86_64",
    },
)
5000
# Matches apple_platform_type=tvos together with --cpu=tvos_arm64.
config_setting(
    name = "tvos_arm64",
    values = {
        "apple_platform_type": "tvos",
        "cpu": "tvos_arm64",
    },
)
5008
# Matches apple_platform_type=tvos together with --cpu=tvos_x86_64.
config_setting(
    name = "tvos_x86_64",
    values = {
        "apple_platform_type": "tvos",
        "cpu": "tvos_x86_64",
    },
)