# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# Description:
# XNNPACK - optimized floating-point neural network operators library

# Macros and option lists imported from build_defs.bzl in this package.
load(
    ":build_defs.bzl",
    "xnnpack_aggregate_library",
    "xnnpack_benchmark",
    "xnnpack_binary",
    "xnnpack_cc_library",
    "xnnpack_min_size_copts",
    "xnnpack_optional_armcl_copts",
    "xnnpack_optional_armcl_deps",
    "xnnpack_optional_gemmlowp_copts",
    "xnnpack_optional_gemmlowp_deps",
    "xnnpack_optional_ruy_copts",
    "xnnpack_optional_ruy_deps",
    "xnnpack_optional_tflite_copts",
    "xnnpack_optional_tflite_deps",
    "xnnpack_std_copts",
    "xnnpack_unit_test",
    "xnnpack_visibility",
)

# Package license declaration.
licenses(["notice"])

# Make the LICENSE file referenceable from other packages.
exports_files(["LICENSE"])

# Dependency labels grouped for operator benchmarks.
# NOTE(review): the rules that consume this list are outside this excerpt.
OPERATOR_BENCHMARK_DEPS = [
    ":XNNPACK",
    ":bench_utils",
    "@cpuinfo",
    "@pthreadpool",
]

# Dependency labels grouped for micro-kernel benchmarks.
# NOTE(review): the rules that consume this list are outside this excerpt.
MICROKERNEL_BENCHMARK_DEPS = [
    ":ukernels",
    ":bench_utils",
    ":enable_assembly",
    "@cpuinfo",
    "@FP16",
    "@pthreadpool",
]

# Dependency labels grouped for accuracy-evaluation targets.
# NOTE(review): the rules that consume this list are outside this excerpt.
ACCURACY_EVAL_DEPS = [
    ":XNNPACK",
    ":ukernels",
    "@FP16",
    "@pthreadpool",
]

# Dependency labels grouped for micro-kernel unit tests.
# NOTE(review): the rules that consume this list are outside this excerpt.
MICROKERNEL_TEST_DEPS = [
    ":ukernels",
    ":enable_assembly",
    "@cpuinfo",
    "@FP16",
    "@pthreadpool",
]

# Dependency labels grouped for operator unit tests.
# NOTE(review): the rules that consume this list are outside this excerpt.
OPERATOR_TEST_DEPS = [
    ":XNNPACK",
    "@pthreadpool",
    "@FP16",
]

# C sources located directly under src/, one file per operator.
OPERATOR_SRCS = [
    "src/add-nc.c",
    "src/argmax-pooling-nhwc.c",
    "src/average-pooling-nhwc.c",
    "src/channel-pad-nc.c",
    "src/channel-shuffle-nc.c",
    "src/clamp-nc.c",
    "src/convolution-nchw.c",
    "src/convolution-nhwc.c",
    "src/deconvolution-nhwc.c",
    "src/fully-connected-nc.c",
    "src/global-average-pooling-ncw.c",
    "src/global-average-pooling-nwc.c",
    "src/hardswish-nc.c",
    "src/leaky-relu-nc.c",
    "src/max-pooling-nhwc.c",
    "src/multiply-nd.c",
    "src/prelu-nc.c",
    "src/resize-bilinear-nhwc.c",
    "src/sigmoid-nc.c",
    "src/softargmax-nc.c",
    "src/unpooling-nhwc.c",
]

# Micro-kernel C sources whose file names carry the "scalar" tag.
SCALAR_UKERNELS = [
    "src/f32-argmaxpool/9p8x-scalar-c1.c",
    "src/f32-argmaxpool/4x-scalar-c1.c",
    "src/f32-argmaxpool/9x-scalar-c1.c",
    "src/f32-avgpool/mp9p8q-scalar.c",
    "src/f32-avgpool/up9-scalar.c",
    "src/f32-bilinear/scalar-c1.c",
    "src/f32-bilinear/scalar-c2.c",
    "src/f32-bilinear/scalar-c4.c",
    "src/f32-binop/vadd-scalar-x1.c",
    "src/f32-binop/vadd-scalar-x2.c",
    "src/f32-binop/vadd-scalar-x4.c",
    "src/f32-binop/vaddc-scalar-x1.c",
    "src/f32-binop/vaddc-scalar-x2.c",
    "src/f32-binop/vaddc-scalar-x4.c",
    "src/f32-binop/vmul-scalar-x1.c",
    "src/f32-binop/vmul-scalar-x2.c",
    "src/f32-binop/vmul-scalar-x4.c",
    "src/f32-binop/vmulc-scalar-x1.c",
    "src/f32-binop/vmulc-scalar-x2.c",
    "src/f32-binop/vmulc-scalar-x4.c",
    "src/f32-binop/vsub-scalar-x1.c",
    "src/f32-binop/vsub-scalar-x2.c",
    "src/f32-binop/vsub-scalar-x4.c",
    "src/f32-binop/vsubc-scalar-x1.c",
    "src/f32-binop/vsubc-scalar-x2.c",
    "src/f32-binop/vsubc-scalar-x4.c",
    "src/f32-binop/vrsubc-scalar-x1.c",
    "src/f32-binop/vrsubc-scalar-x2.c",
    "src/f32-binop/vrsubc-scalar-x4.c",
    "src/f32-clamp/scalar.c",
    "src/f32-conv-hwc2spchw/3x3s2p1c3x4-scalar-1x1.c",
    "src/f32-igemm/1x4-scalar.c",
    "src/f32-igemm/2x4-scalar.c",
    "src/f32-igemm/4x2-scalar.c",
    "src/f32-igemm/4x4-scalar.c",
    "src/f32-dwconv/up1x25-scalar.c",
    "src/f32-dwconv/up1x25-scalar-acc2.c",
    "src/f32-dwconv/up2x25-scalar.c",
    "src/f32-dwconv/up2x25-scalar-acc2.c",
    "src/f32-dwconv/up1x4-scalar.c",
    "src/f32-dwconv/up1x4-scalar-acc2.c",
    "src/f32-dwconv/up2x4-scalar.c",
    "src/f32-dwconv/up2x4-scalar-acc2.c",
    "src/f32-dwconv/up1x9-scalar.c",
    "src/f32-dwconv/up1x9-scalar-acc2.c",
    "src/f32-dwconv/up2x9-scalar.c",
    "src/f32-dwconv/up2x9-scalar-acc2.c",
    "src/f32-dwconv-spchw/3x3p1-scalar.c",
    "src/f32-dwconv-spchw/5x5p2-scalar.c",
    "src/f32-dwconv-spchw/3x3s2p1-scalar.c",
    "src/f32-dwconv-spchw/5x5s2p2-scalar.c",
    "src/f32-gavgpool-spchw/scalar-x1.c",
    "src/f32-gavgpool/mp7p7q-scalar.c",
    "src/f32-gavgpool/up7-scalar.c",
    "src/f32-gemm/1x4-scalar.c",
    "src/f32-gemm/2x4-scalar.c",
    "src/f32-gemm/4x2-scalar.c",
    "src/f32-gemm/4x4-scalar.c",
    "src/f32-gemminc/1x4-scalar.c",
    "src/f32-gemminc/2x4-scalar.c",
    "src/f32-gemminc/4x4-scalar.c",
    "src/f32-hswish/scalar.c",
    "src/f32-maxpool/9p8x-scalar-c1.c",
    "src/f32-pavgpool/mp9p8q-scalar.c",
    "src/f32-pavgpool/up9-scalar.c",
    "src/f32-ppmm/2x4-scalar.c",
    "src/f32-ppmm/3x3-scalar.c",
    "src/f32-ppmm/4x2-scalar.c",
    "src/f32-ppmm/4x4-scalar.c",
    "src/f32-prelu/scalar-2x1.c",
    "src/f32-prelu/scalar-2x4.c",
    "src/f32-rmax/scalar.c",
    "src/f32-spmm/1x1-scalar-pipelined.c",
    "src/f32-spmm/1x1-scalar.c",
    "src/f32-spmm/2x1-scalar-pipelined.c",
    "src/f32-spmm/2x1-scalar.c",
    "src/f32-spmm/4x1-scalar-pipelined.c",
    "src/f32-spmm/4x1-scalar.c",
    "src/f32-spmm/8x1-scalar-pipelined.c",
    "src/f32-spmm/8x1-scalar.c",
    "src/f32-spmm/8x2-scalar.c",
    "src/f32-spmm/8x4-scalar.c",
    "src/f32-vmulcaddc/c1-scalar-2x.c",
    "src/f32-vmulcaddc/c2-scalar-2x.c",
    "src/f32-vmulcaddc/c4-scalar-2x.c",
    "src/q8-avgpool/mp9p8q-scalar.c",
    "src/q8-avgpool/up9-scalar.c",
    "src/q8-igemm/2x2-scalar.c",
    "src/q8-dwconv/up1x9-scalar.c",
    "src/q8-gavgpool/mp7p7q-scalar.c",
    "src/q8-gavgpool/up7-scalar.c",
    "src/q8-gemm/2x2-scalar.c",
    "src/q8-vadd/scalar.c",
    "src/u8-clamp/scalar.c",
    "src/u8-lut32norm/scalar.c",
    "src/u8-maxpool/9p8x-scalar-c1.c",
    "src/u8-rmax/scalar.c",
    "src/x32-packx/x2-scalar.c",
    "src/x32-packx/x3-scalar.c",
    "src/x32-packx/x4-scalar.c",
    "src/x32-pad/x2-scalar.c",
    "src/x32-unpool/scalar.c",
    "src/x32-zip/x2-scalar.c",
    "src/x32-zip/x3-scalar.c",
    "src/x32-zip/x4-scalar.c",
    "src/x32-zip/xm-scalar.c",
    "src/x8-lut/scalar.c",
    "src/x8-zip/x2-scalar.c",
    "src/x8-zip/x3-scalar.c",
    "src/x8-zip/x4-scalar.c",
    "src/x8-zip/xm-scalar.c",
]

# Micro-kernel C sources whose file names carry the "psimd" tag.
PSIMD_UKERNELS = [
    "src/f32-argmaxpool/9p8x-psimd-c4.c",
    "src/f32-argmaxpool/4x-psimd-c4.c",
    "src/f32-argmaxpool/9x-psimd-c4.c",
    "src/f32-avgpool/mp9p8q-psimd.c",
    "src/f32-avgpool/up9-psimd.c",
    "src/f32-bilinear/psimd-c4.c",
    "src/f32-bilinear/psimd-c8.c",
    "src/f32-binop/vadd-psimd-x4.c",
    "src/f32-binop/vadd-psimd-x8.c",
    "src/f32-binop/vaddc-psimd-x4.c",
    "src/f32-binop/vaddc-psimd-x8.c",
    "src/f32-binop/vmul-psimd-x4.c",
    "src/f32-binop/vmul-psimd-x8.c",
    "src/f32-binop/vmulc-psimd-x4.c",
    "src/f32-binop/vmulc-psimd-x8.c",
    "src/f32-binop/vrsubc-psimd-x4.c",
    "src/f32-binop/vrsubc-psimd-x8.c",
    "src/f32-binop/vsub-psimd-x4.c",
    "src/f32-binop/vsub-psimd-x8.c",
    "src/f32-binop/vsubc-psimd-x4.c",
    "src/f32-binop/vsubc-psimd-x8.c",
    "src/f32-clamp/psimd.c",
    "src/f32-igemm/1x8-psimd-loadsplat.c",
    "src/f32-igemm/1x8-psimd-splat.c",
    "src/f32-igemm/1x8s4-psimd.c",
    "src/f32-igemm/4x2c4-psimd.c",
    "src/f32-igemm/4x8-psimd-loadsplat.c",
    "src/f32-igemm/4x8-psimd-splat.c",
    "src/f32-igemm/4x8s4-psimd.c",
    "src/f32-igemm/6x8-psimd-loadsplat.c",
    "src/f32-igemm/6x8-psimd-splat.c",
    "src/f32-igemm/6x8s4-psimd.c",
    "src/f32-dwconv/up4x25-psimd.c",
    "src/f32-dwconv/up4x25-psimd-acc2.c",
    "src/f32-dwconv/up8x25-psimd.c",
    "src/f32-dwconv/up8x25-psimd-acc2.c",
    "src/f32-dwconv/up4x4-psimd.c",
    "src/f32-dwconv/up4x4-psimd-acc2.c",
    "src/f32-dwconv/up8x4-psimd.c",
    "src/f32-dwconv/up8x4-psimd-acc2.c",
    "src/f32-dwconv/up4x9-psimd.c",
    "src/f32-dwconv/up4x9-psimd-acc2.c",
    "src/f32-dwconv/up8x9-psimd.c",
    "src/f32-dwconv/up8x9-psimd-acc2.c",
    "src/f32-gavgpool/mp7p7q-psimd.c",
    "src/f32-gavgpool/up7-psimd.c",
    "src/f32-gemm/1x8-psimd-loadsplat.c",
    "src/f32-gemm/1x8-psimd-splat.c",
    "src/f32-gemm/1x8s4-psimd.c",
    "src/f32-gemm/4x8-psimd-loadsplat.c",
    "src/f32-gemm/4x8-psimd-splat.c",
    "src/f32-gemm/4x8s4-psimd.c",
    "src/f32-gemm/6x8-psimd-loadsplat.c",
    "src/f32-gemm/6x8-psimd-splat.c",
    "src/f32-gemm/6x8s4-psimd.c",
    "src/f32-gemminc/1x8-psimd-loadsplat.c",
    "src/f32-gemminc/1x8-psimd-splat.c",
    "src/f32-gemminc/1x8s4-psimd.c",
    "src/f32-gemminc/4x8-psimd-loadsplat.c",
    "src/f32-gemminc/4x8-psimd-splat.c",
    "src/f32-gemminc/4x8s4-psimd.c",
    "src/f32-gemminc/6x8-psimd-loadsplat.c",
    "src/f32-gemminc/6x8-psimd-splat.c",
    "src/f32-gemminc/6x8s4-psimd.c",
    "src/f32-hswish/psimd.c",
    "src/f32-maxpool/9p8x-psimd-c4.c",
    "src/f32-pavgpool/mp9p8q-psimd.c",
    "src/f32-pavgpool/up9-psimd.c",
    "src/f32-ppmm/4x8-psimd.c",
    "src/f32-prelu/psimd-2x4.c",
    "src/f32-prelu/psimd-2x8.c",
    "src/f32-vmulcaddc/c4-psimd-2x.c",
    "src/f32-vmulcaddc/c8-psimd-2x.c",
    "src/x32-packx/x4-psimd.c",
    "src/x32-pad/x2-psimd.c",
    "src/x32-unpool/psimd.c",
    "src/x32-zip/x2-psimd.c",
    "src/x32-zip/x3-psimd.c",
    "src/x32-zip/x4-psimd.c",
    "src/x32-zip/xm-psimd.c",
]

# ISA-specific micro-kernels
# Micro-kernel C sources whose file names carry the "neon" tag
# (the "neonfma" variants are listed separately).
NEON_UKERNELS = [
    "src/f32-avgpool/mp9p8q-neon.c",
    "src/f32-avgpool/up9-neon.c",
    "src/f32-bilinear/neon-c4.c",
    "src/f32-bilinear/neon-c8.c",
    "src/f32-binop/vadd-neon-x4.c",
    "src/f32-binop/vadd-neon-x8.c",
    "src/f32-binop/vaddc-neon-x4.c",
    "src/f32-binop/vaddc-neon-x8.c",
    "src/f32-binop/vmul-neon-x4.c",
    "src/f32-binop/vmul-neon-x8.c",
    "src/f32-binop/vmulc-neon-x4.c",
    "src/f32-binop/vmulc-neon-x8.c",
    "src/f32-binop/vrsubc-neon-x4.c",
    "src/f32-binop/vrsubc-neon-x8.c",
    "src/f32-binop/vsub-neon-x4.c",
    "src/f32-binop/vsub-neon-x8.c",
    "src/f32-binop/vsubc-neon-x4.c",
    "src/f32-binop/vsubc-neon-x8.c",
    "src/f32-clamp/neon.c",
    "src/f32-dwconv/up4x9-neon.c",
    "src/f32-dwconv/up4x9-neon-acc2.c",
    "src/f32-dwconv/up8x9-neon.c",
    "src/f32-dwconv/up8x9-neon-acc2.c",
    "src/f32-gavgpool-spchw/neon-x4.c",
    "src/f32-gavgpool/mp7p7q-neon.c",
    "src/f32-gavgpool/up7-neon.c",
    "src/f32-gemm/1x8-neon-ld64.c",
    "src/f32-gemm/4x2-neon-ld64.c",
    "src/f32-gemm/4x8-neon-ld128.c",
    "src/f32-gemm/4x8-neon-ld64.c",
    "src/f32-gemm/5x8-neon-ld64.c",
    "src/f32-gemm/6x8-neon-ld64.c",
    "src/f32-gemm/1x8s4-neon.c",
    "src/f32-gemm/4x8s4-neon.c",
    "src/f32-gemm/6x8s4-neon.c",
    "src/f32-gemm/8x8s4-neon.c",
    "src/f32-gemminc/1x8-neon-ld64.c",
    "src/f32-gemminc/4x8-neon-ld128.c",
    "src/f32-gemminc/4x8-neon-ld64.c",
    "src/f32-gemminc/5x8-neon-ld64.c",
    "src/f32-gemminc/6x8-neon-ld64.c",
    "src/f32-gemminc/1x8s4-neon.c",
    "src/f32-gemminc/4x8s4-neon.c",
    "src/f32-gemminc/6x8s4-neon.c",
    "src/f32-gemminc/8x8s4-neon.c",
    "src/f32-hswish/neon.c",
    "src/f32-igemm/1x8-neon-ld64.c",
    "src/f32-igemm/4x2-neon-ld64.c",
    "src/f32-igemm/4x4-neon-ld64.c",
    "src/f32-igemm/4x8-neon-ld128.c",
    "src/f32-igemm/4x8-neon-ld64.c",
    "src/f32-igemm/6x8-neon-ld64.c",
    "src/f32-igemm/1x8s4-neon.c",
    "src/f32-igemm/4x8s4-neon.c",
    "src/f32-igemm/6x8s4-neon.c",
    "src/f32-igemm/8x8s4-neon.c",
    "src/f32-pavgpool/mp9p8q-neon.c",
    "src/f32-pavgpool/up9-neon.c",
    "src/f32-ppmm/4x8-neon.c",
    "src/f32-ppmm/8x8-neon.c",
    "src/f32-prelu/neon-2x4.c",
    "src/f32-prelu/neon-2x8.c",
    "src/f32-rmax/neon.c",
    "src/f32-sigmoid/neon-frac-p9-p10-nr1recps-x16.c",
    "src/f32-vmulcaddc/c4-neon-2x.c",
    "src/f32-vmulcaddc/c8-neon-2x.c",
    "src/q8-avgpool/mp9p8q-neon.c",
    "src/q8-avgpool/up9-neon.c",
    "src/q8-dwconv/up8x9-neon.c",
    "src/q8-gavgpool/mp7p7q-neon.c",
    "src/q8-gavgpool/up7-neon.c",
    "src/q8-gemm/4x8-neon.c",
    "src/q8-gemm/8x8-neon.c",
    "src/q8-igemm/4x8-neon.c",
    "src/q8-igemm/8x8-neon.c",
    "src/q8-vadd/neon.c",
    "src/u8-clamp/neon.c",
    "src/u8-maxpool/9p8x-neon-c16.c",
    "src/u8-rmax/neon.c",
    "src/x32-packx/x4-neon-st4.c",
    "src/x32-pad/x2-neon.c",
    "src/x32-zip/x2-neon.c",
    "src/x32-zip/x3-neon.c",
    "src/x32-zip/x4-neon.c",
    "src/x32-zip/xm-neon.c",
    "src/x8-zip/x2-neon.c",
    "src/x8-zip/x3-neon.c",
    "src/x8-zip/x4-neon.c",
    "src/x8-zip/xm-neon.c",
]

# Micro-kernel and math C sources whose file names carry the "neonfma" tag.
NEONFMA_UKERNELS = [
    "src/f32-bilinear/neonfma-c4.c",
    "src/f32-bilinear/neonfma-c8.c",
    "src/f32-igemm/1x8-neonfma-ld64.c",
    "src/f32-igemm/4x2-neonfma-ld64.c",
    "src/f32-igemm/4x4-neonfma-ld64.c",
    "src/f32-igemm/4x8-neonfma-ld128.c",
    "src/f32-igemm/4x8-neonfma-ld64.c",
    "src/f32-igemm/6x8-neonfma-ld64.c",
    "src/f32-igemm/1x8s4-neonfma.c",
    "src/f32-igemm/4x8s4-neonfma.c",
    "src/f32-igemm/6x8s4-neonfma.c",
    "src/f32-igemm/8x8s4-neonfma.c",
    "src/f32-dwconv/up4x9-neonfma.c",
    "src/f32-dwconv/up4x9-neonfma-acc2.c",
    "src/f32-dwconv/up8x9-neonfma.c",
    "src/f32-dwconv/up8x9-neonfma-acc2.c",
    "src/f32-gemm/1x8-neonfma-ld64.c",
    "src/f32-gemm/4x2-neonfma-ld64.c",
    "src/f32-gemm/4x8-neonfma-ld128.c",
    "src/f32-gemm/4x8-neonfma-ld64.c",
    "src/f32-gemm/5x8-neonfma-ld64.c",
    "src/f32-gemm/6x8-neonfma-ld64.c",
    "src/f32-gemm/1x8s4-neonfma.c",
    "src/f32-gemm/4x8s4-neonfma.c",
    "src/f32-gemm/6x8s4-neonfma.c",
    "src/f32-gemm/8x8s4-neonfma.c",
    "src/f32-gemminc/1x8-neonfma-ld64.c",
    "src/f32-gemminc/4x8-neonfma-ld128.c",
    "src/f32-gemminc/4x8-neonfma-ld64.c",
    "src/f32-gemminc/5x8-neonfma-ld64.c",
    "src/f32-gemminc/6x8-neonfma-ld64.c",
    "src/f32-gemminc/1x8s4-neonfma.c",
    "src/f32-gemminc/4x8s4-neonfma.c",
    "src/f32-gemminc/6x8s4-neonfma.c",
    "src/f32-gemminc/8x8s4-neonfma.c",
    "src/f32-hswish/neonfma.c",
    "src/f32-ppmm/4x8-neonfma.c",
    "src/f32-ppmm/8x8-neonfma.c",
    "src/f32-sigmoid/neonfma-p5-nr2fma-x16.c",
    "src/f32-vmulcaddc/c4-neonfma-2x.c",
    "src/f32-vmulcaddc/c8-neonfma-2x.c",
    "src/math/exp-neonfma-lut64-p2.c",
    "src/math/exp-neonfma-p5.c",
    "src/math/expminus-neonfma-p5.c",
    "src/math/sigmoid-neonfma-p5-nr2fma.c",
]

# "neonfma"-tagged micro-kernel C sources kept apart from NEONFMA_UKERNELS.
# NOTE(review): presumably AArch64-only, per the constant's name — confirm
# against the rule that selects this list.
AARCH64_NEONFMA_UKERNELS = [
    "src/f32-conv-hwc/3x3s2p1c3x4-neonfma-2x2.c",
    "src/f32-conv-hwc/3x3s2p1c3x8-neonfma-2x2.c",
    "src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c",
    "src/f32-dwconv-spchw/3x3p1-neonfma.c",
    "src/f32-dwconv-spchw/5x5p2-neonfma.c",
    "src/f32-dwconv-spchw/3x3s2p1-neonfma.c",
    "src/f32-dwconv-spchw/5x5s2p2-neonfma.c",
    "src/f32-spmm/12x1-neonfma.c",
    "src/f32-spmm/12x2-neonfma.c",
    "src/f32-spmm/12x4-neonfma.c",
    "src/f32-spmm/16x1-neonfma-pipelined.c",
    "src/f32-spmm/16x1-neonfma-unroll2.c",
    "src/f32-spmm/16x1-neonfma.c",
    "src/f32-spmm/16x2-neonfma.c",
    "src/f32-spmm/16x4-neonfma.c",
    "src/f32-spmm/4x1-neonfma-pipelined.c",
    "src/f32-spmm/4x1-neonfma-unroll2.c",
    "src/f32-spmm/4x1-neonfma.c",
    "src/f32-spmm/4x2-neonfma.c",
    "src/f32-spmm/4x4-neonfma.c",
    "src/f32-spmm/8x1-neonfma-pipelined.c",
    "src/f32-spmm/8x1-neonfma-unroll2.c",
    "src/f32-spmm/8x1-neonfma.c",
    "src/f32-spmm/8x2-neonfma.c",
    "src/f32-spmm/8x4-neonfma.c",
]

# f16 GEMM micro-kernel C sources whose file names carry the "neonfp16arith" tag.
AARCH64_NEONFP16ARITH_UKERNELS = [
    "src/f16-gemm/4x8-neonfp16arith-ld64.c",
    "src/f16-gemm/6x8-neonfp16arith-ld64.c",
    "src/f16-gemm/8x8-neonfp16arith-ld64.c",
]

# Micro-kernel C sources whose file names carry the "sse" tag
# (the "sse2" and "sse41" variants are listed separately).
SSE_UKERNELS = [
    "src/f32-avgpool/mp9p8q-sse.c",
    "src/f32-avgpool/up9-sse.c",
    "src/f32-bilinear/sse-c4.c",
    "src/f32-bilinear/sse-c8.c",
    "src/f32-binop/vadd-sse-x4.c",
    "src/f32-binop/vadd-sse-x8.c",
    "src/f32-binop/vaddc-sse-x4.c",
    "src/f32-binop/vaddc-sse-x8.c",
    "src/f32-binop/vmul-sse-x4.c",
    "src/f32-binop/vmul-sse-x8.c",
    "src/f32-binop/vmulc-sse-x4.c",
    "src/f32-binop/vmulc-sse-x8.c",
    "src/f32-binop/vrsubc-sse-x4.c",
    "src/f32-binop/vrsubc-sse-x8.c",
    "src/f32-binop/vsub-sse-x4.c",
    "src/f32-binop/vsub-sse-x8.c",
    "src/f32-binop/vsubc-sse-x4.c",
    "src/f32-binop/vsubc-sse-x8.c",
    "src/f32-clamp/sse.c",
    "src/f32-igemm/1x8-sse-dup.c",
    "src/f32-igemm/1x8-sse-load1.c",
    "src/f32-igemm/1x8s4-sse.c",
    "src/f32-igemm/4x2c4-sse.c",
    "src/f32-igemm/4x8-sse-dup.c",
    "src/f32-igemm/4x8-sse-load1.c",
    "src/f32-igemm/4x8s4-sse.c",
    "src/f32-dwconv/up4x25-sse.c",
    "src/f32-dwconv/up4x4-sse.c",
    "src/f32-dwconv/up4x9-sse.c",
    "src/f32-dwconv/up4x25-sse-acc2.c",
    "src/f32-dwconv/up4x4-sse-acc2.c",
    "src/f32-dwconv/up4x9-sse-acc2.c",
    "src/f32-dwconv/up8x25-sse.c",
    "src/f32-dwconv/up8x4-sse.c",
    "src/f32-dwconv/up8x9-sse.c",
    "src/f32-dwconv/up8x25-sse-acc2.c",
    "src/f32-dwconv/up8x4-sse-acc2.c",
    "src/f32-dwconv/up8x9-sse-acc2.c",
    "src/f32-gavgpool-spchw/sse-x4.c",
    "src/f32-gavgpool/mp7p7q-sse.c",
    "src/f32-gavgpool/up7-sse.c",
    "src/f32-gemm/1x8-sse-dup.c",
    "src/f32-gemm/1x8-sse-load1.c",
    "src/f32-gemm/1x8s4-sse.c",
    "src/f32-gemm/4x8-sse-dup.c",
    "src/f32-gemm/4x8-sse-load1.c",
    "src/f32-gemm/4x8s4-sse.c",
    "src/f32-gemminc/1x8-sse-dup.c",
    "src/f32-gemminc/1x8-sse-load1.c",
    "src/f32-gemminc/1x8s4-sse.c",
    "src/f32-gemminc/4x8-sse-dup.c",
    "src/f32-gemminc/4x8-sse-load1.c",
    "src/f32-gemminc/4x8s4-sse.c",
    "src/f32-hswish/sse.c",
    "src/f32-maxpool/9p8x-sse-c4.c",
    "src/f32-pavgpool/mp9p8q-sse.c",
    "src/f32-pavgpool/up9-sse.c",
    "src/f32-dwconv-spchw/3x3p1-sse.c",
    "src/f32-dwconv-spchw/3x3s2p1-sse.c",
    "src/f32-ppmm/4x8-sse.c",
    "src/f32-rmax/sse.c",
    "src/f32-spmm/4x1-sse.c",
    "src/f32-spmm/8x1-sse.c",
    "src/f32-vmulcaddc/c4-sse-2x.c",
    "src/f32-vmulcaddc/c8-sse-2x.c",
    "src/x32-packx/x4-sse.c",
]

# Micro-kernel and math C sources whose file names carry the "sse2" tag.
SSE2_UKERNELS = [
    "src/f32-argmaxpool/9p8x-sse2-c4.c",
    "src/f32-argmaxpool/4x-sse2-c4.c",
    "src/f32-argmaxpool/9x-sse2-c4.c",
    "src/f32-prelu/sse2-2x4.c",
    "src/f32-prelu/sse2-2x8.c",
    "src/f32-sigmoid/sse2-p5-div-x8.c",
    "src/f32-sigmoid/sse2-p5-div-x16.c",
    "src/q8-avgpool/mp9p8q-sse2.c",
    "src/q8-avgpool/up9-sse2.c",
    "src/q8-igemm/4x4c2-sse2.c",
    "src/q8-dwconv/up8x9-sse2.c",
    "src/q8-gavgpool/mp7p7q-sse2.c",
    "src/q8-gavgpool/up7-sse2.c",
    "src/q8-gemm/2x4c8-sse2.c",
    "src/q8-gemm/4x4c2-sse2.c",
    "src/q8-vadd/sse2.c",
    "src/u8-clamp/sse2.c",
    "src/u8-maxpool/9p8x-sse2-c16.c",
    "src/u8-rmax/sse2.c",
    "src/x32-pad/x2-sse2.c",
    "src/x32-zip/x2-sse2.c",
    "src/x32-zip/x3-sse2.c",
    "src/x32-zip/x4-sse2.c",
    "src/x32-zip/xm-sse2.c",
    "src/x8-zip/x2-sse2.c",
    "src/x8-zip/x3-sse2.c",
    "src/x8-zip/x4-sse2.c",
    "src/x8-zip/xm-sse2.c",
    "src/math/exp-sse2-p5.c",
    "src/math/expminus-sse2-p5.c",
    "src/math/sigmoid-sse2-p5-div.c",
]

# Micro-kernel C sources whose file names carry the "sse41" tag.
SSE41_UKERNELS = [
    "src/f32-prelu/sse41-2x4.c",
    "src/f32-prelu/sse41-2x8.c",
]

# Micro-kernel C sources whose file names carry the "avx" tag
# (the "avx2" and "avx512f" variants are listed separately).
AVX_UKERNELS = [
    "src/f32-gemm/1x8-avx-broadcast.c",
    "src/f32-gemm/4x8-avx-broadcast.c",
    "src/f32-gemm/5x8-avx-broadcast.c",
    "src/f32-gemm/6x8-avx-broadcast.c",
    "src/f32-gemm/7x8-avx-broadcast.c",
    "src/f32-gemminc/1x8-avx-broadcast.c",
    "src/f32-gemminc/4x8-avx-broadcast.c",
    "src/f32-gemminc/5x8-avx-broadcast.c",
    "src/f32-gemminc/6x8-avx-broadcast.c",
    "src/f32-gemminc/7x8-avx-broadcast.c",
    "src/f32-igemm/1x8-avx-broadcast.c",
    "src/f32-igemm/4x8-avx-broadcast.c",
    "src/f32-igemm/5x8-avx-broadcast.c",
    "src/f32-igemm/6x8-avx-broadcast.c",
    "src/f32-igemm/7x8-avx-broadcast.c",
    "src/f32-rmax/avx.c",
    "src/f32-vscale/avx-unroll32.c",
]

# GEMM/GEMMINC/IGEMM micro-kernel C sources whose file names carry the "fma3" tag.
FMA3_UKERNELS = [
    "src/f32-gemm/1x8-fma3-broadcast.c",
    "src/f32-gemm/4x8-fma3-broadcast.c",
    "src/f32-gemm/5x8-fma3-broadcast.c",
    "src/f32-gemm/6x8-fma3-broadcast.c",
    "src/f32-gemm/7x8-fma3-broadcast.c",
    "src/f32-gemm/8x8-fma3-broadcast.c",
    "src/f32-gemminc/1x8-fma3-broadcast.c",
    "src/f32-gemminc/4x8-fma3-broadcast.c",
    "src/f32-gemminc/5x8-fma3-broadcast.c",
    "src/f32-gemminc/6x8-fma3-broadcast.c",
    "src/f32-gemminc/7x8-fma3-broadcast.c",
    "src/f32-gemminc/8x8-fma3-broadcast.c",
    "src/f32-igemm/1x8-fma3-broadcast.c",
    "src/f32-igemm/4x8-fma3-broadcast.c",
    "src/f32-igemm/5x8-fma3-broadcast.c",
    "src/f32-igemm/6x8-fma3-broadcast.c",
    "src/f32-igemm/7x8-fma3-broadcast.c",
    "src/f32-igemm/8x8-fma3-broadcast.c",
]

# Micro-kernel and math C sources whose file names carry the "avx2" tag.
AVX2_UKERNELS = [
    "src/f32-raddexpminusmax/avx2-p5-unroll64.c",
    "src/f32-raddextexp/avx2-p5-unroll64.c",
    "src/f32-raddstoreexpminusmax/avx2-p5-unroll64.c",
    "src/f32-vscaleexpminusmax/avx2-p5-unroll64.c",
    "src/f32-vscaleextexp/avx2-p5-unroll64.c",
    "src/math/exp-avx2-p5.c",
    "src/math/exp-avx2-perm-p3.c",
    "src/math/exp-avx2-perm-p4.c",
    "src/math/expminus-avx2-p5.c",
    "src/math/extexp-avx2-p5.c",
]

# Micro-kernel and math C sources whose file names carry the "avx512f" tag.
AVX512F_UKERNELS = [
    "src/f32-raddexpminusmax/avx512f-p5-scalef-unroll128.c",
    "src/f32-raddextexp/avx512f-p5-scalef-unroll128.c",
    "src/f32-raddstoreexpminusmax/avx512f-p5-scalef-unroll128.c",
    "src/f32-rmax/avx512f.c",
    "src/f32-vscale/avx512f-unroll64.c",
    "src/f32-vscaleexpminusmax/avx512f-p5-scalef-unroll128.c",
    "src/f32-vscaleextexp/avx512f-p5-scalef-unroll128.c",
    "src/math/exp-avx512f-p5-scalef.c",
    "src/math/exp-avx512f-p5.c",
    "src/math/exp-avx512f-perm-p3.c",
    "src/math/exp-avx512f-perm2-p2.c",
    "src/math/extexp-avx512f-p5.c",
]

# Assembly (.S) micro-kernel sources whose file names carry the "aarch32" tag.
AARCH32_ASM_UKERNELS = [
    "src/q8-dwconv/up8x9-aarch32-neon.S",
]

# Assembly (.S) micro-kernel sources whose file names carry the "aarch64" tag.
AARCH64_ASM_UKERNELS = [
    "src/f32-dwconv/up4x9-aarch64-neonfma-cortex-a55.S",
    "src/f32-dwconv/up4x9-aarch64-neonfma.S",
    "src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/1x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/4x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/4x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/4x8-aarch64-neonfma-ld128.S",
    "src/f32-gemm/4x8-aarch64-neonfma-ld64.S",
    "src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/6x8-aarch64-neonfma-ld128.S",
    "src/f32-gemm/6x8-aarch64-neonfma-ld64.S",
    "src/f32-gemminc/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-ld128.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-ld64.S",
    "src/f32-gemminc/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-ld128.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-ld64.S",
    "src/f32-igemm/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/4x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S",
]

# Headers under src/xnnpack/ grouped as the micro-kernel header set.
# This list is the base that INTERNAL_HDRS and ACCURACY_EVAL_HDRS extend.
INTERNAL_MICROKERNEL_HDRS = [
    "src/xnnpack/argmaxpool.h",
    "src/xnnpack/avgpool.h",
    "src/xnnpack/bilinear.h",
    "src/xnnpack/clamp.h",
    "src/xnnpack/common.h",
    "src/xnnpack/conv.h",
    "src/xnnpack/dwconv.h",
    "src/xnnpack/gavgpool.h",
    "src/xnnpack/gemm.h",
    "src/xnnpack/hswish.h",
    "src/xnnpack/igemm.h",
    "src/xnnpack/lut.h",
    "src/xnnpack/math.h",
    "src/xnnpack/maxpool.h",
    "src/xnnpack/memory.h",
    "src/xnnpack/packx.h",
    "src/xnnpack/pad.h",
    "src/xnnpack/params.h",
    "src/xnnpack/pavgpool.h",
    "src/xnnpack/ppmm.h",
    "src/xnnpack/prelu.h",
    "src/xnnpack/raddexpminusmax.h",
    "src/xnnpack/raddextexp.h",
    "src/xnnpack/raddstoreexpminusmax.h",
    "src/xnnpack/rmax.h",
    "src/xnnpack/scalar-utils.h",
    "src/xnnpack/spmm.h",
    "src/xnnpack/unpool.h",
    "src/xnnpack/vadd.h",
    "src/xnnpack/vbinop.h",
    "src/xnnpack/vmulcaddc.h",
    "src/xnnpack/vscale.h",
    "src/xnnpack/vscaleexpminusmax.h",
    "src/xnnpack/vscaleextexp.h",
    "src/xnnpack/vunop.h",
    "src/xnnpack/zip.h",
]

# INTERNAL_MICROKERNEL_HDRS plus the public header and the operator-level
# internal headers; used as `hdrs` by the library targets in this file.
INTERNAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "include/xnnpack.h",
    "src/xnnpack/allocator.h",
    "src/xnnpack/compute.h",
    "src/xnnpack/im2col.h",
    "src/xnnpack/indirection.h",
    "src/xnnpack/math-stubs.h",
    "src/xnnpack/operator.h",
    "src/xnnpack/pack.h",
    "src/xnnpack/params-init.h",
    "src/xnnpack/requantization-stubs.h",
    "src/xnnpack/requantization.h",
]
732
# Headers appended to the srcs of the eval/ accuracy targets: the micro-kernel
# headers plus math-stubs.h.
ACCURACY_EVAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "src/xnnpack/math-stubs.h",
]
736
# Headers appended to the srcs of micro-kernel benchmark targets.
MICROKERNEL_BENCHMARK_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "src/xnnpack/params-init.h",
    "include/xnnpack.h",
]
741
# Headers appended to the srcs of micro-kernel unit-test targets.
MICROKERNEL_TEST_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "src/xnnpack/isa-checks.h",
    "src/xnnpack/params-init.h",
    "src/xnnpack/requantization.h",
    "include/xnnpack.h",
]
748
# params.h and common.h only.
# NOTE(review): presumably consumed by operator tests defined later in the
# file; no user is visible in this part of it.
OPERATOR_TEST_PARAMS_HDRS = [
    "src/xnnpack/params.h",
    "src/xnnpack/common.h",
]
753
# pack.h together with operator.h and compute.h; appended to the srcs of the
# benchmarks and tests that list WEIGHTS_PACK_HDRS.
WEIGHTS_PACK_HDRS = [
    "src/xnnpack/pack.h",
    "src/xnnpack/operator.h",
    "src/xnnpack/compute.h",
]
759
# Compile-time log verbosity (XNN_LOG_LEVEL), chosen by build mode.
LOGGING_COPTS = select({
    # No logging in optimized mode
    ":optimized_build": ["-DXNN_LOG_LEVEL=0"],
    # Full logging in debug mode
    ":debug_build": ["-DXNN_LOG_LEVEL=5"],
    # Error-only logging in default (fastbuild) mode
    "//conditions:default": ["-DXNN_LOG_LEVEL=2"],
})
768
# Logging header; added to `hdrs` of the targets that also take LOGGING_COPTS.
LOGGING_HDRS = [
    "src/xnnpack/log.h",
]
772
# Micro-kernels listed in SCALAR_UKERNELS; this is the `generic_deps` entry of
# the :ukernels aggregate.
xnnpack_cc_library(
    name = "scalar_ukernels",
    srcs = SCALAR_UKERNELS,
    hdrs = INTERNAL_HDRS,
    aarch32_copts = ["-marm"],
    copts = xnnpack_std_copts(),
    deps = [
        "@FP16",
        "@FXdiv",
        "@pthreadpool",
    ],
)
785
# Micro-kernels listed in PSIMD_UKERNELS, built against @psimd; optimized
# builds add -O3 and -ffast-math.
xnnpack_cc_library(
    name = "psimd_ukernels",
    srcs = PSIMD_UKERNELS,
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon",
    ],
    copts = xnnpack_std_copts(),
    optimized_copts = [
        "-O3",
        "-ffast-math",
    ],
    deps = [
        "@FP16",
        "@psimd",
        "@pthreadpool",
    ],
)
805
# NEON micro-kernels; the same NEON_UKERNELS sources are used for the aarch32
# and aarch64 source sets.
xnnpack_cc_library(
    name = "neon_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon",
    ],
    aarch32_srcs = NEON_UKERNELS,
    aarch64_srcs = NEON_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = [
        "@FP16",
        "@pthreadpool",
    ],
)
821
# NEON+FMA micro-kernels; the aarch64 source set additionally includes
# AARCH64_NEONFMA_UKERNELS.
xnnpack_cc_library(
    name = "neonfma_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon-vfpv4",
    ],
    aarch32_srcs = NEONFMA_UKERNELS,
    aarch64_srcs = NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = [
        "@FP16",
        "@pthreadpool",
    ],
)
837
# FP16-arithmetic NEON micro-kernels; only an aarch64 source set is given,
# compiled with -march=armv8.2-a+fp16.
xnnpack_cc_library(
    name = "neonfp16arith_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch64_copts = ["-march=armv8.2-a+fp16"],
    aarch64_srcs = AARCH64_NEONFP16ARITH_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = [
        "@FP16",
        "@pthreadpool",
    ],
)
849
# SSE and SSE2 micro-kernels, both compiled with -msse2.
xnnpack_cc_library(
    name = "sse2_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-msse2"],
    x86_srcs = SSE_UKERNELS + SSE2_UKERNELS,
    deps = [
        "@FP16",
        "@pthreadpool",
    ],
)
861
# SSE4.1 micro-kernels (x86 source set, -msse4.1).
xnnpack_cc_library(
    name = "sse41_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-msse4.1"],
    x86_srcs = SSE41_UKERNELS,
    deps = [
        "@FP16",
        "@pthreadpool",
    ],
)
873
# AVX micro-kernels (x86 source set, -mavx).
xnnpack_cc_library(
    name = "avx_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-mavx"],
    x86_srcs = AVX_UKERNELS,
    deps = [
        "@FP16",
        "@pthreadpool",
    ],
)
885
# FMA3 micro-kernels (x86 source set, -mfma).
xnnpack_cc_library(
    name = "fma3_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = [
        "-mfma",
    ],
    x86_srcs = FMA3_UKERNELS,
    deps = [
        "@FP16",
        "@pthreadpool",
    ],
)
899
# AVX2 micro-kernels (x86 source set); compiled with both -mfma and -mavx2.
xnnpack_cc_library(
    name = "avx2_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = [
        "-mfma",
        "-mavx2",
    ],
    x86_srcs = AVX2_UKERNELS,
    deps = [
        "@FP16",
        "@pthreadpool",
    ],
)
914
# AVX512F micro-kernels (x86 source set, -mavx512f).
xnnpack_cc_library(
    name = "avx512f_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-mavx512f"],
    x86_srcs = AVX512F_UKERNELS,
    deps = [
        "@FP16",
        "@pthreadpool",
    ],
)
926
# Assembly micro-kernels for 32-bit and 64-bit ARM; the only header exposed is
# assembly.h.
xnnpack_cc_library(
    name = "asm_ukernels",
    hdrs = ["src/xnnpack/assembly.h"],
    aarch32_srcs = AARCH32_ASM_UKERNELS,
    aarch64_srcs = AARCH64_ASM_UKERNELS,
)
933
# Aggregate of the per-ISA micro-kernel libraries, with one dependency list per
# architecture family plus :scalar_ukernels as the generic dependency.
xnnpack_aggregate_library(
    name = "ukernels",
    aarch32_deps = [
        ":psimd_ukernels",
        ":neon_ukernels",
        ":neonfma_ukernels",
        ":asm_ukernels",
    ],
    aarch64_deps = [
        ":psimd_ukernels",
        ":neon_ukernels",
        ":neonfma_ukernels",
        ":neonfp16arith_ukernels",
        ":asm_ukernels",
    ],
    generic_deps = [":scalar_ukernels"],
    wasmsimd_deps = [
        ":psimd_ukernels",
    ],
    x86_deps = [
        ":psimd_ukernels",
        ":sse2_ukernels",
        ":sse41_ukernels",
        ":avx_ukernels",
        ":fma3_ukernels",
        ":avx2_ukernels",
        ":avx512f_ukernels",
    ],
)
963
# Library built from src/im2col.c; depended on by :f32_im2col_gemm_bench.
xnnpack_cc_library(
    name = "im2col",
    srcs = ["src/im2col.c"],
    hdrs = [
        "src/xnnpack/common.h",
        "src/xnnpack/im2col.h",
    ],
    copts = xnnpack_std_copts(),
)
973
# Library built from src/indirection.c; depended on by :operators and by several
# micro-kernel benchmarks.
xnnpack_cc_library(
    name = "indirection",
    srcs = ["src/indirection.c"],
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    deps = [
        "@FP16",
        "@FXdiv",
        "@pthreadpool",
    ],
)
985
# Library built from src/operator-run.c, compiled with logging enabled
# (LOGGING_COPTS / LOGGING_HDRS).
xnnpack_cc_library(
    name = "operator_run",
    srcs = ["src/operator-run.c"],
    hdrs = INTERNAL_HDRS + LOGGING_HDRS,
    copts = xnnpack_std_copts() + LOGGING_COPTS + [
        # Wrappers for multi-pass microkernels use VLAs for temporary buffers.
        "-Wno-vla",
    ],
    deps = [
        "@FP16",
        "@FXdiv",
        "@clog",
        "@pthreadpool",
    ],
)
1001
# Header-less library that only carries the XNN_ENABLE_ASSEMBLY define.
# Assembly is enabled unless :xnn_enable_assembly_explicit_false matches.
cc_library(
    name = "enable_assembly",
    defines = select({
        ":xnn_enable_assembly_explicit_true": ["XNN_ENABLE_ASSEMBLY=1"],
        ":xnn_enable_assembly_explicit_false": ["XNN_ENABLE_ASSEMBLY=0"],
        "//conditions:default": ["XNN_ENABLE_ASSEMBLY=1"],
    }),
)
1010
# Operator sources (OPERATOR_SRCS) plus memory.c and operator-delete.c.
# xnnpack_min_size_copts() is added in every configuration except :debug_build;
# the wasm and wasmsimd source sets add src/wasm-stubs.c.
xnnpack_cc_library(
    name = "operators",
    srcs = OPERATOR_SRCS + [
        "src/memory.c",
        "src/operator-delete.c",
    ],
    hdrs = INTERNAL_HDRS + LOGGING_HDRS,
    copts = xnnpack_std_copts() + LOGGING_COPTS + [
        "-Isrc",
        "-Iinclude",
    ] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }),
    wasm_srcs = ["src/wasm-stubs.c"],
    wasmsimd_srcs = ["src/wasm-stubs.c"],
    deps = [
        ":indirection",
        "@FP16",
        "@FXdiv",
        "@clog",
        "@pthreadpool",
    ],
)
1035
# Main library target: compiles src/init.c and pulls in the micro-kernels,
# operators and operator-run libraries. @cpuinfo is a dependency everywhere
# except under :emscripten.
cc_library(
    name = "XNNPACK",
    srcs = [
        "src/init.c",
    ],
    copts = xnnpack_std_copts() + LOGGING_COPTS + [
        "-Isrc",
        "-Iinclude",
    ] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }),
    includes = ["include"],
    linkstatic = True,
    textual_hdrs = ["include/xnnpack.h"],
    visibility = xnnpack_visibility(),
    deps = [
        ":enable_assembly",
        ":ukernels",
        ":operator_run",
        ":operators",
        "@clog",
        "@pthreadpool",
    ] + select({
        ":emscripten": [],
        "//conditions:default": ["@cpuinfo"],
    }),
)
1064
# Variant of :XNNPACK built from the same source and deps, but with the
# XNN_NO_{Q8,U8,X8,NCHW}_OPERATORS macros defined.
# NOTE(review): `defines` (unlike `copts`) is also applied to every target that
# depends on this one; confirm that propagation is intended.
cc_library(
    name = "xnnpack_operators_nhwc_f32",
    srcs = [
        "src/init.c",
    ],
    copts = xnnpack_std_copts() + LOGGING_COPTS + [
        "-Isrc",
        "-Iinclude",
    ] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }),
    defines = [
        "XNN_NO_Q8_OPERATORS",
        "XNN_NO_U8_OPERATORS",
        "XNN_NO_X8_OPERATORS",
        "XNN_NO_NCHW_OPERATORS",
    ],
    includes = ["include"],
    linkstatic = True,
    textual_hdrs = ["include/xnnpack.h"],
    visibility = xnnpack_visibility(),
    deps = [
        ":enable_assembly",
        ":ukernels",
        ":operator_run",
        ":operators",
        "@clog",
        "@pthreadpool",
    ] + select({
        ":emscripten": [],
        "//conditions:default": ["@cpuinfo"],
    }),
)
1099
# Shared benchmark helpers (bench/utils.cc, bench/utils.h), built against Google
# Benchmark and cpuinfo.
xnnpack_cc_library(
    name = "bench_utils",
    srcs = ["bench/utils.cc"],
    hdrs = ["bench/utils.h"],
    copts = ["-Wno-unused-result"],
    deps = [
        "@com_google_benchmark//:benchmark",
        "@cpuinfo",
    ],
)
1110
######################### Benchmarks for micro-kernels #########################
Marat Dukhan08c4a432019-10-03 09:29:21 -07001112
# Q8 GEMM micro-kernel benchmark; ruy and gemmlowp copts/deps are added through
# the xnnpack_optional_* helpers.
xnnpack_benchmark(
    name = "q8_gemm_bench",
    srcs = [
        "bench/gemm.h",
        "bench/q8-gemm.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts() + xnnpack_optional_gemmlowp_copts(),
    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps() + xnnpack_optional_gemmlowp_deps(),
)
1123
# F16 GEMM micro-kernel benchmark (bench/f16-gemm.cc).
xnnpack_benchmark(
    name = "f16_gemm_bench",
    srcs = [
        "bench/f16-gemm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1134
# F32 IGEMM micro-kernel benchmark (bench/f32-igemm.cc); also links
# :indirection.
xnnpack_benchmark(
    name = "f32_igemm_bench",
    srcs = [
        "bench/f32-igemm.cc",
        "bench/conv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
1144
# F32 conv-hwc micro-kernel benchmark (bench/f32-conv-hwc.cc).
xnnpack_benchmark(
    name = "f32_conv_hwc_bench",
    srcs = [
        "bench/f32-conv-hwc.cc",
        "bench/dconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1155
# F32 conv-hwc2spchw micro-kernel benchmark (bench/f32-conv-hwc2spchw.cc).
xnnpack_benchmark(
    name = "f32_conv_hwc2spchw_bench",
    srcs = [
        "bench/f32-conv-hwc2spchw.cc",
        "bench/dconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1166
# F32 DWCONV micro-kernel benchmark (bench/f32-dwconv.cc); also links
# :indirection.
xnnpack_benchmark(
    name = "f32_dwconv_bench",
    srcs = [
        "bench/f32-dwconv.cc",
        "bench/dwconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
1176
# F32 dwconv-spchw micro-kernel benchmark (bench/f32-dwconv-spchw.cc); also
# links :indirection.
xnnpack_benchmark(
    name = "f32_dwconv_spchw_bench",
    srcs = [
        "bench/f32-dwconv-spchw.cc",
        "bench/dwconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
1186
# F32 GEMM micro-kernel benchmark; ruy copts/deps are added through the
# xnnpack_optional_ruy_* helpers.
xnnpack_benchmark(
    name = "f32_gemm_bench",
    srcs = [
        "bench/f32-gemm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts(),
    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps(),
)
1197
# F32 RMAX micro-kernel benchmark (bench/f32-rmax.cc).
xnnpack_benchmark(
    name = "f32_rmax_bench",
    srcs = [
        "bench/f32-rmax.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1206
# F32 sigmoid micro-kernel benchmark (bench/f32-sigmoid.cc).
xnnpack_benchmark(
    name = "f32_sigmoid_bench",
    srcs = [
        "bench/f32-sigmoid.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1216
# F32 SpMM micro-kernel benchmark (bench/f32-spmm.cc).
xnnpack_benchmark(
    name = "f32_spmm_bench",
    srcs = [
        "bench/f32-spmm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1227
# F32 softargmax micro-kernel benchmark (bench/f32-softargmax.cc).
xnnpack_benchmark(
    name = "f32_softargmax_bench",
    srcs = [
        "bench/f32-softargmax.cc",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
1236
# F32 im2col+GEMM benchmark (bench/f32-im2col-gemm.cc); also links :im2col.
xnnpack_benchmark(
    name = "f32_im2col_gemm_bench",
    srcs = [
        "bench/f32-im2col-gemm.cc",
        "bench/conv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":im2col"],
)
1246
########################### Benchmarks for operators ###########################
1248
# Operator-level benchmark built from bench/add.cc.
xnnpack_benchmark(
    name = "add_bench",
    srcs = ["bench/add.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
1254
# Operator-level benchmark built from bench/average-pooling.cc.
xnnpack_benchmark(
    name = "average_pooling_bench",
    srcs = ["bench/average-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
1260
# Operator-level benchmark built from bench/channel-shuffle.cc.
xnnpack_benchmark(
    name = "channel_shuffle_bench",
    srcs = ["bench/channel-shuffle.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
1266
# Operator-level convolution benchmark; TFLite and ARM Compute Library
# copts/deps are added through the xnnpack_optional_* helpers.
xnnpack_benchmark(
    name = "convolution_bench",
    srcs = ["bench/convolution.cc"],
    copts = xnnpack_optional_tflite_copts() + xnnpack_optional_armcl_copts(),
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps() + xnnpack_optional_armcl_deps(),
)
1273
# Operator-level deconvolution benchmark; TFLite copts/deps are added through
# the xnnpack_optional_tflite_* helpers.
xnnpack_benchmark(
    name = "deconvolution_bench",
    srcs = ["bench/deconvolution.cc"],
    copts = xnnpack_optional_tflite_copts(),
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)
1280
# Operator-level benchmark built from bench/global-average-pooling.cc.
xnnpack_benchmark(
    name = "global_average_pooling_bench",
    srcs = ["bench/global-average-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
1286
# Operator-level benchmark built from bench/max-pooling.cc.
xnnpack_benchmark(
    name = "max_pooling_bench",
    srcs = ["bench/max-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
1292
# Operator-level sigmoid benchmark; TFLite copts/deps are added through the
# xnnpack_optional_tflite_* helpers.
xnnpack_benchmark(
    name = "sigmoid_bench",
    srcs = ["bench/sigmoid.cc"],
    copts = xnnpack_optional_tflite_copts(),
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)
1299
# Operator-level PReLU benchmark; TFLite copts/deps are added through the
# xnnpack_optional_tflite_* helpers.
xnnpack_benchmark(
    name = "prelu_bench",
    srcs = ["bench/prelu.cc"],
    copts = xnnpack_optional_tflite_copts(),
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)
1306
# Operator-level benchmark built from bench/softargmax.cc.
xnnpack_benchmark(
    name = "softargmax_bench",
    srcs = ["bench/softargmax.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
1312
############################# End-to-end benchmarks ############################
1314
# Model library built from models/mobilenet-v1.cc on top of :XNNPACK; used by
# the end-to-end benchmarks.
cc_library(
    name = "mobilenet_v1",
    srcs = ["models/mobilenet-v1.cc"],
    hdrs = ["models/models.h"],
    linkstatic = True,
    deps = [
        ":XNNPACK",
        "@pthreadpool",
    ],
)
1325
# Model library built from models/mobilenet-v2.cc on top of :XNNPACK; used by
# the end-to-end benchmarks.
cc_library(
    name = "mobilenet_v2",
    srcs = ["models/mobilenet-v2.cc"],
    hdrs = ["models/models.h"],
    linkstatic = True,
    deps = [
        ":XNNPACK",
        "@pthreadpool",
    ],
)
1336
# End-to-end benchmark built from bench/f32-dwconv-e2e.cc; links the
# micro-kernel benchmark deps plus :XNNPACK and both MobileNet model libraries.
xnnpack_benchmark(
    name = "f32_dwconv_e2e_bench",
    srcs = ["bench/f32-dwconv-e2e.cc"] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS + [
        ":XNNPACK",
        ":mobilenet_v1",
        ":mobilenet_v2",
    ],
)
1347
# End-to-end benchmark built from bench/f32-gemm-e2e.cc; links the micro-kernel
# benchmark deps plus :XNNPACK and both MobileNet model libraries.
xnnpack_benchmark(
    name = "f32_gemm_e2e_bench",
    srcs = ["bench/f32-gemm-e2e.cc"] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS + [
        ":XNNPACK",
        ":mobilenet_v1",
        ":mobilenet_v2",
    ],
)
1358
# End-to-end benchmark built from bench/end2end.cc against :XNNPACK and both
# MobileNet model libraries.
xnnpack_benchmark(
    name = "end2end_bench",
    srcs = ["bench/end2end.cc"],
    deps = [
        ":XNNPACK",
        ":bench_utils",
        ":mobilenet_v1",
        ":mobilenet_v2",
        "@pthreadpool",
    ],
)
1370
#################### Accuracy evaluation for math functions ####################
1372
# Accuracy evaluation built from eval/f32-exp.cc (declared through the
# xnnpack_benchmark macro).
xnnpack_benchmark(
    name = "f32_exp_eval",
    srcs = [
        "eval/f32-exp.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + ACCURACY_EVAL_HDRS,
    deps = ACCURACY_EVAL_DEPS,
)
1381
# Accuracy evaluation built from eval/f32-expminus.cc (declared through the
# xnnpack_benchmark macro).
xnnpack_benchmark(
    name = "f32_expminus_eval",
    srcs = [
        "eval/f32-expminus.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + ACCURACY_EVAL_HDRS,
    deps = ACCURACY_EVAL_DEPS,
)
1390
# Accuracy evaluation built from eval/f32-extexp.cc (declared through the
# xnnpack_benchmark macro).
xnnpack_benchmark(
    name = "f32_extexp_eval",
    srcs = [
        "eval/f32-extexp.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + ACCURACY_EVAL_HDRS,
    deps = ACCURACY_EVAL_DEPS,
)
1399
# Accuracy evaluation built from eval/f32-sigmoid.cc (declared through the
# xnnpack_benchmark macro).
xnnpack_benchmark(
    name = "f32_sigmoid_eval",
    srcs = [
        "eval/f32-sigmoid.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + ACCURACY_EVAL_HDRS,
    deps = ACCURACY_EVAL_DEPS,
)
1408
######################### Unit tests for micro-kernels #########################
1410
# Micro-kernel unit test: test/f16-gemm.cc with the shared GEMM tester header.
xnnpack_unit_test(
    name = "f16_gemm_test",
    srcs = [
        "test/f16-gemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1420
# Micro-kernel unit test: test/f32-argmaxpool.cc with its tester header.
xnnpack_unit_test(
    name = "f32_argmaxpool_test",
    srcs = [
        "test/f32-argmaxpool.cc",
        "test/argmaxpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1430
# Micro-kernel unit test: test/f32-avgpool.cc with the avgpool tester header.
xnnpack_unit_test(
    name = "f32_avgpool_test",
    srcs = [
        "test/f32-avgpool.cc",
        "test/avgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1440
# Micro-kernel unit test: test/f32-bilinear.cc with its tester header.
xnnpack_unit_test(
    name = "f32_bilinear_test",
    srcs = [
        "test/f32-bilinear.cc",
        "test/bilinear-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1450
# Micro-kernel unit test: test/f32-clamp.cc with the clamp tester header.
xnnpack_unit_test(
    name = "f32_clamp_test",
    srcs = [
        "test/f32-clamp.cc",
        "test/clamp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1459
# Micro-kernel unit test: test/f32-igemm.cc with the shared GEMM tester header.
xnnpack_unit_test(
    name = "f32_igemm_test",
    srcs = [
        "test/f32-igemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1469
# Micro-kernel unit test: test/f32-conv-hwc.cc with its tester header.
xnnpack_unit_test(
    name = "f32_conv_hwc_test",
    srcs = [
        "test/f32-conv-hwc.cc",
        "test/conv-hwc-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1479
# Micro-kernel unit test: test/f32-conv-hwc2spchw.cc with its tester header.
xnnpack_unit_test(
    name = "f32_conv_hwc2spchw_test",
    srcs = [
        "test/f32-conv-hwc2spchw.cc",
        "test/conv-hwc2spchw-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1489
# Micro-kernel unit test: test/f32-dwconv.cc with the dwconv tester header.
xnnpack_unit_test(
    name = "f32_dwconv_test",
    srcs = [
        "test/f32-dwconv.cc",
        "test/dwconv-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1499
# Micro-kernel unit test: test/f32-dwconv-spchw.cc with its tester header.
xnnpack_unit_test(
    name = "f32_dwconv_spchw_test",
    srcs = [
        "test/f32-dwconv-spchw.cc",
        "test/dwconv-spchw-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1509
# Micro-kernel unit test: test/f32-gavgpool.cc with the gavgpool tester header.
xnnpack_unit_test(
    name = "f32_gavgpool_test",
    srcs = [
        "test/f32-gavgpool.cc",
        "test/gavgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1519
# Micro-kernel unit test: test/f32-gavgpool-spchw.cc with its tester header.
xnnpack_unit_test(
    name = "f32_gavgpool_spchw_test",
    srcs = [
        "test/f32-gavgpool-spchw.cc",
        "test/gavgpool-spchw-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1529
# Micro-kernel unit test: test/f32-gemm.cc with the shared GEMM tester header.
xnnpack_unit_test(
    name = "f32_gemm_test",
    srcs = [
        "test/f32-gemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1539
# Micro-kernel unit test: test/f32-gemminc.cc with the shared GEMM tester
# header.
xnnpack_unit_test(
    name = "f32_gemminc_test",
    srcs = [
        "test/f32-gemminc.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1549
# Micro-kernel unit test: test/f32-hswish.cc with its tester header.
xnnpack_unit_test(
    name = "f32_hswish_test",
    srcs = [
        "test/f32-hswish.cc",
        "test/hswish-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1558
# Micro-kernel unit test: test/f32-maxpool.cc with its tester header.
xnnpack_unit_test(
    name = "f32_maxpool_test",
    srcs = [
        "test/f32-maxpool.cc",
        "test/maxpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1567
# Micro-kernel unit test: test/f32-pavgpool.cc; reuses the avgpool tester
# header.
xnnpack_unit_test(
    name = "f32_pavgpool_test",
    srcs = [
        "test/f32-pavgpool.cc",
        "test/avgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1577
# Micro-kernel unit test: test/f32-ppmm.cc; reuses the GEMM tester header.
xnnpack_unit_test(
    name = "f32_ppmm_test",
    srcs = [
        "test/f32-ppmm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1587
# Micro-kernel unit test: test/f32-prelu.cc with its tester header.
xnnpack_unit_test(
    name = "f32_prelu_test",
    srcs = [
        "test/f32-prelu.cc",
        "test/prelu-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1597
# Micro-kernel unit test: test/f32-raddexpminusmax.cc with its tester header.
xnnpack_unit_test(
    name = "f32_raddexpminusmax_test",
    srcs = [
        "test/f32-raddexpminusmax.cc",
        "test/raddexpminusmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1606
# Micro-kernel unit test: test/f32-raddextexp.cc with its tester header.
xnnpack_unit_test(
    name = "f32_raddextexp_test",
    srcs = [
        "test/f32-raddextexp.cc",
        "test/raddextexp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1615
# Micro-kernel unit test: test/f32-raddstoreexpminusmax.cc with its tester
# header.
xnnpack_unit_test(
    name = "f32_raddstoreexpminusmax_test",
    srcs = [
        "test/f32-raddstoreexpminusmax.cc",
        "test/raddstoreexpminusmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1624
# Micro-kernel unit test: test/f32-rmax.cc with its tester header.
xnnpack_unit_test(
    name = "f32_rmax_test",
    srcs = [
        "test/f32-rmax.cc",
        "test/rmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1633
# Micro-kernel unit test: test/f32-sigmoid.cc; uses the vunop tester header.
xnnpack_unit_test(
    name = "f32_sigmoid_test",
    srcs = [
        "test/f32-sigmoid.cc",
        "test/vunop-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1642
# Micro-kernel unit test: test/f32-spmm.cc with its tester header.
xnnpack_unit_test(
    name = "f32_spmm_test",
    srcs = [
        "test/f32-spmm.cc",
        "test/spmm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1652
# Micro-kernel unit test: test/f32-vadd.cc; uses the vbinop tester header.
xnnpack_unit_test(
    name = "f32_vadd_test",
    srcs = [
        "test/f32-vadd.cc",
        "test/vbinop-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1661
# Micro-kernel unit test: test/f32-vaddc.cc; uses the vbinopc tester header.
xnnpack_unit_test(
    name = "f32_vaddc_test",
    srcs = [
        "test/f32-vaddc.cc",
        "test/vbinopc-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1670
# Micro-kernel unit test: test/f32-vmul.cc; uses the vbinop tester header.
xnnpack_unit_test(
    name = "f32_vmul_test",
    srcs = [
        "test/f32-vmul.cc",
        "test/vbinop-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1679
# Micro-kernel unit test: test/f32-vmulc.cc; uses the vbinopc tester header.
xnnpack_unit_test(
    name = "f32_vmulc_test",
    srcs = [
        "test/f32-vmulc.cc",
        "test/vbinopc-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1688
# Micro-kernel unit test: test/f32-vmulcaddc.cc with its tester header.
xnnpack_unit_test(
    name = "f32_vmulcaddc_test",
    srcs = [
        "test/f32-vmulcaddc.cc",
        "test/vmulcaddc-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1698
# Micro-kernel unit test: test/f32-vscale.cc with its tester header.
xnnpack_unit_test(
    name = "f32_vscale_test",
    srcs = [
        "test/f32-vscale.cc",
        "test/vscale-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1707
# Micro-kernel unit test: test/f32-vscaleexpminusmax.cc with its tester header.
xnnpack_unit_test(
    name = "f32_vscaleexpminusmax_test",
    srcs = [
        "test/f32-vscaleexpminusmax.cc",
        "test/vscaleexpminusmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1716
# Micro-kernel unit test: test/f32-vscaleextexp.cc with its tester header.
xnnpack_unit_test(
    name = "f32_vscaleextexp_test",
    srcs = [
        "test/f32-vscaleextexp.cc",
        "test/vscaleextexp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1725
1726xnnpack_unit_test(
Marat Dukhan97579532019-10-18 16:40:39 -07001727 name = "f32_vsub_test",
1728 srcs = [
1729 "test/f32-vsub.cc",
Marat Dukhanc07cb7f2019-11-14 15:32:05 -08001730 "test/vbinop-microkernel-tester.h",
1731 ] + MICROKERNEL_TEST_HDRS,
1732 deps = MICROKERNEL_TEST_DEPS,
1733)
1734
# Micro-kernel unit test: test/f32-vsubc.cc; uses the vbinopc tester header.
xnnpack_unit_test(
    name = "f32_vsubc_test",
    srcs = [
        "test/f32-vsubc.cc",
        "test/vbinopc-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1743
# Micro-kernel unit test: test/f32-vrsubc.cc; uses the vbinopc tester header.
xnnpack_unit_test(
    name = "f32_vrsubc_test",
    srcs = [
        "test/f32-vrsubc.cc",
        "test/vbinopc-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1752
# Micro-kernel unit test for test/q8-avgpool.cc; lists the aligned-allocator
# header explicitly in addition to the shared micro-kernel test headers.
xnnpack_unit_test(
    name = "q8_avgpool_test",
    srcs = [
        "test/q8-avgpool.cc",
        "test/avgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1762
# Micro-kernel unit test for test/q8-igemm.cc; shares the GEMM tester header
# and also pulls in the weight-packing header list.
xnnpack_unit_test(
    name = "q8_igemm_test",
    srcs = [
        "test/q8-igemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1772
# Micro-kernel unit test for test/q8-dwconv.cc; also pulls in the
# weight-packing header list.
xnnpack_unit_test(
    name = "q8_dwconv_test",
    srcs = [
        "test/q8-dwconv.cc",
        "test/dwconv-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1782
# Micro-kernel unit test for test/q8-gavgpool.cc; lists the aligned-allocator
# header explicitly in addition to the shared micro-kernel test headers.
xnnpack_unit_test(
    name = "q8_gavgpool_test",
    srcs = [
        "test/q8-gavgpool.cc",
        "test/gavgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1792
# Micro-kernel unit test for test/q8-gemm.cc; also pulls in the
# weight-packing header list.
xnnpack_unit_test(
    name = "q8_gemm_test",
    srcs = [
        "test/q8-gemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1802
# Micro-kernel unit test for test/q8-vadd.cc and its tester header.
xnnpack_unit_test(
    name = "q8_vadd_test",
    srcs = [
        "test/q8-vadd.cc",
        "test/vadd-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1811
# Micro-kernel unit test for test/u8-clamp.cc and its tester header.
xnnpack_unit_test(
    name = "u8_clamp_test",
    srcs = [
        "test/u8-clamp.cc",
        "test/clamp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1820
# Micro-kernel unit test for test/u8-lut32norm.cc; uses the lut-norm tester header.
xnnpack_unit_test(
    name = "u8_lut32norm_test",
    srcs = [
        "test/u8-lut32norm.cc",
        "test/lut-norm-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1829
# Micro-kernel unit test for test/u8-maxpool.cc and its tester header.
xnnpack_unit_test(
    name = "u8_maxpool_test",
    srcs = [
        "test/u8-maxpool.cc",
        "test/maxpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1838
# Micro-kernel unit test for test/u8-rmax.cc and its tester header.
xnnpack_unit_test(
    name = "u8_rmax_test",
    srcs = [
        "test/u8-rmax.cc",
        "test/rmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1847
# Micro-kernel unit test for test/x32-packx.cc; lists the aligned-allocator
# header explicitly in addition to the shared micro-kernel test headers.
xnnpack_unit_test(
    name = "x32_packx_test",
    srcs = [
        "test/x32-packx.cc",
        "test/pack-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1857
# Micro-kernel unit test for test/x32-pad.cc and its tester header.
xnnpack_unit_test(
    name = "x32_pad_test",
    srcs = [
        "test/x32-pad.cc",
        "test/pad-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1866
# Micro-kernel unit test for test/x32-unpool.cc and its tester header.
xnnpack_unit_test(
    name = "x32_unpool_test",
    srcs = [
        "test/x32-unpool.cc",
        "test/unpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1875
# Micro-kernel unit test for test/x32-zip.cc; shares the zip tester header
# with x8_zip_test.
xnnpack_unit_test(
    name = "x32_zip_test",
    srcs = [
        "test/x32-zip.cc",
        "test/zip-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1884
# Micro-kernel unit test for test/x8-lut.cc and its tester header.
xnnpack_unit_test(
    name = "x8_lut_test",
    srcs = [
        "test/x8-lut.cc",
        "test/lut-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1893
# Micro-kernel unit test for test/x8-zip.cc; shares the zip tester header
# with x32_zip_test.
xnnpack_unit_test(
    name = "x8_zip_test",
    srcs = [
        "test/x8-zip.cc",
        "test/zip-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1902
########################### Size test for the library ##########################
1904
# Binary built from test/size.c that links only the
# :xnnpack_operators_nhwc_f32 target.
xnnpack_binary(
    name = "size_test",
    srcs = ["test/size.c"],
    deps = [":xnnpack_operators_nhwc_f32"],
)
1910
########################### Unit tests for operators ###########################
1912
# Operator unit test for test/add-nc.cc and its operator tester header.
xnnpack_unit_test(
    name = "add_nc_test",
    srcs = [
        "test/add-nc.cc",
        "test/add-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1921
# Operator unit test for test/argmax-pooling-nhwc.cc; also needs the
# OPERATOR_TEST_PARAMS_HDRS header list.
xnnpack_unit_test(
    name = "argmax_pooling_nhwc_test",
    srcs = [
        "test/argmax-pooling-nhwc.cc",
        "test/argmax-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
1930
# Operator unit test for test/average-pooling-nhwc.cc; also needs the
# OPERATOR_TEST_PARAMS_HDRS header list.
xnnpack_unit_test(
    name = "average_pooling_nhwc_test",
    srcs = [
        "test/average-pooling-nhwc.cc",
        "test/average-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
1939
# Operator unit test for test/channel-pad-nc.cc; also needs the
# OPERATOR_TEST_PARAMS_HDRS header list.
xnnpack_unit_test(
    name = "channel_pad_nc_test",
    srcs = [
        "test/channel-pad-nc.cc",
        "test/channel-pad-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
1948
# Operator unit test for test/channel-shuffle-nc.cc and its operator tester header.
xnnpack_unit_test(
    name = "channel_shuffle_nc_test",
    srcs = [
        "test/channel-shuffle-nc.cc",
        "test/channel-shuffle-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1957
# Operator unit test for test/clamp-nc.cc and its operator tester header.
xnnpack_unit_test(
    name = "clamp_nc_test",
    srcs = [
        "test/clamp-nc.cc",
        "test/clamp-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1966
# Operator unit test for test/convolution-nhwc.cc; shares the convolution
# operator tester header with convolution_nchw_test.
xnnpack_unit_test(
    name = "convolution_nhwc_test",
    srcs = [
        "test/convolution-nhwc.cc",
        "test/convolution-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1975
# Operator unit test for test/convolution-nchw.cc; shares the convolution
# operator tester header with convolution_nhwc_test.
xnnpack_unit_test(
    name = "convolution_nchw_test",
    srcs = [
        "test/convolution-nchw.cc",
        "test/convolution-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1984
# Operator unit test for test/deconvolution-nhwc.cc; also needs the
# OPERATOR_TEST_PARAMS_HDRS header list.
xnnpack_unit_test(
    name = "deconvolution_nhwc_test",
    srcs = [
        "test/deconvolution-nhwc.cc",
        "test/deconvolution-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
1993
# Operator unit test for test/fully-connected-nc.cc and its operator tester header.
xnnpack_unit_test(
    name = "fully_connected_nc_test",
    srcs = [
        "test/fully-connected-nc.cc",
        "test/fully-connected-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
2002
# Operator unit test for test/global-average-pooling-nwc.cc; also needs the
# OPERATOR_TEST_PARAMS_HDRS header list.
xnnpack_unit_test(
    name = "global_average_pooling_nwc_test",
    srcs = [
        "test/global-average-pooling-nwc.cc",
        "test/global-average-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
2011
# Operator unit test for test/global-average-pooling-ncw.cc; shares the
# global-average-pooling tester header with the NWC variant.
xnnpack_unit_test(
    name = "global_average_pooling_ncw_test",
    srcs = [
        "test/global-average-pooling-ncw.cc",
        "test/global-average-pooling-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
2020
# Operator unit test for test/hardswish-nc.cc and its operator tester header.
xnnpack_unit_test(
    name = "hardswish_nc_test",
    srcs = [
        "test/hardswish-nc.cc",
        "test/hardswish-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
2029
# Operator unit test for test/leaky-relu-nc.cc and its operator tester header.
xnnpack_unit_test(
    name = "leaky_relu_nc_test",
    srcs = [
        "test/leaky-relu-nc.cc",
        "test/leaky-relu-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
2038
# Operator unit test for test/max-pooling-nhwc.cc; also needs the
# OPERATOR_TEST_PARAMS_HDRS header list.
xnnpack_unit_test(
    name = "max_pooling_nhwc_test",
    srcs = [
        "test/max-pooling-nhwc.cc",
        "test/max-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
2047
# Operator unit test for test/multiply-nd.cc and its operator tester header.
xnnpack_unit_test(
    name = "multiply_nd_test",
    srcs = [
        "test/multiply-nd.cc",
        "test/multiply-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
2056
# Operator unit test for test/prelu-nc.cc; also needs the
# OPERATOR_TEST_PARAMS_HDRS header list.
xnnpack_unit_test(
    name = "prelu_nc_test",
    srcs = [
        "test/prelu-nc.cc",
        "test/prelu-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
2065
# Operator unit test for test/resize-bilinear-nhwc.cc; also needs the
# OPERATOR_TEST_PARAMS_HDRS header list.
xnnpack_unit_test(
    name = "resize_bilinear_nhwc_test",
    srcs = [
        "test/resize-bilinear-nhwc.cc",
        "test/resize-bilinear-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
2074
# Operator unit test for test/sigmoid-nc.cc and its operator tester header.
xnnpack_unit_test(
    name = "sigmoid_nc_test",
    srcs = [
        "test/sigmoid-nc.cc",
        "test/sigmoid-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
2083
# Operator unit test for test/softargmax-nc.cc and its operator tester header.
xnnpack_unit_test(
    name = "softargmax_nc_test",
    srcs = [
        "test/softargmax-nc.cc",
        "test/softargmax-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
2092
# Operator unit test for test/unpooling-nhwc.cc and its operator tester header.
xnnpack_unit_test(
    name = "unpooling_nhwc_test",
    srcs = [
        "test/unpooling-nhwc.cc",
        "test/unpooling-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
2101
############################# Build configurations #############################
2103
# Enables usage of assembly kernels.
# Matches builds invoked with --define xnn_enable_assembly=true.
config_setting(
    name = "xnn_enable_assembly_explicit_true",
    define_values = {"xnn_enable_assembly": "true"},
)
2109
# Disables usage of assembly kernels.
# Matches builds invoked with --define xnn_enable_assembly=false.
config_setting(
    name = "xnn_enable_assembly_explicit_false",
    define_values = {"xnn_enable_assembly": "false"},
)
2115
# Builds with -c dbg (compilation_mode is "dbg").
config_setting(
    name = "debug_build",
    values = {
        "compilation_mode": "dbg",
    },
)
2123
# Builds with -c opt (compilation_mode is "opt").
config_setting(
    name = "optimized_build",
    values = {
        "compilation_mode": "opt",
    },
)
2131
# Matches builds with --cpu=k8. Only the cpu value is checked here; the
# operating system is implied by the name, not by this condition.
config_setting(
    name = "linux_k8",
    values = {"cpu": "k8"},
)
2136
# Matches builds with --cpu=aarch64. Only the cpu value is checked here; the
# operating system is implied by the name, not by this condition.
config_setting(
    name = "linux_aarch64",
    values = {"cpu": "aarch64"},
)
2141
# Matches any build that uses the Android crosstool, regardless of cpu.
config_setting(
    name = "android",
    values = {"crosstool_top": "//external:android/crosstool"},
)
2146
# Matches builds that use the Android crosstool with --cpu=armeabi-v7a.
config_setting(
    name = "android_armv7",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "armeabi-v7a",
    },
)
2154
# Matches builds that use the Android crosstool with --cpu=arm64-v8a.
config_setting(
    name = "android_arm64",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "arm64-v8a",
    },
)
2162
# Matches builds that use the Android crosstool with --cpu=x86.
config_setting(
    name = "android_x86",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "x86",
    },
)
2170
# Matches builds that use the Android crosstool with --cpu=x86_64.
config_setting(
    name = "android_x86_64",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "x86_64",
    },
)
2178
# Matches builds with apple_platform_type "macos" and --cpu=darwin.
config_setting(
    name = "macos_x86_64",
    values = {
        "apple_platform_type": "macos",
        "cpu": "darwin",
    },
)
2186
# Matches any build that uses the //toolchain:emscripten crosstool,
# regardless of cpu.
config_setting(
    name = "emscripten",
    values = {"crosstool_top": "//toolchain:emscripten"},
)
2191
# Matches builds that use the Emscripten crosstool with --cpu=wasm.
config_setting(
    name = "emscripten_wasm",
    values = {
        "crosstool_top": "//toolchain:emscripten",
        "cpu": "wasm",
    },
)
2199
# Matches builds that use the Emscripten crosstool with --cpu=wasm and the
# "wasm_simd" feature enabled. Every entry listed in `values` must match.
config_setting(
    name = "emscripten_wasmsimd",
    values = {
        "crosstool_top": "//toolchain:emscripten",
        "cpu": "wasm",
        "features": "wasm_simd",
    },
)
2208
# Matches builds that use the Emscripten crosstool with --cpu=asmjs.
config_setting(
    name = "emscripten_asmjs",
    values = {
        "crosstool_top": "//toolchain:emscripten",
        "cpu": "asmjs",
    },
)