# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# Description:
#   XNNPACK - optimized floating-point neural network operators library
8
# Macros and helper functions used by the targets in this file. The load()
# statement is kept ahead of every other statement, as Bazel style recommends.
load(
    ":build_defs.bzl",
    "xnnpack_aggregate_library",
    "xnnpack_benchmark",
    "xnnpack_binary",
    "xnnpack_cc_library",
    "xnnpack_min_size_copts",
    "xnnpack_optional_armcl_copts",
    "xnnpack_optional_armcl_deps",
    "xnnpack_optional_gemmlowp_copts",
    "xnnpack_optional_gemmlowp_deps",
    "xnnpack_optional_ruy_copts",
    "xnnpack_optional_ruy_deps",
    "xnnpack_optional_tflite_copts",
    "xnnpack_optional_tflite_deps",
    "xnnpack_std_copts",
    "xnnpack_unit_test",
    "xnnpack_visibility",
)

licenses(["notice"])

exports_files(["LICENSE"])
14
# Dependencies shared by the operator-level benchmark targets.
OPERATOR_BENCHMARK_DEPS = [
    ":XNNPACK",
    ":bench_utils",
    "@cpuinfo",
    "@pthreadpool",
]
21
# Dependencies shared by the micro-kernel benchmark targets.
MICROKERNEL_BENCHMARK_DEPS = [
    ":ukernels",
    ":bench_utils",
    "@cpuinfo",
    "@FP16",
    "@pthreadpool",
]
29
# Dependencies shared by the micro-kernel unit-test targets.
MICROKERNEL_TEST_DEPS = [
    ":ukernels",
    "@cpuinfo",
    "@FP16",
    "@pthreadpool",
]
36
# Dependency list for operator-level tests.
# NOTE(review): no user of this list is visible in this part of the file;
# presumably the operator test targets further down consume it.
OPERATOR_TEST_DEPS = [
    ":XNNPACK",
    "@pthreadpool",
    "@FP16",
]
42
# C sources of the operator implementations; compiled into the ":operators"
# library together with src/init.c and src/operator-delete.c.
OPERATOR_SRCS = [
    "src/add.c",
    "src/argmax-pooling.c",
    "src/average-pooling.c",
    "src/channel-pad.c",
    "src/channel-shuffle.c",
    "src/clamp.c",
    "src/convolution-spnchw.c",
    "src/convolution.c",
    "src/deconvolution.c",
    "src/fully-connected.c",
    "src/global-average-pooling-spnchw.c",
    "src/global-average-pooling.c",
    "src/hardswish.c",
    "src/leaky-relu.c",
    "src/max-pooling.c",
    "src/prelu.c",
    "src/sigmoid.c",
    "src/softargmax.c",
    "src/unpooling.c",
]
64
# Scalar micro-kernel sources; these are the srcs of ":scalar_ukernels".
SCALAR_UKERNELS = [
    "src/f32-argmaxpool/mp9p8q-scalar.c",
    "src/f32-argmaxpool/up4-scalar.c",
    "src/f32-argmaxpool/up9-scalar.c",
    "src/f32-avgpool/mp9p8q-scalar.c",
    "src/f32-avgpool/up9-scalar.c",
    "src/f32-clamp/scalar.c",
    "src/f32-igemm/1x4-scalar.c",
    "src/f32-igemm/2x4-scalar.c",
    "src/f32-igemm/4x2-scalar.c",
    "src/f32-igemm/4x4-scalar.c",
    "src/f32-dwconv/up1x25-scalar.c",
    "src/f32-dwconv/up1x4-scalar.c",
    "src/f32-dwconv/up1x9-scalar.c",
    "src/f32-gavgpool/mp7p7q-scalar.c",
    "src/f32-gavgpool/up7-scalar.c",
    "src/f32-gemm/1x4-scalar.c",
    "src/f32-gemm/2x4-scalar.c",
    "src/f32-gemm/4x2-scalar.c",
    "src/f32-gemm/4x4-scalar.c",
    "src/f32-gemminc/1x4-scalar.c",
    "src/f32-gemminc/2x4-scalar.c",
    "src/f32-gemminc/4x4-scalar.c",
    "src/f32-hswish/scalar.c",
    "src/f32-maxpool/9p8q-scalar.c",
    "src/f32-pavgpool/mp9p8q-scalar.c",
    "src/f32-pavgpool/up9-scalar.c",
    "src/f32-ppmm/2x4-scalar.c",
    "src/f32-ppmm/3x3-scalar.c",
    "src/f32-ppmm/4x2-scalar.c",
    "src/f32-ppmm/4x4-scalar.c",
    "src/f32-prelu/x4-scalar.c",
    "src/f32-rmax/scalar.c",
    "src/f32-spmm/1x1-scalar-pipelined.c",
    "src/f32-spmm/1x1-scalar-unroll2.c",
    "src/f32-spmm/1x1-scalar.c",
    "src/f32-spmm/2x1-scalar-pipelined.c",
    "src/f32-spmm/2x1-scalar-unroll2.c",
    "src/f32-spmm/2x1-scalar.c",
    "src/f32-spmm/4x1-scalar-pipelined.c",
    "src/f32-spmm/4x1-scalar-unroll2.c",
    "src/f32-spmm/4x1-scalar.c",
    "src/f32-spmm/8x1-scalar-pipelined.c",
    "src/f32-spmm/8x1-scalar-unroll2.c",
    "src/f32-spmm/8x1-scalar.c",
    "src/f32-vadd/scalar.c",
    "src/f32-vmul/scalar.c",
    "src/f32-vmulcaddc/c1-scalar-x2.c",
    "src/f32-vsub/scalar.c",
    "src/q8-avgpool/mp9p8q-scalar.c",
    "src/q8-avgpool/up9-scalar.c",
    "src/q8-igemm/2x2-scalar.c",
    "src/q8-dwconv/up1x9-scalar.c",
    "src/q8-gavgpool/mp7p7q-scalar.c",
    "src/q8-gavgpool/up7-scalar.c",
    "src/q8-gemm/2x2-scalar.c",
    "src/q8-vadd/scalar.c",
    "src/u8-clamp/scalar.c",
    "src/u8-lut32norm/scalar.c",
    "src/u8-maxpool/9p8q-scalar.c",
    "src/u8-rmax/scalar.c",
    "src/x32-packx/x2-scalar.c",
    "src/x32-packx/x3-scalar.c",
    "src/x32-packx/x4-scalar.c",
    "src/x32-pad/x2-scalar.c",
    "src/x32-unpool/scalar.c",
    "src/x32-zip/x2-scalar.c",
    "src/x32-zip/x3-scalar.c",
    "src/x32-zip/x4-scalar.c",
    "src/x32-zip/xm-scalar.c",
    "src/x8-lut/scalar.c",
    "src/x8-zip/x2-scalar.c",
    "src/x8-zip/x3-scalar.c",
    "src/x8-zip/x4-scalar.c",
    "src/x8-zip/xm-scalar.c",
]
141
# PSIMD micro-kernel sources; these are the srcs of ":psimd_ukernels".
PSIMD_UKERNELS = [
    "src/f32-argmaxpool/mp9p8q-psimd.c",
    "src/f32-argmaxpool/up4-psimd.c",
    "src/f32-argmaxpool/up9-psimd.c",
    "src/f32-avgpool/mp9p8q-psimd.c",
    "src/f32-avgpool/up9-psimd.c",
    "src/f32-clamp/psimd.c",
    "src/f32-igemm/1x8-psimd-loadsplat.c",
    "src/f32-igemm/1x8-psimd-splat.c",
    "src/f32-igemm/1x8s4-psimd.c",
    "src/f32-igemm/4x2c4-psimd.c",
    "src/f32-igemm/4x8-psimd-loadsplat.c",
    "src/f32-igemm/4x8-psimd-splat.c",
    "src/f32-igemm/4x8s4-psimd.c",
    "src/f32-igemm/6x8-psimd-loadsplat.c",
    "src/f32-igemm/6x8-psimd-splat.c",
    "src/f32-igemm/6x8s4-psimd.c",
    "src/f32-dwconv/up4x25-psimd.c",
    "src/f32-dwconv/up4x4-psimd.c",
    "src/f32-dwconv/up4x9-psimd.c",
    "src/f32-gavgpool/mp7p7q-psimd.c",
    "src/f32-gavgpool/up7-psimd.c",
    "src/f32-gemm/1x8-psimd-loadsplat.c",
    "src/f32-gemm/1x8-psimd-splat.c",
    "src/f32-gemm/1x8s4-psimd.c",
    "src/f32-gemm/4x8-psimd-loadsplat.c",
    "src/f32-gemm/4x8-psimd-splat.c",
    "src/f32-gemm/4x8s4-psimd.c",
    "src/f32-gemm/6x8-psimd-loadsplat.c",
    "src/f32-gemm/6x8-psimd-splat.c",
    "src/f32-gemm/6x8s4-psimd.c",
    "src/f32-gemminc/1x8-psimd-loadsplat.c",
    "src/f32-gemminc/1x8-psimd-splat.c",
    "src/f32-gemminc/1x8s4-psimd.c",
    "src/f32-gemminc/4x8-psimd-loadsplat.c",
    "src/f32-gemminc/4x8-psimd-splat.c",
    "src/f32-gemminc/4x8s4-psimd.c",
    "src/f32-gemminc/6x8-psimd-loadsplat.c",
    "src/f32-gemminc/6x8-psimd-splat.c",
    "src/f32-gemminc/6x8s4-psimd.c",
    "src/f32-hswish/psimd.c",
    "src/f32-maxpool/9p8q-psimd.c",
    "src/f32-pavgpool/mp9p8q-psimd.c",
    "src/f32-pavgpool/up9-psimd.c",
    "src/f32-ppmm/4x8-psimd.c",
    "src/f32-prelu/x4-psimd.c",
    "src/f32-vadd/psimd.c",
    "src/f32-vmul/psimd.c",
    "src/f32-vmulcaddc/c4-psimd-x2.c",
    "src/f32-vsub/psimd.c",
    "src/x32-packx/x4-psimd.c",
    "src/x32-pad/x2-psimd.c",
    "src/x32-unpool/psimd.c",
    "src/x32-zip/x2-psimd.c",
    "src/x32-zip/x3-psimd.c",
    "src/x32-zip/x4-psimd.c",
    "src/x32-zip/xm-psimd.c",
]
200
# ISA-specific micro-kernels

# NEON micro-kernel sources; passed to ":neon_ukernels" as both its
# aarch32_srcs and its aarch64_srcs.
NEON_UKERNELS = [
    "src/f32-avgpool/mp9p8q-neon.c",
    "src/f32-avgpool/up9-neon.c",
    "src/f32-clamp/neon.c",
    "src/f32-igemm/1x8-neon-ld64.c",
    "src/f32-igemm/4x12-neon-ld64.c",
    "src/f32-igemm/4x2-neon-ld64.c",
    "src/f32-igemm/4x4-neon-ld64.c",
    "src/f32-igemm/4x8-neon-ld128.c",
    "src/f32-igemm/4x8-neon-ld64.c",
    "src/f32-igemm/6x8-neon-ld64.c",
    "src/f32-dwconv/up4x9-neon.c",
    "src/f32-gavgpool-spchw/neon-x4.c",
    "src/f32-gavgpool/mp7p7q-neon.c",
    "src/f32-gavgpool/up7-neon.c",
    "src/f32-gemm/1x8-neon-ld64.c",
    "src/f32-gemm/4x12-neon-ld64.c",
    "src/f32-gemm/4x2-neon-ld64.c",
    "src/f32-gemm/4x8-neon-ld128.c",
    "src/f32-gemm/4x8-neon-ld64.c",
    "src/f32-gemm/5x8-neon-ld64.c",
    "src/f32-gemm/6x8-neon-ld64.c",
    "src/f32-gemminc/1x8-neon-ld64.c",
    "src/f32-gemminc/4x12-neon-ld64.c",
    "src/f32-gemminc/4x8-neon-ld128.c",
    "src/f32-gemminc/4x8-neon-ld64.c",
    "src/f32-gemminc/5x8-neon-ld64.c",
    "src/f32-gemminc/6x8-neon-ld64.c",
    "src/f32-hswish/neon.c",
    "src/f32-pavgpool/mp9p8q-neon.c",
    "src/f32-pavgpool/up9-neon.c",
    "src/f32-ppmm/4x8-neon.c",
    "src/f32-ppmm/8x8-neon.c",
    "src/f32-rmax/neon.c",
    "src/f32-vmulcaddc/c4-neon-x2.c",
    "src/q8-avgpool/mp9p8q-neon.c",
    "src/q8-avgpool/up9-neon.c",
    "src/q8-igemm/4x8-neon.c",
    "src/q8-igemm/8x8-neon.c",
    "src/q8-dwconv/up8x9-neon.c",
    "src/q8-gavgpool/mp7p7q-neon.c",
    "src/q8-gavgpool/up7-neon.c",
    "src/q8-gemm/4x8-neon.c",
    "src/q8-gemm/8x8-neon.c",
    "src/q8-vadd/neon.c",
    "src/u8-clamp/neon.c",
    "src/u8-maxpool/9p8q-neon.c",
    "src/u8-rmax/neon.c",
    "src/x32-packx/x4-neon-st4.c",
    "src/x32-pad/x2-neon.c",
    "src/x32-zip/x2-neon.c",
    "src/x32-zip/x3-neon.c",
    "src/x32-zip/x4-neon.c",
    "src/x32-zip/xm-neon.c",
    "src/x8-zip/x2-neon.c",
    "src/x8-zip/x3-neon.c",
    "src/x8-zip/x4-neon.c",
    "src/x8-zip/xm-neon.c",
]
261
# NEON-FMA micro-kernel sources; ":neonfma_ukernels" uses them for both
# aarch32 and aarch64.
NEONFMA_UKERNELS = [
    "src/f32-igemm/4x12-neonfma-ld64.c",
    "src/f32-igemm/4x2-neonfma-ld64.c",
    "src/f32-igemm/4x4-neonfma-ld64.c",
    "src/f32-igemm/4x8-neonfma-ld128.c",
    "src/f32-igemm/4x8-neonfma-ld64.c",
    "src/f32-igemm/6x8-neonfma-ld64.c",
    "src/f32-dwconv/up4x9-neonfma.c",
    "src/f32-dwconv/up8x9-neonfma.c",
    "src/f32-gemm/1x8-neonfma-ld64.c",
    "src/f32-gemm/4x12-neonfma-ld64.c",
    "src/f32-gemm/4x2-neonfma-ld64.c",
    "src/f32-gemm/4x8-neonfma-ld128.c",
    "src/f32-gemm/4x8-neonfma-ld64.c",
    "src/f32-gemm/5x8-neonfma-ld64.c",
    "src/f32-gemm/6x8-neonfma-ld64.c",
    "src/f32-gemminc/1x8-neonfma-ld64.c",
    "src/f32-gemminc/4x12-neonfma-ld64.c",
    "src/f32-gemminc/4x8-neonfma-ld128.c",
    "src/f32-gemminc/4x8-neonfma-ld64.c",
    "src/f32-gemminc/5x8-neonfma-ld64.c",
    "src/f32-gemminc/6x8-neonfma-ld64.c",
    "src/f32-hswish/neonfma.c",
    "src/f32-ppmm/4x8-neonfma.c",
    "src/f32-ppmm/8x8-neonfma.c",
    "src/f32-vmulcaddc/c4-neonfma-x2.c",
]
289
# NEON-FMA micro-kernel sources that ":neonfma_ukernels" adds to its
# aarch64_srcs only (they are not part of its aarch32_srcs).
AARCH64_NEONFMA_UKERNELS = [
    "src/f32-conv-hwc/3x3s2p1c3x4-neonfma-2x2.c",
    "src/f32-conv-hwc/3x3s2p1c3x8-neonfma-2x2.c",
    "src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c",
    "src/f32-dwconv-spchw/3x3p1-neonfma.c",
    "src/f32-dwconv-spchw/5x5p2-neonfma.c",
    "src/f32-dwconv-spchw/3x3s2p1-neonfma.c",
    "src/f32-dwconv-spchw/5x5s2p2-neonfma.c",
    "src/f32-spmm/12x1-neonfma.c",
    "src/f32-spmm/12x2-neonfma.c",
    "src/f32-spmm/12x4-neonfma.c",
    "src/f32-spmm/16x1-neonfma-pipelined.c",
    "src/f32-spmm/16x1-neonfma-unroll2.c",
    "src/f32-spmm/16x1-neonfma.c",
    "src/f32-spmm/16x2-neonfma.c",
    "src/f32-spmm/16x4-neonfma.c",
    "src/f32-spmm/4x1-neonfma-pipelined.c",
    "src/f32-spmm/4x1-neonfma-unroll2.c",
    "src/f32-spmm/4x1-neonfma.c",
    "src/f32-spmm/4x2-neonfma.c",
    "src/f32-spmm/4x4-neonfma.c",
    "src/f32-spmm/8x1-neonfma-pipelined.c",
    "src/f32-spmm/8x1-neonfma-unroll2.c",
    "src/f32-spmm/8x1-neonfma.c",
    "src/f32-spmm/8x2-neonfma.c",
    "src/f32-spmm/8x4-neonfma.c",
]
317
# Half-precision GEMM micro-kernel sources; the aarch64_srcs of
# ":neonfp16arith_ukernels".
AARCH64_NEONFP16ARITH_UKERNELS = [
    "src/f16-gemm/4x8-neonfp16arith-ld64.c",
    "src/f16-gemm/6x8-neonfp16arith-ld64.c",
    "src/f16-gemm/8x8-neonfp16arith-ld64.c",
]
323
# SSE micro-kernel sources; built as part of ":sse2_ukernels" together with
# SSE2_UKERNELS.
SSE_UKERNELS = [
    "src/f32-avgpool/mp9p8q-sse.c",
    "src/f32-avgpool/up9-sse.c",
    "src/f32-clamp/sse.c",
    "src/f32-igemm/1x8-sse-dup.c",
    "src/f32-igemm/1x8-sse-load1.c",
    "src/f32-igemm/1x8s4-sse.c",
    "src/f32-igemm/4x2c4-sse.c",
    "src/f32-igemm/4x8-sse-dup.c",
    "src/f32-igemm/4x8-sse-load1.c",
    "src/f32-igemm/4x8s4-sse.c",
    "src/f32-dwconv/up4x25-sse.c",
    "src/f32-dwconv/up4x4-sse.c",
    "src/f32-dwconv/up4x9-sse.c",
    "src/f32-gavgpool-spchw/sse-x4.c",
    "src/f32-gavgpool/mp7p7q-sse.c",
    "src/f32-gavgpool/up7-sse.c",
    "src/f32-gemm/1x8-sse-dup.c",
    "src/f32-gemm/1x8-sse-load1.c",
    "src/f32-gemm/1x8s4-sse.c",
    "src/f32-gemm/4x8-sse-dup.c",
    "src/f32-gemm/4x8-sse-load1.c",
    "src/f32-gemm/4x8s4-sse.c",
    "src/f32-gemminc/1x8-sse-dup.c",
    "src/f32-gemminc/1x8-sse-load1.c",
    "src/f32-gemminc/1x8s4-sse.c",
    "src/f32-gemminc/4x8-sse-dup.c",
    "src/f32-gemminc/4x8-sse-load1.c",
    "src/f32-gemminc/4x8s4-sse.c",
    "src/f32-hswish/sse.c",
    "src/f32-maxpool/9p8q-sse.c",
    "src/f32-pavgpool/mp9p8q-sse.c",
    "src/f32-pavgpool/up9-sse.c",
    "src/f32-dwconv-spchw/3x3p1-sse.c",
    "src/f32-dwconv-spchw/3x3s2p1-sse.c",
    "src/f32-ppmm/4x8-sse.c",
    "src/f32-prelu/x4-sse.c",
    "src/f32-rmax/sse.c",
    "src/f32-spmm/4x1-sse.c",
    "src/f32-spmm/8x1-sse.c",
    "src/f32-vadd/sse.c",
    "src/f32-vmul/sse.c",
    "src/f32-vmulcaddc/c4-sse-x2.c",
    "src/f32-vsub/sse.c",
    "src/x32-packx/x4-sse.c",
]
370
# SSE2 micro-kernel sources; built as part of ":sse2_ukernels" together with
# SSE_UKERNELS.
SSE2_UKERNELS = [
    "src/f32-argmaxpool/mp9p8q-sse2.c",
    "src/f32-argmaxpool/up4-sse2.c",
    "src/f32-argmaxpool/up9-sse2.c",
    "src/q8-avgpool/mp9p8q-sse2.c",
    "src/q8-avgpool/up9-sse2.c",
    "src/q8-igemm/4x4c2-sse2.c",
    "src/q8-dwconv/up8x9-sse2.c",
    "src/q8-gavgpool/mp7p7q-sse2.c",
    "src/q8-gavgpool/up7-sse2.c",
    "src/q8-gemm/2x4c8-sse2.c",
    "src/q8-gemm/4x4c2-sse2.c",
    "src/q8-vadd/sse2.c",
    "src/u8-clamp/sse2.c",
    "src/u8-maxpool/9p8q-sse2.c",
    "src/u8-rmax/sse2.c",
    "src/x32-pad/x2-sse2.c",
    "src/x32-zip/x2-sse2.c",
    "src/x32-zip/x3-sse2.c",
    "src/x32-zip/x4-sse2.c",
    "src/x32-zip/xm-sse2.c",
    "src/x8-zip/x2-sse2.c",
    "src/x8-zip/x3-sse2.c",
    "src/x8-zip/x4-sse2.c",
    "src/x8-zip/xm-sse2.c",
]
397
# AVX micro-kernel sources; the x86_srcs of ":avx_ukernels".
AVX_UKERNELS = [
    "src/f32-rmax/avx.c",
]
401
# AVX512F micro-kernel sources; the x86_srcs of ":avx512f_ukernels".
AVX512F_UKERNELS = [
    "src/f32-rmax/avx512f.c",
]
405
# Assembly micro-kernel sources used as the aarch32_srcs of ":asm_ukernels".
AARCH32_ASM_UKERNELS = [
    "src/q8-dwconv/up8x9-aarch32-neon.S",
]
409
# Assembly micro-kernel sources used as the aarch64_srcs of ":asm_ukernels".
AARCH64_ASM_UKERNELS = [
    "src/f32-dwconv/up4x9-aarch64-neonfma-cortex-a55.S",
    "src/f32-dwconv/up4x9-aarch64-neonfma.S",
    "src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/4x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/4x8-aarch64-neonfma-ld128.S",
    "src/f32-gemm/4x8-aarch64-neonfma-ld64.S",
    "src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/6x8-aarch64-neonfma-ld128.S",
    "src/f32-gemm/6x8-aarch64-neonfma-ld64.S",
    "src/f32-gemminc/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-ld128.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-ld64.S",
    "src/f32-gemminc/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-ld128.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-ld64.S",
    "src/f32-igemm/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S",
]
451
# Internal headers under src/xnnpack/ that form the base of INTERNAL_HDRS,
# MICROKERNEL_BENCHMARK_HDRS and MICROKERNEL_TEST_HDRS.
INTERNAL_MICROKERNEL_HDRS = [
    "src/xnnpack/argmaxpool.h",
    "src/xnnpack/avgpool.h",
    "src/xnnpack/clamp.h",
    "src/xnnpack/common.h",
    "src/xnnpack/conv.h",
    "src/xnnpack/dwconv.h",
    "src/xnnpack/gavgpool.h",
    "src/xnnpack/gemm.h",
    "src/xnnpack/hswish.h",
    "src/xnnpack/igemm.h",
    "src/xnnpack/lut.h",
    "src/xnnpack/math.h",
    "src/xnnpack/maxpool.h",
    "src/xnnpack/packx.h",
    "src/xnnpack/pad.h",
    "src/xnnpack/params.h",
    "src/xnnpack/pavgpool.h",
    "src/xnnpack/ppmm.h",
    "src/xnnpack/prelu.h",
    "src/xnnpack/rmax.h",
    "src/xnnpack/scalar-utils.h",
    "src/xnnpack/spmm.h",
    "src/xnnpack/unpool.h",
    "src/xnnpack/vadd.h",
    "src/xnnpack/vmul.h",
    "src/xnnpack/vmulcaddc.h",
    "src/xnnpack/vsub.h",
    "src/xnnpack/zip.h",
]
482
# Full internal header set: the micro-kernel headers plus the public API
# header and the remaining src/xnnpack/ headers listed here.
INTERNAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "include/xnnpack.h",
    "src/xnnpack/allocator.h",
    "src/xnnpack/compute.h",
    "src/xnnpack/im2col.h",
    "src/xnnpack/indirection.h",
    "src/xnnpack/log.h",
    "src/xnnpack/operator.h",
    "src/xnnpack/pack.h",
    "src/xnnpack/requantization.h",
    "src/xnnpack/requantization-stubs.h",
]
495
# Headers appended to the srcs of the micro-kernel benchmark targets.
MICROKERNEL_BENCHMARK_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "src/xnnpack/requantization.h",
    "include/xnnpack.h",
]
500
# Headers appended to the srcs of the micro-kernel unit-test targets.
MICROKERNEL_TEST_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "src/xnnpack/isa-checks.h",
    "src/xnnpack/requantization.h",
    "include/xnnpack.h",
]
506
# Parameter-related headers for operator tests.
# NOTE(review): no user of this list is visible in this part of the file;
# presumably the operator test targets further down consume it.
OPERATOR_TEST_PARAMS_HDRS = [
    "src/xnnpack/params.h",
    "src/xnnpack/common.h",
]
511
# Headers added to the srcs of benchmark and test targets that list them
# alongside src/xnnpack/pack.h.
WEIGHTS_PACK_HDRS = [
    "src/xnnpack/pack.h",
    "src/xnnpack/operator.h",
    "src/xnnpack/compute.h",
]
517
# Scalar micro-kernels; this is the library listed in the generic_deps of
# ":ukernels".
xnnpack_cc_library(
    name = "scalar_ukernels",
    srcs = SCALAR_UKERNELS,
    hdrs = INTERNAL_HDRS,
    aarch32_copts = ["-marm"],
    copts = xnnpack_std_copts(),
    deps = [
        "@FP16",
        "@FXdiv",
    ],
)
529
# PSIMD micro-kernels. Passes -O3 and -ffast-math through optimized_copts and
# enables NEON via aarch32_copts.
xnnpack_cc_library(
    name = "psimd_ukernels",
    srcs = PSIMD_UKERNELS,
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon",
    ],
    copts = xnnpack_std_copts(),
    optimized_copts = [
        "-O3",
        "-ffast-math",
    ],
    deps = [
        "@FP16",
        "@psimd",
    ],
)
548
# NEON micro-kernels; the same source list is supplied for aarch32 and aarch64.
xnnpack_cc_library(
    name = "neon_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon",
    ],
    aarch32_srcs = NEON_UKERNELS,
    aarch64_srcs = NEON_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = ["@FP16"],
)
561
# NEON-FMA micro-kernels; aarch64 additionally gets AARCH64_NEONFMA_UKERNELS.
xnnpack_cc_library(
    name = "neonfma_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon-vfpv4",
    ],
    aarch32_srcs = NEONFMA_UKERNELS,
    aarch64_srcs = NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = ["@FP16"],
)
574
# Half-precision micro-kernels; only aarch64 sources and options are given,
# compiled with -march=armv8.2-a+fp16.
xnnpack_cc_library(
    name = "neonfp16arith_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch64_copts = ["-march=armv8.2-a+fp16"],
    aarch64_srcs = AARCH64_NEONFP16ARITH_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = ["@FP16"],
)
583
# SSE and SSE2 micro-kernels, built together with -msse2.
xnnpack_cc_library(
    name = "sse2_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-msse2"],
    x86_srcs = SSE_UKERNELS + SSE2_UKERNELS,
    deps = ["@FP16"],
)
592
# AVX micro-kernels, built with -mavx.
xnnpack_cc_library(
    name = "avx_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-mavx"],
    x86_srcs = AVX_UKERNELS,
    deps = ["@FP16"],
)
601
# AVX512F micro-kernels, built with -mavx512f.
xnnpack_cc_library(
    name = "avx512f_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-mavx512f"],
    x86_srcs = AVX512F_UKERNELS,
    deps = ["@FP16"],
)
610
# Assembly micro-kernels (.S sources) for aarch32 and aarch64; the only header
# exposed is src/xnnpack/assembly.h and no copts are set.
xnnpack_cc_library(
    name = "asm_ukernels",
    hdrs = ["src/xnnpack/assembly.h"],
    aarch32_srcs = AARCH32_ASM_UKERNELS,
    aarch64_srcs = AARCH64_ASM_UKERNELS,
)
617
# Aggregates the micro-kernel libraries into a single ":ukernels" target with
# one dependency list per platform family. How the lists are selected and
# combined is decided inside xnnpack_aggregate_library (build_defs.bzl).
xnnpack_aggregate_library(
    name = "ukernels",
    aarch32_deps = [
        ":psimd_ukernels",
        ":neon_ukernels",
        ":neonfma_ukernels",
        ":asm_ukernels",
    ],
    aarch64_deps = [
        ":psimd_ukernels",
        ":neon_ukernels",
        ":neonfma_ukernels",
        ":neonfp16arith_ukernels",
        ":asm_ukernels",
    ],
    generic_deps = [":scalar_ukernels"],
    wasmsimd_deps = [
        ":psimd_ukernels",
    ],
    x86_deps = [
        ":psimd_ukernels",
        ":sse2_ukernels",
        ":avx_ukernels",
        ":avx512f_ukernels",
    ],
)
644
# Library built from src/im2col.c; used by ":f32_im2col_gemm_bench".
xnnpack_cc_library(
    name = "im2col",
    srcs = ["src/im2col.c"],
    hdrs = [
        "src/xnnpack/common.h",
        "src/xnnpack/im2col.h",
    ],
    copts = xnnpack_std_copts(),
)
654
# Library built from src/indirection.c; a dependency of ":operators" and of
# the f32 igemm/dwconv benchmarks.
xnnpack_cc_library(
    name = "indirection",
    srcs = ["src/indirection.c"],
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    deps = [
        "@FP16",
        "@FXdiv",
        "@pthreadpool",
    ],
)
666
# Library built from src/operator-run.c; a dependency of ":XNNPACK".
xnnpack_cc_library(
    name = "operator_run",
    srcs = ["src/operator-run.c"],
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts() + [
        # Wrappers for multi-pass microkernels use VLAs for temporary buffers.
        "-Wno-vla",
    ],
    deps = [
        "@FP16",
        "@FXdiv",
        "@clog",
        "@pthreadpool",
    ],
)
682
# Header-less library that only propagates the XNN_ENABLE_ASSEMBLY define:
# 1 or 0 when one of the explicit settings matches, and no define otherwise.
# The two ":xnn_enable_assembly_explicit_*" conditions are declared outside
# the part of the file shown here.
cc_library(
    name = "enable_assembly",
    defines = select({
        ":xnn_enable_assembly_explicit_true": ["XNN_ENABLE_ASSEMBLY=1"],
        ":xnn_enable_assembly_explicit_false": ["XNN_ENABLE_ASSEMBLY=0"],
        "//conditions:default": [],
    }),
)
691
# Operator implementations plus library initialization and operator deletion.
# src/wasm-stubs.c is added only under ":emscripten_wasm"; the minimum-size
# copts are applied to every configuration except ":debug_build".
cc_library(
    name = "operators",
    srcs = OPERATOR_SRCS + [
        "src/init.c",
        "src/operator-delete.c",
    ] + select({
        ":emscripten_wasm": ["src/wasm-stubs.c"],
        "//conditions:default": [],
    }),
    copts = xnnpack_std_copts() + [
        "-Isrc",
        "-Iinclude",
    ] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }),
    linkstatic = True,
    textual_hdrs = INTERNAL_HDRS,
    deps = [
        ":enable_assembly",
        ":indirection",
        ":ukernels",
        "@FP16",
        "@FXdiv",
        "@clog",
        "@cpuinfo",
        "@pthreadpool",
    ],
)
721
# Public library target: exposes include/xnnpack.h and links the operator
# libraries. Visibility comes from xnnpack_visibility().
cc_library(
    name = "XNNPACK",
    hdrs = ["include/xnnpack.h"],
    includes = ["include"],
    linkstatic = True,
    visibility = xnnpack_visibility(),
    deps = [
        ":operator_run",
        ":operators",
        "@pthreadpool",
    ],
)
734
# Helper library for the benchmark targets, built from bench/utils.cc.
cc_library(
    name = "bench_utils",
    srcs = ["bench/utils.cc"],
    hdrs = ["bench/utils.h"],
    copts = ["-Wno-unused-result"],
    linkstatic = True,
    deps = ["@cpuinfo"],
)
743
######################### Benchmarks for micro-kernels #########################
745
# Benchmark built from bench/q8-gemm.cc; the ruy and gemmlowp options and
# dependencies come from the xnnpack_optional_* helpers.
xnnpack_benchmark(
    name = "q8_gemm_bench",
    srcs = [
        "bench/gemm.h",
        "bench/q8-gemm.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts() + xnnpack_optional_gemmlowp_copts(),
    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps() + xnnpack_optional_gemmlowp_deps(),
)
756
# Benchmark built from bench/f16-gemm.cc.
xnnpack_benchmark(
    name = "f16_gemm_bench",
    srcs = [
        "bench/f16-gemm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
767
# Benchmark built from bench/f32-igemm.cc; also depends on ":indirection".
xnnpack_benchmark(
    name = "f32_igemm_bench",
    srcs = [
        "bench/f32-igemm.cc",
        "bench/conv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
777
# Benchmark built from bench/f32-conv-hwc.cc.
xnnpack_benchmark(
    name = "f32_conv_hwc_bench",
    srcs = [
        "bench/f32-conv-hwc.cc",
        "bench/dconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
788
# Benchmark built from bench/f32-dwconv.cc; also depends on ":indirection".
xnnpack_benchmark(
    name = "f32_dwconv_bench",
    srcs = [
        "bench/f32-dwconv.cc",
        "bench/dwconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
798
# Benchmark built from bench/f32-dwconv-spchw.cc; also depends on
# ":indirection".
xnnpack_benchmark(
    name = "f32_dwconv_spchw_bench",
    srcs = [
        "bench/f32-dwconv-spchw.cc",
        "bench/dwconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
808
# Benchmark built from bench/f32-gemm.cc; the ruy options and dependencies
# come from the xnnpack_optional_ruy_* helpers.
xnnpack_benchmark(
    name = "f32_gemm_bench",
    srcs = [
        "bench/f32-gemm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts(),
    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps(),
)
819
# Benchmark built from bench/f32-rmax.cc.
xnnpack_benchmark(
    name = "f32_rmax_bench",
    srcs = [
        "bench/f32-rmax.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
828
# Benchmark built from bench/f32-spmm.cc.
xnnpack_benchmark(
    name = "f32_spmm_bench",
    srcs = [
        "bench/f32-spmm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
839
# Benchmark built from bench/f32-im2col-gemm.cc; also depends on ":im2col".
xnnpack_benchmark(
    name = "f32_im2col_gemm_bench",
    srcs = [
        "bench/f32-im2col-gemm.cc",
        "bench/conv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":im2col"],
)
849
########################### Benchmarks for operators ###########################
851
# Operator benchmark built from bench/add.cc.
xnnpack_benchmark(
    name = "add_bench",
    srcs = ["bench/add.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
857
# Operator benchmark built from bench/average-pooling.cc.
xnnpack_benchmark(
    name = "average_pooling_bench",
    srcs = ["bench/average-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
863
# Operator benchmark built from bench/channel-shuffle.cc.
xnnpack_benchmark(
    name = "channel_shuffle_bench",
    srcs = ["bench/channel-shuffle.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
869
# Operator benchmark built from bench/convolution.cc; the TFLite and ARM
# Compute Library options and dependencies come from the xnnpack_optional_*
# helpers.
xnnpack_benchmark(
    name = "convolution_bench",
    srcs = ["bench/convolution.cc"],
    copts = xnnpack_optional_tflite_copts() + xnnpack_optional_armcl_copts(),
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps() + xnnpack_optional_armcl_deps(),
)
876
# Operator benchmark built from bench/deconvolution.cc; the TFLite options and
# dependencies come from the xnnpack_optional_tflite_* helpers.
xnnpack_benchmark(
    name = "deconvolution_bench",
    srcs = ["bench/deconvolution.cc"],
    copts = xnnpack_optional_tflite_copts(),
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)
883
# Operator benchmark built from bench/global-average-pooling.cc.
xnnpack_benchmark(
    name = "global_average_pooling_bench",
    srcs = ["bench/global-average-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
889
# Operator benchmark built from bench/max-pooling.cc.
xnnpack_benchmark(
    name = "max_pooling_bench",
    srcs = ["bench/max-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
895
# Operator benchmark built from bench/sigmoid.cc.
xnnpack_benchmark(
    name = "sigmoid_bench",
    srcs = ["bench/sigmoid.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
901
# Operator benchmark built from bench/softargmax.cc.
xnnpack_benchmark(
    name = "softargmax_bench",
    srcs = ["bench/softargmax.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
907
############################# End-to-end benchmarks ############################
909
# Model library built from models/mobilenet-v1.cc; linked into
# ":end2end_bench".
cc_library(
    name = "mobilenet_v1",
    srcs = ["models/mobilenet-v1.cc"],
    hdrs = ["models/models.h"],
    linkstatic = True,
    deps = [
        ":XNNPACK",
        "@pthreadpool",
    ],
)
920
# Model library built from models/mobilenet-v2.cc; linked into
# ":end2end_bench".
cc_library(
    name = "mobilenet_v2",
    srcs = ["models/mobilenet-v2.cc"],
    hdrs = ["models/models.h"],
    linkstatic = True,
    deps = [
        ":XNNPACK",
        "@pthreadpool",
    ],
)
931
# End-to-end benchmark built from bench/end2end.cc against both model
# libraries.
xnnpack_benchmark(
    name = "end2end_bench",
    srcs = ["bench/end2end.cc"],
    deps = [
        ":XNNPACK",
        ":mobilenet_v1",
        ":mobilenet_v2",
        "@pthreadpool",
    ],
)
942
######################### Unit tests for micro-kernels #########################
944
# Unit test built from test/f16-gemm.cc.
xnnpack_unit_test(
    name = "f16_gemm_test",
    srcs = [
        "test/f16-gemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
954
# Unit test built from test/f32-argmaxpool.cc.
xnnpack_unit_test(
    name = "f32_argmaxpool_test",
    srcs = [
        "test/f32-argmaxpool.cc",
        "test/argmaxpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
964
# Unit test built from test/f32-avgpool.cc.
xnnpack_unit_test(
    name = "f32_avgpool_test",
    srcs = [
        "test/f32-avgpool.cc",
        "test/avgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
974
# Unit test built from test/f32-clamp.cc.
xnnpack_unit_test(
    name = "f32_clamp_test",
    srcs = [
        "test/f32-clamp.cc",
        "test/clamp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
983
# Unit test built from test/f32-igemm.cc.
xnnpack_unit_test(
    name = "f32_igemm_test",
    srcs = [
        "test/f32-igemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
993
# Unit test built from test/f32-conv-hwc.cc.
xnnpack_unit_test(
    name = "f32_conv_hwc_test",
    srcs = [
        "test/f32-conv-hwc.cc",
        "test/conv-hwc-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1003
# Unit test built from test/f32-conv-hwc2spchw.cc.
xnnpack_unit_test(
    name = "f32_conv_hwc2spchw_test",
    srcs = [
        "test/f32-conv-hwc2spchw.cc",
        "test/conv-hwc2spchw-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1013
# Unit test built from test/f32-dwconv.cc.
xnnpack_unit_test(
    name = "f32_dwconv_test",
    srcs = [
        "test/f32-dwconv.cc",
        "test/dwconv-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1023
# Unit test built from test/f32-dwconv-spchw.cc.
xnnpack_unit_test(
    name = "f32_dwconv_spchw_test",
    srcs = [
        "test/f32-dwconv-spchw.cc",
        "test/dwconv-spchw-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1033
# Unit test built from test/f32-gavgpool.cc.
xnnpack_unit_test(
    name = "f32_gavgpool_test",
    srcs = [
        "test/f32-gavgpool.cc",
        "test/gavgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1043
# Unit test built from test/f32-gavgpool-spchw.cc.
xnnpack_unit_test(
    name = "f32_gavgpool_spchw_test",
    srcs = [
        "test/f32-gavgpool-spchw.cc",
        "test/gavgpool-spchw-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1053
# Unit test built from test/f32-gemm.cc.
xnnpack_unit_test(
    name = "f32_gemm_test",
    srcs = [
        "test/f32-gemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1063
# Unit test built from test/f32-gemminc.cc.
xnnpack_unit_test(
    name = "f32_gemminc_test",
    srcs = [
        "test/f32-gemminc.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1073
# Unit test built from test/f32-hswish.cc.
xnnpack_unit_test(
    name = "f32_hswish_test",
    srcs = [
        "test/f32-hswish.cc",
        "test/hswish-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1082
# Unit test built from test/f32-maxpool.cc.
xnnpack_unit_test(
    name = "f32_maxpool_test",
    srcs = [
        "test/f32-maxpool.cc",
        "test/maxpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1091
# Unit test built from test/f32-pavgpool.cc; it shares
# test/avgpool-microkernel-tester.h with the avgpool tests.
xnnpack_unit_test(
    name = "f32_pavgpool_test",
    srcs = [
        "test/f32-pavgpool.cc",
        "test/avgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1101
# Unit test built from test/f32-ppmm.cc; it shares
# test/gemm-microkernel-tester.h with the GEMM tests.
xnnpack_unit_test(
    name = "f32_ppmm_test",
    srcs = [
        "test/f32-ppmm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1111
# Unit test built from test/f32-prelu.cc.
xnnpack_unit_test(
    name = "f32_prelu_test",
    srcs = [
        "test/f32-prelu.cc",
        "test/prelu-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1121
# Unit test built from test/f32-rmax.cc.
xnnpack_unit_test(
    name = "f32_rmax_test",
    srcs = [
        "test/f32-rmax.cc",
        "test/rmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1130
# Builds test/f32-spmm.cc with spmm-microkernel-tester.h and AlignedAllocator.h.
# WEIGHTS_PACK_HDRS is not added to srcs for this target.
1131xnnpack_unit_test(
1132 name = "f32_spmm_test",
1133 srcs = [
1134 "test/f32-spmm.cc",
1135 "test/spmm-microkernel-tester.h",
1136 "src/xnnpack/AlignedAllocator.h",
1137 ] + MICROKERNEL_TEST_HDRS,
1138 deps = MICROKERNEL_TEST_DEPS,
1139)
1140
# Builds test/f32-vadd.cc; vadd-microkernel-tester.h is also listed by q8_vadd_test.
1141xnnpack_unit_test(
1142 name = "f32_vadd_test",
1143 srcs = [
1144 "test/f32-vadd.cc",
1145 "test/vadd-microkernel-tester.h",
1146 ] + MICROKERNEL_TEST_HDRS,
1147 deps = MICROKERNEL_TEST_DEPS,
1148)
1149
# Builds test/f32-vsub.cc with vsub-microkernel-tester.h.
1150xnnpack_unit_test(
1151 name = "f32_vsub_test",
1152 srcs = [
1153 "test/f32-vsub.cc",
1154 "test/vsub-microkernel-tester.h",
1155 ] + MICROKERNEL_TEST_HDRS,
1156 deps = MICROKERNEL_TEST_DEPS,
1157)
1158
# Builds test/f32-vmul.cc with vmul-microkernel-tester.h.
1159xnnpack_unit_test(
1160 name = "f32_vmul_test",
1161 srcs = [
1162 "test/f32-vmul.cc",
1163 "test/vmul-microkernel-tester.h",
1164 ] + MICROKERNEL_TEST_HDRS,
1165 deps = MICROKERNEL_TEST_DEPS,
1166)
1167
# Builds test/f32-vmulcaddc.cc with vmulcaddc-microkernel-tester.h; srcs also include
# AlignedAllocator.h and WEIGHTS_PACK_HDRS.
1168xnnpack_unit_test(
1169 name = "f32_vmulcaddc_test",
1170 srcs = [
1171 "test/f32-vmulcaddc.cc",
1172 "test/vmulcaddc-microkernel-tester.h",
1173 "src/xnnpack/AlignedAllocator.h",
1174 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
1175 deps = MICROKERNEL_TEST_DEPS,
1176)
1177
# Builds test/q8-avgpool.cc; avgpool-microkernel-tester.h is also listed by f32_pavgpool_test.
1178xnnpack_unit_test(
1179 name = "q8_avgpool_test",
1180 srcs = [
1181 "test/q8-avgpool.cc",
1182 "test/avgpool-microkernel-tester.h",
1183 "src/xnnpack/AlignedAllocator.h",
1184 ] + MICROKERNEL_TEST_HDRS,
1185 deps = MICROKERNEL_TEST_DEPS,
1186)
1187
# Builds test/q8-igemm.cc. The tester header is gemm-microkernel-tester.h, the same one
# q8_gemm_test lists; no igemm-specific tester header appears in srcs.
1188xnnpack_unit_test(
1189 name = "q8_igemm_test",
1190 srcs = [
1191 "test/q8-igemm.cc",
1192 "test/gemm-microkernel-tester.h",
1193 "src/xnnpack/AlignedAllocator.h",
1194 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
1195 deps = MICROKERNEL_TEST_DEPS,
1196)
1197
# Builds test/q8-dwconv.cc with dwconv-microkernel-tester.h, AlignedAllocator.h and
# WEIGHTS_PACK_HDRS.
1198xnnpack_unit_test(
1199 name = "q8_dwconv_test",
1200 srcs = [
1201 "test/q8-dwconv.cc",
1202 "test/dwconv-microkernel-tester.h",
1203 "src/xnnpack/AlignedAllocator.h",
1204 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
1205 deps = MICROKERNEL_TEST_DEPS,
1206)
1207
# Builds test/q8-gavgpool.cc with gavgpool-microkernel-tester.h and AlignedAllocator.h.
1208xnnpack_unit_test(
1209 name = "q8_gavgpool_test",
1210 srcs = [
1211 "test/q8-gavgpool.cc",
1212 "test/gavgpool-microkernel-tester.h",
1213 "src/xnnpack/AlignedAllocator.h",
1214 ] + MICROKERNEL_TEST_HDRS,
1215 deps = MICROKERNEL_TEST_DEPS,
1216)
1217
# Builds test/q8-gemm.cc; lists the same gemm-microkernel-tester.h header as the f32 GEMM
# tests and q8_igemm_test.
1218xnnpack_unit_test(
1219 name = "q8_gemm_test",
1220 srcs = [
1221 "test/q8-gemm.cc",
1222 "test/gemm-microkernel-tester.h",
1223 "src/xnnpack/AlignedAllocator.h",
1224 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
1225 deps = MICROKERNEL_TEST_DEPS,
1226)
1227
# Builds test/q8-vadd.cc; vadd-microkernel-tester.h is also listed by f32_vadd_test.
1228xnnpack_unit_test(
1229 name = "q8_vadd_test",
1230 srcs = [
1231 "test/q8-vadd.cc",
1232 "test/vadd-microkernel-tester.h",
1233 ] + MICROKERNEL_TEST_HDRS,
1234 deps = MICROKERNEL_TEST_DEPS,
1235)
1236
# Builds test/u8-clamp.cc with clamp-microkernel-tester.h.
1237xnnpack_unit_test(
1238 name = "u8_clamp_test",
1239 srcs = [
1240 "test/u8-clamp.cc",
1241 "test/clamp-microkernel-tester.h",
1242 ] + MICROKERNEL_TEST_HDRS,
1243 deps = MICROKERNEL_TEST_DEPS,
1244)
1245
# Builds test/u8-lut32norm.cc. Its tester header is lut-norm-microkernel-tester.h, which is
# a different file from the lut-microkernel-tester.h listed by x8_lut_test.
1246xnnpack_unit_test(
1247 name = "u8_lut32norm_test",
1248 srcs = [
1249 "test/u8-lut32norm.cc",
1250 "test/lut-norm-microkernel-tester.h",
1251 ] + MICROKERNEL_TEST_HDRS,
1252 deps = MICROKERNEL_TEST_DEPS,
1253)
1254
# Builds test/u8-maxpool.cc; maxpool-microkernel-tester.h is also listed by f32_maxpool_test.
1255xnnpack_unit_test(
1256 name = "u8_maxpool_test",
1257 srcs = [
1258 "test/u8-maxpool.cc",
1259 "test/maxpool-microkernel-tester.h",
1260 ] + MICROKERNEL_TEST_HDRS,
1261 deps = MICROKERNEL_TEST_DEPS,
1262)
1263
# Builds test/u8-rmax.cc; rmax-microkernel-tester.h is also listed by f32_rmax_test.
1264xnnpack_unit_test(
1265 name = "u8_rmax_test",
1266 srcs = [
1267 "test/u8-rmax.cc",
1268 "test/rmax-microkernel-tester.h",
1269 ] + MICROKERNEL_TEST_HDRS,
1270 deps = MICROKERNEL_TEST_DEPS,
1271)
1272
# Builds test/x32-packx.cc with pack-microkernel-tester.h and AlignedAllocator.h.
1273xnnpack_unit_test(
1274 name = "x32_packx_test",
1275 srcs = [
1276 "test/x32-packx.cc",
1277 "test/pack-microkernel-tester.h",
1278 "src/xnnpack/AlignedAllocator.h",
1279 ] + MICROKERNEL_TEST_HDRS,
1280 deps = MICROKERNEL_TEST_DEPS,
1281)
1282
# Builds test/x32-pad.cc with pad-microkernel-tester.h.
1283xnnpack_unit_test(
1284 name = "x32_pad_test",
1285 srcs = [
1286 "test/x32-pad.cc",
1287 "test/pad-microkernel-tester.h",
1288 ] + MICROKERNEL_TEST_HDRS,
1289 deps = MICROKERNEL_TEST_DEPS,
1290)
1291
# Builds test/x32-unpool.cc with unpool-microkernel-tester.h.
1292xnnpack_unit_test(
1293 name = "x32_unpool_test",
1294 srcs = [
1295 "test/x32-unpool.cc",
1296 "test/unpool-microkernel-tester.h",
1297 ] + MICROKERNEL_TEST_HDRS,
1298 deps = MICROKERNEL_TEST_DEPS,
1299)
1300
# Builds test/x32-zip.cc; zip-microkernel-tester.h is also listed by x8_zip_test.
1301xnnpack_unit_test(
1302 name = "x32_zip_test",
1303 srcs = [
1304 "test/x32-zip.cc",
1305 "test/zip-microkernel-tester.h",
1306 ] + MICROKERNEL_TEST_HDRS,
1307 deps = MICROKERNEL_TEST_DEPS,
1308)
1309
# Builds test/x8-lut.cc with lut-microkernel-tester.h (not the lut-norm tester that
# u8_lut32norm_test lists).
1310xnnpack_unit_test(
1311 name = "x8_lut_test",
1312 srcs = [
1313 "test/x8-lut.cc",
1314 "test/lut-microkernel-tester.h",
1315 ] + MICROKERNEL_TEST_HDRS,
1316 deps = MICROKERNEL_TEST_DEPS,
1317)
1318
# Builds test/x8-zip.cc; zip-microkernel-tester.h is also listed by x32_zip_test.
1319xnnpack_unit_test(
1320 name = "x8_zip_test",
1321 srcs = [
1322 "test/x8-zip.cc",
1323 "test/zip-microkernel-tester.h",
1324 ] + MICROKERNEL_TEST_HDRS,
1325 deps = MICROKERNEL_TEST_DEPS,
1326)
1327
1328########################### Size test for the library ##########################
1329
# Declared with xnnpack_binary rather than xnnpack_unit_test: a binary built from the single
# C source test/size.c whose only dependency is the :XNNPACK target.
1330xnnpack_binary(
1331 name = "size_test",
1332 srcs = ["test/size.c"],
1333 deps = [":XNNPACK"],
1334)
1335
1336########################### Unit tests for operators ###########################
1337
# Operator test built from test/add.cc and add-operator-tester.h; links OPERATOR_TEST_DEPS
# (which lists :XNNPACK) rather than the micro-kernel test deps.
1338xnnpack_unit_test(
1339 name = "add_test",
1340 srcs = [
1341 "test/add.cc",
1342 "test/add-operator-tester.h",
1343 ],
1344 deps = OPERATOR_TEST_DEPS,
1345)
1346
# Operator test built from test/argmax-pooling.cc and its operator tester header; srcs also
# append OPERATOR_TEST_PARAMS_HDRS.
1347xnnpack_unit_test(
1348 name = "argmax_pooling_test",
1349 srcs = [
1350 "test/argmax-pooling.cc",
1351 "test/argmax-pooling-operator-tester.h",
1352 ] + OPERATOR_TEST_PARAMS_HDRS,
1353 deps = OPERATOR_TEST_DEPS,
1354)
1355
# Operator test built from test/average-pooling.cc and its operator tester header; srcs
# also append OPERATOR_TEST_PARAMS_HDRS.
1356xnnpack_unit_test(
1357 name = "average_pooling_test",
1358 srcs = [
1359 "test/average-pooling.cc",
1360 "test/average-pooling-operator-tester.h",
1361 ] + OPERATOR_TEST_PARAMS_HDRS,
1362 deps = OPERATOR_TEST_DEPS,
1363)
1364
# Operator test built from test/channel-pad.cc and its operator tester header; srcs also
# append OPERATOR_TEST_PARAMS_HDRS.
1365xnnpack_unit_test(
1366 name = "channel_pad_test",
1367 srcs = [
1368 "test/channel-pad.cc",
1369 "test/channel-pad-operator-tester.h",
1370 ] + OPERATOR_TEST_PARAMS_HDRS,
1371 deps = OPERATOR_TEST_DEPS,
1372)
1373
# Operator test built from test/channel-shuffle.cc and channel-shuffle-operator-tester.h;
# no extra header group is appended to srcs.
1374xnnpack_unit_test(
1375 name = "channel_shuffle_test",
1376 srcs = [
1377 "test/channel-shuffle.cc",
1378 "test/channel-shuffle-operator-tester.h",
1379 ],
1380 deps = OPERATOR_TEST_DEPS,
1381)
1382
# Operator test built from test/clamp.cc and clamp-operator-tester.h. This is distinct from
# u8_clamp_test, which lists clamp-microkernel-tester.h and links the micro-kernel deps.
1383xnnpack_unit_test(
1384 name = "clamp_test",
1385 srcs = [
1386 "test/clamp.cc",
1387 "test/clamp-operator-tester.h",
1388 ],
1389 deps = OPERATOR_TEST_DEPS,
1390)
1391
# Operator test built from test/convolution.cc and convolution-operator-tester.h. Unlike
# deconvolution_test, OPERATOR_TEST_PARAMS_HDRS is not appended to srcs here.
1392xnnpack_unit_test(
1393 name = "convolution_test",
1394 srcs = [
1395 "test/convolution.cc",
1396 "test/convolution-operator-tester.h",
1397 ],
1398 deps = OPERATOR_TEST_DEPS,
1399)
1400
# Operator test built from test/convolution-spnchw.cc and its own
# convolution-spnchw-operator-tester.h (separate from the convolution_test tester header).
1401xnnpack_unit_test(
1402 name = "convolution_spnchw_test",
1403 srcs = [
1404 "test/convolution-spnchw.cc",
1405 "test/convolution-spnchw-operator-tester.h",
1406 ],
1407 deps = OPERATOR_TEST_DEPS,
1408)
1409
# Operator test built from test/deconvolution.cc and its operator tester header; srcs also
# append OPERATOR_TEST_PARAMS_HDRS.
1410xnnpack_unit_test(
1411 name = "deconvolution_test",
1412 srcs = [
1413 "test/deconvolution.cc",
1414 "test/deconvolution-operator-tester.h",
1415 ] + OPERATOR_TEST_PARAMS_HDRS,
1416 deps = OPERATOR_TEST_DEPS,
1417)
1418
# Operator test built from test/fully-connected.cc and fully-connected-operator-tester.h.
1419xnnpack_unit_test(
1420 name = "fully_connected_test",
1421 srcs = [
1422 "test/fully-connected.cc",
1423 "test/fully-connected-operator-tester.h",
1424 ],
1425 deps = OPERATOR_TEST_DEPS,
1426)
1427
# Operator test built from test/global-average-pooling.cc and its operator tester header;
# srcs also append OPERATOR_TEST_PARAMS_HDRS.
1428xnnpack_unit_test(
1429 name = "global_average_pooling_test",
1430 srcs = [
1431 "test/global-average-pooling.cc",
1432 "test/global-average-pooling-operator-tester.h",
1433 ] + OPERATOR_TEST_PARAMS_HDRS,
1434 deps = OPERATOR_TEST_DEPS,
1435)
1436
# Operator test built from test/global-average-pooling-spnchw.cc and its own tester header.
# Unlike global_average_pooling_test, OPERATOR_TEST_PARAMS_HDRS is not appended.
1437xnnpack_unit_test(
1438 name = "global_average_pooling_spnchw_test",
1439 srcs = [
1440 "test/global-average-pooling-spnchw.cc",
1441 "test/global-average-pooling-spnchw-operator-tester.h",
1442 ],
1443 deps = OPERATOR_TEST_DEPS,
1444)
1445
# Operator test built from test/hardswish.cc and hardswish-operator-tester.h.
1446xnnpack_unit_test(
1447 name = "hardswish_test",
1448 srcs = [
1449 "test/hardswish.cc",
1450 "test/hardswish-operator-tester.h",
1451 ],
1452 deps = OPERATOR_TEST_DEPS,
1453)
1454
# Operator test built from test/leaky-relu.cc and leaky-relu-operator-tester.h.
1455xnnpack_unit_test(
1456 name = "leaky_relu_test",
1457 srcs = [
1458 "test/leaky-relu.cc",
1459 "test/leaky-relu-operator-tester.h",
1460 ],
1461 deps = OPERATOR_TEST_DEPS,
1462)
1463
# Operator test built from test/max-pooling.cc and its operator tester header; srcs also
# append OPERATOR_TEST_PARAMS_HDRS.
1464xnnpack_unit_test(
1465 name = "max_pooling_test",
1466 srcs = [
1467 "test/max-pooling.cc",
1468 "test/max-pooling-operator-tester.h",
1469 ] + OPERATOR_TEST_PARAMS_HDRS,
1470 deps = OPERATOR_TEST_DEPS,
1471)
1472
# Operator test built from test/prelu.cc and prelu-operator-tester.h; srcs also append
# OPERATOR_TEST_PARAMS_HDRS. Distinct from the micro-kernel level f32_prelu_test.
1473xnnpack_unit_test(
1474 name = "prelu_test",
1475 srcs = [
1476 "test/prelu.cc",
1477 "test/prelu-operator-tester.h",
1478 ] + OPERATOR_TEST_PARAMS_HDRS,
1479 deps = OPERATOR_TEST_DEPS,
1480)
1481
# Operator test built from test/sigmoid.cc and sigmoid-operator-tester.h.
1482xnnpack_unit_test(
1483 name = "sigmoid_test",
1484 srcs = [
1485 "test/sigmoid.cc",
1486 "test/sigmoid-operator-tester.h",
1487 ],
1488 deps = OPERATOR_TEST_DEPS,
1489)
1490
# Operator test built from test/softargmax.cc and softargmax-operator-tester.h.
1491xnnpack_unit_test(
1492 name = "softargmax_test",
1493 srcs = [
1494 "test/softargmax.cc",
1495 "test/softargmax-operator-tester.h",
1496 ],
1497 deps = OPERATOR_TEST_DEPS,
1498)
1499
# Operator test built from test/unpooling.cc and unpooling-operator-tester.h.
1500xnnpack_unit_test(
1501 name = "unpooling_test",
1502 srcs = [
1503 "test/unpooling.cc",
1504 "test/unpooling-operator-tester.h",
1505 ],
1506 deps = OPERATOR_TEST_DEPS,
1507)
1508
1509############################# Build configurations #############################
1510
# Matches builds where --cpu=k8. Only the cpu value is constrained; despite the name,
# nothing in this setting checks the operating system.
1511config_setting(
1512 name = "linux_k8",
1513 values = {
1514 "cpu": "k8",
1515 },
1516)
1517
# Matches builds where --cpu=aarch64. As with linux_k8, only the cpu value is constrained.
1518config_setting(
Marat Dukhan4e45e662019-10-03 15:40:24 -07001519 name = "linux_aarch64",
1520 values = {
1521 "cpu": "aarch64",
1522 },
1523)
1524
# Matches any build using the Android crosstool (--crosstool_top=//external:android/crosstool),
# whatever the cpu; the android_* settings below additionally pin --cpu.
1525config_setting(
Marat Dukhan08c4a432019-10-03 09:29:21 -07001526 name = "android",
1527 values = {"crosstool_top": "//external:android/crosstool"},
1528)
1529
# Matches the Android crosstool combined with --cpu=armeabi-v7a.
1530config_setting(
1531 name = "android_armv7",
1532 values = {
1533 "crosstool_top": "//external:android/crosstool",
1534 "cpu": "armeabi-v7a",
1535 },
1536)
1537
# Matches the Android crosstool combined with --cpu=arm64-v8a.
1538config_setting(
1539 name = "android_arm64",
1540 values = {
1541 "crosstool_top": "//external:android/crosstool",
1542 "cpu": "arm64-v8a",
1543 },
1544)
1545
# Matches the Android crosstool combined with --cpu=x86.
1546config_setting(
1547 name = "android_x86",
1548 values = {
1549 "crosstool_top": "//external:android/crosstool",
1550 "cpu": "x86",
1551 },
1552)
1553
# Matches the Android crosstool combined with --cpu=x86_64.
1554config_setting(
1555 name = "android_x86_64",
1556 values = {
1557 "crosstool_top": "//external:android/crosstool",
1558 "cpu": "x86_64",
1559 },
1560)
1561
# Matches any build using the Emscripten crosstool (--crosstool_top=//toolchain/emscripten),
# whatever the cpu; the emscripten_* settings below additionally pin --cpu.
1562config_setting(
1563 name = "emscripten",
Marat Dukhan9d056a42019-10-03 12:13:35 -07001564 values = {"crosstool_top": "//toolchain/emscripten"},
Marat Dukhan08c4a432019-10-03 09:29:21 -07001565)
1566
# Matches the Emscripten crosstool combined with --cpu=wasm. Builds that also set
# --features=wasmsimd satisfy this setting as well as emscripten_wasmsimd.
1567config_setting(
1568 name = "emscripten_wasm",
1569 values = {
Marat Dukhan9d056a42019-10-03 12:13:35 -07001570 "crosstool_top": "//toolchain/emscripten",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001571 "cpu": "wasm",
1572 },
1573)
1574
# Matches the Emscripten crosstool with --cpu=wasm and --features=wasmsimd. Its values are a
# strict superset of emscripten_wasm's, so it is the more specific of the two settings.
1575config_setting(
1576 name = "emscripten_wasmsimd",
1577 values = {
Marat Dukhan9d056a42019-10-03 12:13:35 -07001578 "crosstool_top": "//toolchain/emscripten",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001579 "cpu": "wasm",
1580 "features": "wasmsimd",
1581 },
1582)
1583
# Matches the Emscripten crosstool combined with --cpu=asmjs.
1584config_setting(
1585 name = "emscripten_asmjs",
1586 values = {
Marat Dukhan9d056a42019-10-03 12:13:35 -07001587 "crosstool_top": "//toolchain/emscripten",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001588 "cpu": "asmjs",
1589 },
1590)
1591
1592# Builds with -c dbg
# (compilation_mode "dbg" is what `-c dbg` sets; publicly visible so other packages can select on it.)
1593config_setting(
1594 name = "debug_build",
1595 values = {
1596 "compilation_mode": "dbg",
1597 },
1598 visibility = ["//visibility:public"],
1599)
1600
1601# Builds with -c opt
# (compilation_mode "opt" is what `-c opt` sets; publicly visible so other packages can select on it.)
1602config_setting(
1603 name = "optimized_build",
1604 values = {
1605 "compilation_mode": "opt",
1606 },
1607 visibility = ["//visibility:public"],
1608)
1609
1610# Enables usage of assembly kernels.
# Matched only when the build passes --define xnn_enable_assembly=true explicitly.
1611config_setting(
1612 name = "xnn_enable_assembly_explicit_true",
1613 define_values = {"xnn_enable_assembly": "true"},
1614)
1615
1616# Disables usage of assembly kernels.
# Matched only when the build passes --define xnn_enable_assembly=false explicitly.
1617config_setting(
1618 name = "xnn_enable_assembly_explicit_false",
1619 define_values = {"xnn_enable_assembly": "false"},
1620)