# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# Description:
#   XNNPACK - optimized floating-point neural network operators library
8
# Package-level declarations: license type and the LICENSE file made
# referenceable from other packages.
licenses(["notice"])

exports_files(["LICENSE"])

# Build macros and option helpers used by the targets in this file.
load(
    ":build_defs.bzl",
    "xnnpack_aggregate_library",
    "xnnpack_benchmark",
    "xnnpack_binary",
    "xnnpack_cc_library",
    "xnnpack_min_size_copts",
    "xnnpack_optional_armcl_copts",
    "xnnpack_optional_armcl_deps",
    "xnnpack_optional_gemmlowp_copts",
    "xnnpack_optional_gemmlowp_deps",
    "xnnpack_optional_ruy_copts",
    "xnnpack_optional_ruy_deps",
    "xnnpack_optional_tflite_copts",
    "xnnpack_optional_tflite_deps",
    "xnnpack_std_copts",
    "xnnpack_unit_test",
    "xnnpack_visibility",
)
14
# Dependencies shared by the operator-level benchmark targets (`*_bench`
# targets built from a single bench/<operator>.cc source).
OPERATOR_BENCHMARK_DEPS = [
    ":XNNPACK",
    ":bench_utils",
    "@cpuinfo",
    "@pthreadpool",
]
21
# Dependencies shared by the micro-kernel benchmark targets; these link the
# aggregated ":ukernels" library rather than the public ":XNNPACK" target.
MICROKERNEL_BENCHMARK_DEPS = [
    ":ukernels",
    ":bench_utils",
    "@cpuinfo",
    "@FP16",
    "@pthreadpool",
]
29
# Dependencies shared by the micro-kernel unit tests.
MICROKERNEL_TEST_DEPS = [
    ":ukernels",
    "@cpuinfo",
    "@FP16",
    "@pthreadpool",
]
36
# Dependencies for operator-level tests.
# NOTE(review): no consumer is visible in this part of the file; presumably
# used by operator test targets defined further down - confirm.
OPERATOR_TEST_DEPS = [
    ":XNNPACK",
    "@pthreadpool",
    "@FP16",
]
42
# C sources implementing the operators; compiled into the ":operators"
# library together with src/init.c and src/operator-delete.c.
OPERATOR_SRCS = [
    "src/add.c",
    "src/argmax-pooling.c",
    "src/average-pooling.c",
    "src/channel-pad.c",
    "src/channel-shuffle.c",
    "src/clamp.c",
    "src/convolution-spnchw.c",
    "src/convolution.c",
    "src/deconvolution.c",
    "src/fully-connected.c",
    "src/global-average-pooling-spnchw.c",
    "src/global-average-pooling.c",
    "src/hardswish.c",
    "src/leaky-relu.c",
    "src/max-pooling.c",
    "src/prelu.c",
    "src/sigmoid.c",
    "src/softargmax.c",
    "src/unpooling.c",
]
64
# Sources of the scalar micro-kernels; compiled by ":scalar_ukernels".
SCALAR_UKERNELS = [
    "src/f32-argmaxpool/mp9p8q-scalar.c",
    "src/f32-argmaxpool/up4-scalar.c",
    "src/f32-argmaxpool/up9-scalar.c",
    "src/f32-avgpool/mp9p8q-scalar.c",
    "src/f32-avgpool/up9-scalar.c",
    "src/f32-clamp/scalar.c",
    "src/f32-igemm/1x4-scalar.c",
    "src/f32-igemm/2x4-scalar.c",
    "src/f32-igemm/4x2-scalar.c",
    "src/f32-igemm/4x4-scalar.c",
    "src/f32-dwconv/up1x25-scalar.c",
    "src/f32-dwconv/up1x4-scalar.c",
    "src/f32-dwconv/up1x9-scalar.c",
    "src/f32-gavgpool/mp7p7q-scalar.c",
    "src/f32-gavgpool/up7-scalar.c",
    "src/f32-gemm/1x4-scalar.c",
    "src/f32-gemm/2x4-scalar.c",
    "src/f32-gemm/4x2-scalar.c",
    "src/f32-gemm/4x4-scalar.c",
    "src/f32-gemminc/1x4-scalar.c",
    "src/f32-gemminc/2x4-scalar.c",
    "src/f32-gemminc/4x4-scalar.c",
    "src/f32-hswish/scalar.c",
    "src/f32-maxpool/9p8q-scalar.c",
    "src/f32-pavgpool/mp9p8q-scalar.c",
    "src/f32-pavgpool/up9-scalar.c",
    "src/f32-ppmm/2x4-scalar.c",
    "src/f32-ppmm/3x3-scalar.c",
    "src/f32-ppmm/4x2-scalar.c",
    "src/f32-ppmm/4x4-scalar.c",
    "src/f32-prelu/x4-scalar.c",
    "src/f32-rmax/scalar.c",
    "src/f32-spmm/1x1-scalar-pipelined.c",
    "src/f32-spmm/1x1-scalar-unroll2.c",
    "src/f32-spmm/1x1-scalar.c",
    "src/f32-spmm/2x1-scalar-pipelined.c",
    "src/f32-spmm/2x1-scalar-unroll2.c",
    "src/f32-spmm/2x1-scalar.c",
    "src/f32-spmm/4x1-scalar-pipelined.c",
    "src/f32-spmm/4x1-scalar-unroll2.c",
    "src/f32-spmm/4x1-scalar.c",
    "src/f32-spmm/8x1-scalar-pipelined.c",
    "src/f32-spmm/8x1-scalar-unroll2.c",
    "src/f32-spmm/8x1-scalar.c",
    "src/f32-vadd/scalar.c",
    "src/f32-vmul/scalar.c",
    "src/f32-vmulcaddc/c1-scalar-x2.c",
    "src/f32-vsub/scalar.c",
    "src/q8-avgpool/mp9p8q-scalar.c",
    "src/q8-avgpool/up9-scalar.c",
    "src/q8-igemm/2x2-scalar.c",
    "src/q8-dwconv/up1x9-scalar.c",
    "src/q8-gavgpool/mp7p7q-scalar.c",
    "src/q8-gavgpool/up7-scalar.c",
    "src/q8-gemm/2x2-scalar.c",
    "src/q8-vadd/scalar.c",
    "src/u8-clamp/scalar.c",
    "src/u8-lut32norm/scalar.c",
    "src/u8-maxpool/9p8q-scalar.c",
    "src/u8-rmax/scalar.c",
    "src/x32-packx/x2-scalar.c",
    "src/x32-packx/x3-scalar.c",
    "src/x32-packx/x4-scalar.c",
    "src/x32-pad/x2-scalar.c",
    "src/x32-unpool/scalar.c",
    "src/x32-zip/x2-scalar.c",
    "src/x32-zip/x3-scalar.c",
    "src/x32-zip/x4-scalar.c",
    "src/x32-zip/xm-scalar.c",
    "src/x8-lut/scalar.c",
    "src/x8-zip/x2-scalar.c",
    "src/x8-zip/x3-scalar.c",
    "src/x8-zip/x4-scalar.c",
    "src/x8-zip/xm-scalar.c",
]
141
# Sources of the psimd micro-kernels; compiled by ":psimd_ukernels", which
# depends on the "@psimd" library.
PSIMD_UKERNELS = [
    "src/f32-argmaxpool/mp9p8q-psimd.c",
    "src/f32-argmaxpool/up4-psimd.c",
    "src/f32-argmaxpool/up9-psimd.c",
    "src/f32-avgpool/mp9p8q-psimd.c",
    "src/f32-avgpool/up9-psimd.c",
    "src/f32-clamp/psimd.c",
    "src/f32-igemm/1x8-psimd-loadsplat.c",
    "src/f32-igemm/1x8-psimd-splat.c",
    "src/f32-igemm/1x8s4-psimd.c",
    "src/f32-igemm/4x2c4-psimd.c",
    "src/f32-igemm/4x8-psimd-loadsplat.c",
    "src/f32-igemm/4x8-psimd-splat.c",
    "src/f32-igemm/4x8s4-psimd.c",
    "src/f32-igemm/6x8-psimd-loadsplat.c",
    "src/f32-igemm/6x8-psimd-splat.c",
    "src/f32-igemm/6x8s4-psimd.c",
    "src/f32-dwconv/up4x25-psimd.c",
    "src/f32-dwconv/up4x4-psimd.c",
    "src/f32-dwconv/up4x9-psimd.c",
    "src/f32-gavgpool/mp7p7q-psimd.c",
    "src/f32-gavgpool/up7-psimd.c",
    "src/f32-gemm/1x8-psimd-loadsplat.c",
    "src/f32-gemm/1x8-psimd-splat.c",
    "src/f32-gemm/1x8s4-psimd.c",
    "src/f32-gemm/4x8-psimd-loadsplat.c",
    "src/f32-gemm/4x8-psimd-splat.c",
    "src/f32-gemm/4x8s4-psimd.c",
    "src/f32-gemm/6x8-psimd-loadsplat.c",
    "src/f32-gemm/6x8-psimd-splat.c",
    "src/f32-gemm/6x8s4-psimd.c",
    "src/f32-gemminc/1x8-psimd-loadsplat.c",
    "src/f32-gemminc/1x8-psimd-splat.c",
    "src/f32-gemminc/1x8s4-psimd.c",
    "src/f32-gemminc/4x8-psimd-loadsplat.c",
    "src/f32-gemminc/4x8-psimd-splat.c",
    "src/f32-gemminc/4x8s4-psimd.c",
    "src/f32-gemminc/6x8-psimd-loadsplat.c",
    "src/f32-gemminc/6x8-psimd-splat.c",
    "src/f32-gemminc/6x8s4-psimd.c",
    "src/f32-hswish/psimd.c",
    "src/f32-maxpool/9p8q-psimd.c",
    "src/f32-pavgpool/mp9p8q-psimd.c",
    "src/f32-pavgpool/up9-psimd.c",
    "src/f32-ppmm/4x8-psimd.c",
    "src/f32-prelu/x4-psimd.c",
    "src/f32-vadd/psimd.c",
    "src/f32-vmul/psimd.c",
    "src/f32-vmulcaddc/c4-psimd-x2.c",
    "src/f32-vsub/psimd.c",
    "src/x32-packx/x4-psimd.c",
    "src/x32-pad/x2-psimd.c",
    "src/x32-unpool/psimd.c",
    "src/x32-zip/x2-psimd.c",
    "src/x32-zip/x3-psimd.c",
    "src/x32-zip/x4-psimd.c",
    "src/x32-zip/xm-psimd.c",
]
200
# ISA-specific micro-kernels

# NEON micro-kernel sources; ":neon_ukernels" builds this same list for both
# AArch32 and AArch64.
NEON_UKERNELS = [
    "src/f32-avgpool/mp9p8q-neon.c",
    "src/f32-avgpool/up9-neon.c",
    "src/f32-clamp/neon.c",
    "src/f32-igemm/1x8-neon-ld64.c",
    "src/f32-igemm/4x12-neon-ld64.c",
    "src/f32-igemm/4x2-neon-ld64.c",
    "src/f32-igemm/4x4-neon-ld64.c",
    "src/f32-igemm/4x8-neon-ld128.c",
    "src/f32-igemm/4x8-neon-ld64.c",
    "src/f32-igemm/6x8-neon-ld64.c",
    "src/f32-dwconv/up4x9-neon.c",
    "src/f32-gavgpool-spchw/neon-x4.c",
    "src/f32-gavgpool/mp7p7q-neon.c",
    "src/f32-gavgpool/up7-neon.c",
    "src/f32-gemm/1x8-neon-ld64.c",
    "src/f32-gemm/4x12-neon-ld64.c",
    "src/f32-gemm/4x2-neon-ld64.c",
    "src/f32-gemm/4x8-neon-ld128.c",
    "src/f32-gemm/4x8-neon-ld64.c",
    "src/f32-gemm/5x8-neon-ld64.c",
    "src/f32-gemm/6x8-neon-ld64.c",
    "src/f32-gemminc/1x8-neon-ld64.c",
    "src/f32-gemminc/4x12-neon-ld64.c",
    "src/f32-gemminc/4x8-neon-ld128.c",
    "src/f32-gemminc/4x8-neon-ld64.c",
    "src/f32-gemminc/5x8-neon-ld64.c",
    "src/f32-gemminc/6x8-neon-ld64.c",
    "src/f32-hswish/neon.c",
    "src/f32-pavgpool/mp9p8q-neon.c",
    "src/f32-pavgpool/up9-neon.c",
    "src/f32-ppmm/4x8-neon.c",
    "src/f32-ppmm/8x8-neon.c",
    "src/f32-rmax/neon.c",
    "src/f32-vmulcaddc/c4-neon-x2.c",
    "src/q8-avgpool/mp9p8q-neon.c",
    "src/q8-avgpool/up9-neon.c",
    "src/q8-igemm/4x8-neon.c",
    "src/q8-igemm/8x8-neon.c",
    "src/q8-dwconv/up8x9-neon.c",
    "src/q8-gavgpool/mp7p7q-neon.c",
    "src/q8-gavgpool/up7-neon.c",
    "src/q8-gemm/4x8-neon.c",
    "src/q8-gemm/8x8-neon.c",
    "src/q8-vadd/neon.c",
    "src/u8-clamp/neon.c",
    "src/u8-maxpool/9p8q-neon.c",
    "src/u8-rmax/neon.c",
    "src/x32-packx/x4-neon-st4.c",
    "src/x32-pad/x2-neon.c",
    "src/x32-zip/x2-neon.c",
    "src/x32-zip/x3-neon.c",
    "src/x32-zip/x4-neon.c",
    "src/x32-zip/xm-neon.c",
    "src/x8-zip/x2-neon.c",
    "src/x8-zip/x3-neon.c",
    "src/x8-zip/x4-neon.c",
    "src/x8-zip/xm-neon.c",
]
261
# NEON+FMA micro-kernel sources built for both AArch32 and AArch64 by
# ":neonfma_ukernels".
NEONFMA_UKERNELS = [
    "src/f32-igemm/4x12-neonfma-ld64.c",
    "src/f32-igemm/4x2-neonfma-ld64.c",
    "src/f32-igemm/4x4-neonfma-ld64.c",
    "src/f32-igemm/4x8-neonfma-ld128.c",
    "src/f32-igemm/4x8-neonfma-ld64.c",
    "src/f32-igemm/6x8-neonfma-ld64.c",
    "src/f32-dwconv/up4x9-neonfma.c",
    "src/f32-dwconv/up8x9-neonfma.c",
    "src/f32-gemm/1x8-neonfma-ld64.c",
    "src/f32-gemm/4x12-neonfma-ld64.c",
    "src/f32-gemm/4x2-neonfma-ld64.c",
    "src/f32-gemm/4x8-neonfma-ld128.c",
    "src/f32-gemm/4x8-neonfma-ld64.c",
    "src/f32-gemm/5x8-neonfma-ld64.c",
    "src/f32-gemm/6x8-neonfma-ld64.c",
    "src/f32-gemminc/1x8-neonfma-ld64.c",
    "src/f32-gemminc/4x12-neonfma-ld64.c",
    "src/f32-gemminc/4x8-neonfma-ld128.c",
    "src/f32-gemminc/4x8-neonfma-ld64.c",
    "src/f32-gemminc/5x8-neonfma-ld64.c",
    "src/f32-gemminc/6x8-neonfma-ld64.c",
    "src/f32-hswish/neonfma.c",
    "src/f32-ppmm/4x8-neonfma.c",
    "src/f32-ppmm/8x8-neonfma.c",
    "src/f32-vmulcaddc/c4-neonfma-x2.c",
]
289
# NEON+FMA micro-kernel sources that ":neonfma_ukernels" adds only to its
# AArch64 source set (on top of NEONFMA_UKERNELS).
AARCH64_NEONFMA_UKERNELS = [
    "src/f32-conv-hwc/3x3s2p1c3x4-neonfma-2x2.c",
    "src/f32-conv-hwc/3x3s2p1c3x8-neonfma-2x2.c",
    "src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c",
    "src/f32-dwconv-spchw/3x3p1-neonfma.c",
    "src/f32-dwconv-spchw/5x5p2-neonfma.c",
    "src/f32-dwconv-spchw/3x3s2p1-neonfma.c",
    "src/f32-dwconv-spchw/5x5s2p2-neonfma.c",
    "src/f32-spmm/12x1-neonfma.c",
    "src/f32-spmm/12x2-neonfma.c",
    "src/f32-spmm/12x4-neonfma.c",
    "src/f32-spmm/16x1-neonfma-pipelined.c",
    "src/f32-spmm/16x1-neonfma-unroll2.c",
    "src/f32-spmm/16x1-neonfma.c",
    "src/f32-spmm/16x2-neonfma.c",
    "src/f32-spmm/16x4-neonfma.c",
    "src/f32-spmm/4x1-neonfma-pipelined.c",
    "src/f32-spmm/4x1-neonfma-unroll2.c",
    "src/f32-spmm/4x1-neonfma.c",
    "src/f32-spmm/4x2-neonfma.c",
    "src/f32-spmm/4x4-neonfma.c",
    "src/f32-spmm/8x1-neonfma-pipelined.c",
    "src/f32-spmm/8x1-neonfma-unroll2.c",
    "src/f32-spmm/8x1-neonfma.c",
    "src/f32-spmm/8x2-neonfma.c",
    "src/f32-spmm/8x4-neonfma.c",
]
317
# Half-precision (f16) GEMM micro-kernel sources; built for AArch64 only by
# ":neonfp16arith_ukernels" with -march=armv8.2-a+fp16.
AARCH64_NEONFP16ARITH_UKERNELS = [
    "src/f16-gemm/4x8-neonfp16arith-ld64.c",
    "src/f16-gemm/6x8-neonfp16arith-ld64.c",
    "src/f16-gemm/8x8-neonfp16arith-ld64.c",
]
323
# SSE micro-kernel sources; compiled (together with SSE2_UKERNELS) by
# ":sse2_ukernels".
SSE_UKERNELS = [
    "src/f32-avgpool/mp9p8q-sse.c",
    "src/f32-avgpool/up9-sse.c",
    "src/f32-clamp/sse.c",
    "src/f32-igemm/1x8-sse-dup.c",
    "src/f32-igemm/1x8-sse-load1.c",
    "src/f32-igemm/1x8s4-sse.c",
    "src/f32-igemm/4x2c4-sse.c",
    "src/f32-igemm/4x8-sse-dup.c",
    "src/f32-igemm/4x8-sse-load1.c",
    "src/f32-igemm/4x8s4-sse.c",
    "src/f32-dwconv/up4x25-sse.c",
    "src/f32-dwconv/up4x4-sse.c",
    "src/f32-dwconv/up4x9-sse.c",
    "src/f32-gavgpool-spchw/sse-x4.c",
    "src/f32-gavgpool/mp7p7q-sse.c",
    "src/f32-gavgpool/up7-sse.c",
    "src/f32-gemm/1x8-sse-dup.c",
    "src/f32-gemm/1x8-sse-load1.c",
    "src/f32-gemm/1x8s4-sse.c",
    "src/f32-gemm/4x8-sse-dup.c",
    "src/f32-gemm/4x8-sse-load1.c",
    "src/f32-gemm/4x8s4-sse.c",
    "src/f32-gemminc/1x8-sse-dup.c",
    "src/f32-gemminc/1x8-sse-load1.c",
    "src/f32-gemminc/1x8s4-sse.c",
    "src/f32-gemminc/4x8-sse-dup.c",
    "src/f32-gemminc/4x8-sse-load1.c",
    "src/f32-gemminc/4x8s4-sse.c",
    "src/f32-hswish/sse.c",
    "src/f32-maxpool/9p8q-sse.c",
    "src/f32-pavgpool/mp9p8q-sse.c",
    "src/f32-pavgpool/up9-sse.c",
    "src/f32-dwconv-spchw/3x3p1-sse.c",
    "src/f32-dwconv-spchw/3x3s2p1-sse.c",
    "src/f32-ppmm/4x8-sse.c",
    "src/f32-prelu/x4-sse.c",
    "src/f32-rmax/sse.c",
    "src/f32-spmm/4x1-sse.c",
    "src/f32-spmm/8x1-sse.c",
    "src/f32-vadd/sse.c",
    "src/f32-vmul/sse.c",
    "src/f32-vmulcaddc/c4-sse-x2.c",
    "src/f32-vsub/sse.c",
    "src/x32-packx/x4-sse.c",
]
370
# SSE2 micro-kernel sources; compiled (together with SSE_UKERNELS) by
# ":sse2_ukernels".
SSE2_UKERNELS = [
    "src/f32-argmaxpool/mp9p8q-sse2.c",
    "src/f32-argmaxpool/up4-sse2.c",
    "src/f32-argmaxpool/up9-sse2.c",
    "src/q8-avgpool/mp9p8q-sse2.c",
    "src/q8-avgpool/up9-sse2.c",
    "src/q8-igemm/4x4c2-sse2.c",
    "src/q8-dwconv/up8x9-sse2.c",
    "src/q8-gavgpool/mp7p7q-sse2.c",
    "src/q8-gavgpool/up7-sse2.c",
    "src/q8-gemm/2x4c8-sse2.c",
    "src/q8-gemm/4x4c2-sse2.c",
    "src/q8-vadd/sse2.c",
    "src/u8-clamp/sse2.c",
    "src/u8-maxpool/9p8q-sse2.c",
    "src/u8-rmax/sse2.c",
    "src/x32-pad/x2-sse2.c",
    "src/x32-zip/x2-sse2.c",
    "src/x32-zip/x3-sse2.c",
    "src/x32-zip/x4-sse2.c",
    "src/x32-zip/xm-sse2.c",
    "src/x8-zip/x2-sse2.c",
    "src/x8-zip/x3-sse2.c",
    "src/x8-zip/x4-sse2.c",
    "src/x8-zip/xm-sse2.c",
]
397
# AVX micro-kernel sources; compiled by ":avx_ukernels" with -mavx.
AVX_UKERNELS = [
    "src/f32-rmax/avx.c",
]
401
# AVX512F micro-kernel sources; compiled by ":avx512f_ukernels" with -mavx512f.
AVX512F_UKERNELS = [
    "src/f32-rmax/avx512f.c",
]
405
# Assembly (.S) micro-kernel sources for AArch32; built by ":asm_ukernels".
AARCH32_ASM_UKERNELS = [
    "src/q8-dwconv/up8x9-aarch32-neon.S",
]
409
# Assembly (.S) micro-kernel sources for AArch64; built by ":asm_ukernels".
AARCH64_ASM_UKERNELS = [
    "src/f32-dwconv/up4x9-aarch64-neonfma-cortex-a55.S",
    "src/f32-dwconv/up4x9-aarch64-neonfma.S",
    "src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/4x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/4x8-aarch64-neonfma-ld128.S",
    "src/f32-gemm/4x8-aarch64-neonfma-ld64.S",
    "src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/6x8-aarch64-neonfma-ld128.S",
    "src/f32-gemm/6x8-aarch64-neonfma-ld64.S",
    "src/f32-gemminc/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-ld128.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-ld64.S",
    "src/f32-gemminc/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-ld128.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-ld64.S",
    "src/f32-igemm/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S",
]
451
# Internal headers under src/xnnpack/ that form the base of INTERNAL_HDRS,
# MICROKERNEL_BENCHMARK_HDRS and MICROKERNEL_TEST_HDRS.
INTERNAL_MICROKERNEL_HDRS = [
    "src/xnnpack/argmaxpool.h",
    "src/xnnpack/avgpool.h",
    "src/xnnpack/clamp.h",
    "src/xnnpack/common.h",
    "src/xnnpack/conv.h",
    "src/xnnpack/dwconv.h",
    "src/xnnpack/gavgpool.h",
    "src/xnnpack/gemm.h",
    "src/xnnpack/hswish.h",
    "src/xnnpack/igemm.h",
    "src/xnnpack/lut.h",
    "src/xnnpack/math.h",
    "src/xnnpack/maxpool.h",
    "src/xnnpack/packx.h",
    "src/xnnpack/pad.h",
    "src/xnnpack/params.h",
    "src/xnnpack/pavgpool.h",
    "src/xnnpack/ppmm.h",
    "src/xnnpack/prelu.h",
    "src/xnnpack/rmax.h",
    "src/xnnpack/scalar-utils.h",
    "src/xnnpack/spmm.h",
    "src/xnnpack/unpool.h",
    "src/xnnpack/vadd.h",
    "src/xnnpack/vmul.h",
    "src/xnnpack/vmulcaddc.h",
    "src/xnnpack/vsub.h",
    "src/xnnpack/zip.h",
]
482
# Full internal header set: the micro-kernel headers plus the public API
# header and the operator/runtime support headers. Used as `hdrs` of the
# micro-kernel libraries and as `textual_hdrs` of ":operators".
INTERNAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "include/xnnpack.h",
    "src/xnnpack/allocator.h",
    "src/xnnpack/compute.h",
    "src/xnnpack/im2col.h",
    "src/xnnpack/indirection.h",
    "src/xnnpack/log.h",
    "src/xnnpack/operator.h",
    "src/xnnpack/pack.h",
    "src/xnnpack/requantization.h",
    "src/xnnpack/requantization-stubs.h",
]
495
# Headers appended to the `srcs` of every micro-kernel benchmark target.
MICROKERNEL_BENCHMARK_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "src/xnnpack/requantization.h",
    "include/xnnpack.h",
]
500
# Headers appended to the `srcs` of every micro-kernel unit test; unlike the
# benchmark set this also includes isa-checks.h.
MICROKERNEL_TEST_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "src/xnnpack/isa-checks.h",
    "src/xnnpack/requantization.h",
    "include/xnnpack.h",
]
506
# Parameter-related headers for operator tests.
# NOTE(review): no consumer is visible in this part of the file; presumably
# used by operator test targets defined further down - confirm.
OPERATOR_TEST_PARAMS_HDRS = [
    "src/xnnpack/params.h",
    "src/xnnpack/common.h",
]
511
# Extra headers added to the `srcs` of benchmarks and tests that also list
# pack.h (GEMM, IGEMM, convolution, depthwise-convolution, ... targets).
WEIGHTS_PACK_HDRS = [
    "src/xnnpack/pack.h",
    "src/xnnpack/operator.h",
    "src/xnnpack/compute.h",
]
517
# Scalar micro-kernels. The sources are passed through the plain `srcs`
# attribute (no per-architecture source list); this is the only library in
# the `generic_deps` of ":ukernels".
xnnpack_cc_library(
    name = "scalar_ukernels",
    srcs = SCALAR_UKERNELS,
    hdrs = INTERNAL_HDRS,
    aarch32_copts = ["-marm"],
    copts = xnnpack_std_copts(),
    deps = [
        "@FP16",
        "@FXdiv",
    ],
)
529
# psimd micro-kernels. On AArch32 they are compiled with -mfpu=neon; the
# `optimized_copts` request -O3 and -ffast-math.
xnnpack_cc_library(
    name = "psimd_ukernels",
    srcs = PSIMD_UKERNELS,
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon",
    ],
    copts = xnnpack_std_copts(),
    optimized_copts = [
        "-O3",
        "-ffast-math",
    ],
    deps = [
        "@FP16",
        "@psimd",
    ],
)
548
# NEON micro-kernels: the same source list is supplied for AArch32 and
# AArch64; only AArch32 gets explicit -marm / -mfpu=neon flags.
xnnpack_cc_library(
    name = "neon_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon",
    ],
    aarch32_srcs = NEON_UKERNELS,
    aarch64_srcs = NEON_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = ["@FP16"],
)
561
# NEON+FMA micro-kernels. AArch32 builds NEONFMA_UKERNELS with
# -mfpu=neon-vfpv4; AArch64 additionally builds AARCH64_NEONFMA_UKERNELS.
xnnpack_cc_library(
    name = "neonfma_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon-vfpv4",
    ],
    aarch32_srcs = NEONFMA_UKERNELS,
    aarch64_srcs = NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = ["@FP16"],
)
574
# Half-precision arithmetic micro-kernels; sources and the
# -march=armv8.2-a+fp16 flag are supplied for AArch64 only.
xnnpack_cc_library(
    name = "neonfp16arith_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch64_copts = ["-march=armv8.2-a+fp16"],
    aarch64_srcs = AARCH64_NEONFP16ARITH_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = ["@FP16"],
)
583
# x86 SSE and SSE2 micro-kernels, compiled together with -msse2.
xnnpack_cc_library(
    name = "sse2_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-msse2"],
    x86_srcs = SSE_UKERNELS + SSE2_UKERNELS,
    deps = ["@FP16"],
)
592
# x86 AVX micro-kernels, compiled with -mavx.
xnnpack_cc_library(
    name = "avx_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-mavx"],
    x86_srcs = AVX_UKERNELS,
    deps = ["@FP16"],
)
601
# x86 AVX512F micro-kernels, compiled with -mavx512f.
xnnpack_cc_library(
    name = "avx512f_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-mavx512f"],
    x86_srcs = AVX512F_UKERNELS,
    deps = ["@FP16"],
)
610
# Assembly (.S) micro-kernels for AArch32 and AArch64. Only assembly.h is
# exposed as a header and no extra copts are passed.
xnnpack_cc_library(
    name = "asm_ukernels",
    hdrs = ["src/xnnpack/assembly.h"],
    aarch32_srcs = AARCH32_ASM_UKERNELS,
    aarch64_srcs = AARCH64_ASM_UKERNELS,
)
617
# Aggregate of all micro-kernel libraries, with one dependency list per
# architecture plus `generic_deps` holding the scalar kernels. How the lists
# are selected and combined is decided by xnnpack_aggregate_library in
# build_defs.bzl.
xnnpack_aggregate_library(
    name = "ukernels",
    aarch32_deps = [
        ":psimd_ukernels",
        ":neon_ukernels",
        ":neonfma_ukernels",
        ":asm_ukernels",
    ],
    aarch64_deps = [
        ":psimd_ukernels",
        ":neon_ukernels",
        ":neonfma_ukernels",
        ":neonfp16arith_ukernels",
        ":asm_ukernels",
    ],
    generic_deps = [":scalar_ukernels"],
    wasmsimd_deps = [
        ":psimd_ukernels",
    ],
    x86_deps = [
        ":psimd_ukernels",
        ":sse2_ukernels",
        ":avx_ukernels",
        ":avx512f_ukernels",
    ],
)
644
# im2col helper built from src/im2col.c; a dependency of
# ":f32_im2col_gemm_bench".
xnnpack_cc_library(
    name = "im2col",
    srcs = ["src/im2col.c"],
    hdrs = [
        "src/xnnpack/common.h",
        "src/xnnpack/im2col.h",
    ],
    copts = xnnpack_std_copts(),
)
654
# Indirection helper built from src/indirection.c; a dependency of
# ":operators" and of the IGEMM / depthwise-convolution benchmarks.
xnnpack_cc_library(
    name = "indirection",
    srcs = ["src/indirection.c"],
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    deps = [
        "@FP16",
        "@FXdiv",
        "@pthreadpool",
    ],
)
666
# Operator execution code (src/operator-run.c); a dependency of the public
# ":XNNPACK" target.
xnnpack_cc_library(
    name = "operator_run",
    srcs = ["src/operator-run.c"],
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts() + [
        # Wrappers for multi-pass microkernels use VLAs for temporary buffers.
        "-Wno-vla",
    ],
    deps = [
        "@FP16",
        "@FXdiv",
        "@clog",
        "@pthreadpool",
    ],
)
682
# Header-less library that only propagates the XNN_ENABLE_ASSEMBLY define:
# 1 or 0 when one of the explicit settings matches, and no define otherwise.
# NOTE(review): the ":xnn_enable_assembly_explicit_*" config settings are not
# defined in the visible part of this file - confirm they exist further down.
cc_library(
    name = "enable_assembly",
    defines = select({
        ":xnn_enable_assembly_explicit_true": ["XNN_ENABLE_ASSEMBLY=1"],
        ":xnn_enable_assembly_explicit_false": ["XNN_ENABLE_ASSEMBLY=0"],
        "//conditions:default": [],
    }),
)
691
# Operator implementations plus library initialisation and operator deletion.
# - src/wasm-stubs.c is added only when ":emscripten_wasm" matches.
# - The xnnpack_min_size_copts() flags are applied unless ":debug_build"
#   matches.
# NOTE(review): ":emscripten_wasm" and ":debug_build" are not defined in the
# visible part of this file - confirm they exist further down.
cc_library(
    name = "operators",
    srcs = OPERATOR_SRCS + [
        "src/init.c",
        "src/operator-delete.c",
    ] + select({
        ":emscripten_wasm": ["src/wasm-stubs.c"],
        "//conditions:default": [],
    }),
    copts = xnnpack_std_copts() + [
        "-Isrc",
        "-Iinclude",
    ] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }),
    linkstatic = True,
    textual_hdrs = INTERNAL_HDRS,
    deps = [
        ":enable_assembly",
        ":indirection",
        ":ukernels",
        "@FP16",
        "@FXdiv",
        "@clog",
        "@cpuinfo",
        "@pthreadpool",
    ],
)
721
# Public library target: exposes include/xnnpack.h (with "include" on the
# include path) and links the operator implementations and run-time code.
# Visibility comes from xnnpack_visibility() in build_defs.bzl.
cc_library(
    name = "XNNPACK",
    hdrs = ["include/xnnpack.h"],
    includes = ["include"],
    linkstatic = True,
    visibility = xnnpack_visibility(),
    deps = [
        ":operator_run",
        ":operators",
        "@pthreadpool",
    ],
)
734
# Benchmark helper library (bench/utils.cc, bench/utils.h) shared by all
# benchmark targets through the *_BENCHMARK_DEPS lists.
cc_library(
    name = "bench_utils",
    srcs = ["bench/utils.cc"],
    hdrs = ["bench/utils.h"],
    copts = ["-Wno-unused-result"],
    linkstatic = True,
    deps = ["@cpuinfo"],
)
743
######################### Benchmarks for micro-kernels #########################
745
# Q8 GEMM micro-kernel benchmark; adds the optional ruy and gemmlowp copts
# and deps provided by build_defs.bzl.
xnnpack_benchmark(
    name = "q8_gemm_bench",
    srcs = [
        "bench/gemm.h",
        "bench/q8-gemm.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts() + xnnpack_optional_gemmlowp_copts(),
    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps() + xnnpack_optional_gemmlowp_deps(),
)
756
# F16 GEMM micro-kernel benchmark.
xnnpack_benchmark(
    name = "f16_gemm_bench",
    srcs = [
        "bench/f16-gemm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
767
# F32 IGEMM micro-kernel benchmark; additionally depends on ":indirection".
xnnpack_benchmark(
    name = "f32_igemm_bench",
    srcs = [
        "bench/f32-igemm.cc",
        "bench/conv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
777
# F32 CONV-HWC micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_conv_hwc_bench",
    srcs = [
        "bench/f32-conv-hwc.cc",
        "bench/dconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
788
# F32 DWCONV micro-kernel benchmark; additionally depends on ":indirection".
xnnpack_benchmark(
    name = "f32_dwconv_bench",
    srcs = [
        "bench/f32-dwconv.cc",
        "bench/dwconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
798
# F32 DWCONV-SPCHW micro-kernel benchmark; additionally depends on
# ":indirection".
xnnpack_benchmark(
    name = "f32_dwconv_spchw_bench",
    srcs = [
        "bench/f32-dwconv-spchw.cc",
        "bench/dwconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)
808
# F32 GEMM micro-kernel benchmark; adds the optional ruy copts and deps
# provided by build_defs.bzl.
xnnpack_benchmark(
    name = "f32_gemm_bench",
    srcs = [
        "bench/f32-gemm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts(),
    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps(),
)
819
# F32 RMAX micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_rmax_bench",
    srcs = [
        "bench/f32-rmax.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
828
# F32 SPMM micro-kernel benchmark.
xnnpack_benchmark(
    name = "f32_spmm_bench",
    srcs = [
        "bench/f32-spmm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)
839
# F32 im2col + GEMM benchmark; additionally depends on ":im2col".
xnnpack_benchmark(
    name = "f32_im2col_gemm_bench",
    srcs = [
        "bench/f32-im2col-gemm.cc",
        "bench/conv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":im2col"],
)
849
########################### Benchmarks for operators ###########################
851
# Operator benchmark built from bench/add.cc.
xnnpack_benchmark(
    name = "add_bench",
    srcs = ["bench/add.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
857
# Operator benchmark built from bench/average-pooling.cc.
xnnpack_benchmark(
    name = "average_pooling_bench",
    srcs = ["bench/average-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
863
# Operator benchmark built from bench/channel-shuffle.cc.
xnnpack_benchmark(
    name = "channel_shuffle_bench",
    srcs = ["bench/channel-shuffle.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
869
# Operator benchmark built from bench/convolution.cc; adds the optional
# TFLite and ARM Compute Library copts and deps provided by build_defs.bzl.
xnnpack_benchmark(
    name = "convolution_bench",
    srcs = ["bench/convolution.cc"],
    copts = xnnpack_optional_tflite_copts() + xnnpack_optional_armcl_copts(),
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps() + xnnpack_optional_armcl_deps(),
)
876
# Operator benchmark built from bench/deconvolution.cc; adds the optional
# TFLite copts and deps provided by build_defs.bzl.
xnnpack_benchmark(
    name = "deconvolution_bench",
    srcs = ["bench/deconvolution.cc"],
    copts = xnnpack_optional_tflite_copts(),
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)
883
# Operator benchmark built from bench/global-average-pooling.cc.
xnnpack_benchmark(
    name = "global_average_pooling_bench",
    srcs = ["bench/global-average-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
889
# Operator benchmark built from bench/max-pooling.cc.
xnnpack_benchmark(
    name = "max_pooling_bench",
    srcs = ["bench/max-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
895
# Operator benchmark built from bench/sigmoid.cc.
xnnpack_benchmark(
    name = "sigmoid_bench",
    srcs = ["bench/sigmoid.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
901
# Operator benchmark built from bench/softargmax.cc.
xnnpack_benchmark(
    name = "softargmax_bench",
    srcs = ["bench/softargmax.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
907
######################### Unit tests for micro-kernels #########################
909
# Unit test for the F16 GEMM micro-kernels.
xnnpack_unit_test(
    name = "f16_gemm_test",
    srcs = [
        "test/f16-gemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
919
# Unit test for the F32 ARGMAXPOOL micro-kernels.
xnnpack_unit_test(
    name = "f32_argmaxpool_test",
    srcs = [
        "test/f32-argmaxpool.cc",
        "test/argmaxpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
929
# Unit test for the F32 AVGPOOL micro-kernels.
xnnpack_unit_test(
    name = "f32_avgpool_test",
    srcs = [
        "test/f32-avgpool.cc",
        "test/avgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
939
# Unit test for the F32 CLAMP micro-kernels.
xnnpack_unit_test(
    name = "f32_clamp_test",
    srcs = [
        "test/f32-clamp.cc",
        "test/clamp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
948
# Unit test for the F32 IGEMM micro-kernels; uses the GEMM micro-kernel
# tester header.
xnnpack_unit_test(
    name = "f32_igemm_test",
    srcs = [
        "test/f32-igemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
958
# Unit test for the F32 CONV-HWC micro-kernels.
xnnpack_unit_test(
    name = "f32_conv_hwc_test",
    srcs = [
        "test/f32-conv-hwc.cc",
        "test/conv-hwc-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
968
# Unit test for the F32 CONV-HWC2SPCHW micro-kernels.
xnnpack_unit_test(
    name = "f32_conv_hwc2spchw_test",
    srcs = [
        "test/f32-conv-hwc2spchw.cc",
        "test/conv-hwc2spchw-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
978
# Unit test for the F32 DWCONV micro-kernels.
xnnpack_unit_test(
    name = "f32_dwconv_test",
    srcs = [
        "test/f32-dwconv.cc",
        "test/dwconv-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
988
# Unit test for the F32 DWCONV-SPCHW micro-kernels.
xnnpack_unit_test(
    name = "f32_dwconv_spchw_test",
    srcs = [
        "test/f32-dwconv-spchw.cc",
        "test/dwconv-spchw-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
998
# Unit test for the F32 GAVGPOOL micro-kernels.
xnnpack_unit_test(
    name = "f32_gavgpool_test",
    srcs = [
        "test/f32-gavgpool.cc",
        "test/gavgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1008
# Unit test for the F32 GAVGPOOL-SPCHW micro-kernels.
xnnpack_unit_test(
    name = "f32_gavgpool_spchw_test",
    srcs = [
        "test/f32-gavgpool-spchw.cc",
        "test/gavgpool-spchw-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1018
# Unit test for the F32 GEMM micro-kernels.
xnnpack_unit_test(
    name = "f32_gemm_test",
    srcs = [
        "test/f32-gemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1028
# Unit test for the F32 GEMMINC micro-kernels; uses the GEMM micro-kernel
# tester header.
xnnpack_unit_test(
    name = "f32_gemminc_test",
    srcs = [
        "test/f32-gemminc.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1038
# Unit test for the F32 HSWISH micro-kernels.
xnnpack_unit_test(
    name = "f32_hswish_test",
    srcs = [
        "test/f32-hswish.cc",
        "test/hswish-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1047
# Unit test for the F32 MAXPOOL micro-kernels.
xnnpack_unit_test(
    name = "f32_maxpool_test",
    srcs = [
        "test/f32-maxpool.cc",
        "test/maxpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1056
# Unit test for the F32 PAVGPOOL micro-kernels; uses the AVGPOOL micro-kernel
# tester header.
xnnpack_unit_test(
    name = "f32_pavgpool_test",
    srcs = [
        "test/f32-pavgpool.cc",
        "test/avgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1066
# Unit test for the F32 PPMM micro-kernels; uses the GEMM micro-kernel
# tester header.
xnnpack_unit_test(
    name = "f32_ppmm_test",
    srcs = [
        "test/f32-ppmm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1076
# Unit test for the F32 PRELU micro-kernels.
xnnpack_unit_test(
    name = "f32_prelu_test",
    srcs = [
        "test/f32-prelu.cc",
        "test/prelu-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1086
# Unit test for the F32 RMAX micro-kernels.
xnnpack_unit_test(
    name = "f32_rmax_test",
    srcs = [
        "test/f32-rmax.cc",
        "test/rmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1095
# Unit test for the F32 SPMM micro-kernels.
xnnpack_unit_test(
    name = "f32_spmm_test",
    srcs = [
        "test/f32-spmm.cc",
        "test/spmm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1105
# Unit test for the F32 VADD micro-kernels.
xnnpack_unit_test(
    name = "f32_vadd_test",
    srcs = [
        "test/f32-vadd.cc",
        "test/vadd-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1114
# Unit test for the F32 VSUB micro-kernels.
xnnpack_unit_test(
    name = "f32_vsub_test",
    srcs = [
        "test/f32-vsub.cc",
        "test/vsub-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1123
# Unit test for the F32 VMUL micro-kernels.
xnnpack_unit_test(
    name = "f32_vmul_test",
    srcs = [
        "test/f32-vmul.cc",
        "test/vmul-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1132
1133xnnpack_unit_test(
1134 name = "f32_vmulcaddc_test",
1135 srcs = [
1136 "test/f32-vmulcaddc.cc",
1137 "test/vmulcaddc-microkernel-tester.h",
1138 "src/xnnpack/AlignedAllocator.h",
1139 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
1140 deps = MICROKERNEL_TEST_DEPS,
1141)
1142
1143xnnpack_unit_test(
1144 name = "q8_avgpool_test",
1145 srcs = [
1146 "test/q8-avgpool.cc",
1147 "test/avgpool-microkernel-tester.h",
1148 "src/xnnpack/AlignedAllocator.h",
1149 ] + MICROKERNEL_TEST_HDRS,
1150 deps = MICROKERNEL_TEST_DEPS,
1151)
1152
1153xnnpack_unit_test(
1154 name = "q8_igemm_test",
1155 srcs = [
1156 "test/q8-igemm.cc",
1157 "test/gemm-microkernel-tester.h",
1158 "src/xnnpack/AlignedAllocator.h",
1159 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
1160 deps = MICROKERNEL_TEST_DEPS,
1161)
1162
1163xnnpack_unit_test(
1164 name = "q8_dwconv_test",
1165 srcs = [
1166 "test/q8-dwconv.cc",
1167 "test/dwconv-microkernel-tester.h",
1168 "src/xnnpack/AlignedAllocator.h",
1169 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
1170 deps = MICROKERNEL_TEST_DEPS,
1171)
1172
1173xnnpack_unit_test(
1174 name = "q8_gavgpool_test",
1175 srcs = [
1176 "test/q8-gavgpool.cc",
1177 "test/gavgpool-microkernel-tester.h",
1178 "src/xnnpack/AlignedAllocator.h",
1179 ] + MICROKERNEL_TEST_HDRS,
1180 deps = MICROKERNEL_TEST_DEPS,
1181)
1182
1183xnnpack_unit_test(
1184 name = "q8_gemm_test",
1185 srcs = [
1186 "test/q8-gemm.cc",
1187 "test/gemm-microkernel-tester.h",
1188 "src/xnnpack/AlignedAllocator.h",
1189 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
1190 deps = MICROKERNEL_TEST_DEPS,
1191)
1192
1193xnnpack_unit_test(
1194 name = "q8_vadd_test",
1195 srcs = [
1196 "test/q8-vadd.cc",
1197 "test/vadd-microkernel-tester.h",
1198 ] + MICROKERNEL_TEST_HDRS,
1199 deps = MICROKERNEL_TEST_DEPS,
1200)
1201
1202xnnpack_unit_test(
1203 name = "u8_clamp_test",
1204 srcs = [
1205 "test/u8-clamp.cc",
1206 "test/clamp-microkernel-tester.h",
1207 ] + MICROKERNEL_TEST_HDRS,
1208 deps = MICROKERNEL_TEST_DEPS,
1209)
1210
1211xnnpack_unit_test(
1212 name = "u8_lut32norm_test",
1213 srcs = [
1214 "test/u8-lut32norm.cc",
1215 "test/lut-norm-microkernel-tester.h",
1216 ] + MICROKERNEL_TEST_HDRS,
1217 deps = MICROKERNEL_TEST_DEPS,
1218)
1219
1220xnnpack_unit_test(
1221 name = "u8_maxpool_test",
1222 srcs = [
1223 "test/u8-maxpool.cc",
1224 "test/maxpool-microkernel-tester.h",
1225 ] + MICROKERNEL_TEST_HDRS,
1226 deps = MICROKERNEL_TEST_DEPS,
1227)
1228
1229xnnpack_unit_test(
1230 name = "u8_rmax_test",
1231 srcs = [
1232 "test/u8-rmax.cc",
1233 "test/rmax-microkernel-tester.h",
1234 ] + MICROKERNEL_TEST_HDRS,
1235 deps = MICROKERNEL_TEST_DEPS,
1236)
1237
1238xnnpack_unit_test(
1239 name = "x32_packx_test",
1240 srcs = [
1241 "test/x32-packx.cc",
1242 "test/pack-microkernel-tester.h",
1243 "src/xnnpack/AlignedAllocator.h",
1244 ] + MICROKERNEL_TEST_HDRS,
1245 deps = MICROKERNEL_TEST_DEPS,
1246)
1247
1248xnnpack_unit_test(
1249 name = "x32_pad_test",
1250 srcs = [
1251 "test/x32-pad.cc",
1252 "test/pad-microkernel-tester.h",
1253 ] + MICROKERNEL_TEST_HDRS,
1254 deps = MICROKERNEL_TEST_DEPS,
1255)
1256
1257xnnpack_unit_test(
1258 name = "x32_unpool_test",
1259 srcs = [
1260 "test/x32-unpool.cc",
1261 "test/unpool-microkernel-tester.h",
1262 ] + MICROKERNEL_TEST_HDRS,
1263 deps = MICROKERNEL_TEST_DEPS,
1264)
1265
1266xnnpack_unit_test(
1267 name = "x32_zip_test",
1268 srcs = [
1269 "test/x32-zip.cc",
1270 "test/zip-microkernel-tester.h",
1271 ] + MICROKERNEL_TEST_HDRS,
1272 deps = MICROKERNEL_TEST_DEPS,
1273)
1274
1275xnnpack_unit_test(
1276 name = "x8_lut_test",
1277 srcs = [
1278 "test/x8-lut.cc",
1279 "test/lut-microkernel-tester.h",
1280 ] + MICROKERNEL_TEST_HDRS,
1281 deps = MICROKERNEL_TEST_DEPS,
1282)
1283
1284xnnpack_unit_test(
1285 name = "x8_zip_test",
1286 srcs = [
1287 "test/x8-zip.cc",
1288 "test/zip-microkernel-tester.h",
1289 ] + MICROKERNEL_TEST_HDRS,
1290 deps = MICROKERNEL_TEST_DEPS,
1291)
1292
1293########################### Size test for the library ##########################
1294
1295xnnpack_binary(
1296 name = "size_test",
1297 srcs = ["test/size.c"],
1298 deps = [":XNNPACK"],
1299)
1300
1301########################### Unit tests for operators ###########################
1302
1303xnnpack_unit_test(
1304 name = "add_test",
1305 srcs = [
1306 "test/add.cc",
1307 "test/add-operator-tester.h",
1308 ],
1309 deps = OPERATOR_TEST_DEPS,
1310)
1311
1312xnnpack_unit_test(
1313 name = "argmax_pooling_test",
1314 srcs = [
1315 "test/argmax-pooling.cc",
1316 "test/argmax-pooling-operator-tester.h",
1317 ] + OPERATOR_TEST_PARAMS_HDRS,
1318 deps = OPERATOR_TEST_DEPS,
1319)
1320
1321xnnpack_unit_test(
1322 name = "average_pooling_test",
1323 srcs = [
1324 "test/average-pooling.cc",
1325 "test/average-pooling-operator-tester.h",
1326 ] + OPERATOR_TEST_PARAMS_HDRS,
1327 deps = OPERATOR_TEST_DEPS,
1328)
1329
1330xnnpack_unit_test(
1331 name = "channel_pad_test",
1332 srcs = [
1333 "test/channel-pad.cc",
1334 "test/channel-pad-operator-tester.h",
1335 ] + OPERATOR_TEST_PARAMS_HDRS,
1336 deps = OPERATOR_TEST_DEPS,
1337)
1338
1339xnnpack_unit_test(
1340 name = "channel_shuffle_test",
1341 srcs = [
1342 "test/channel-shuffle.cc",
1343 "test/channel-shuffle-operator-tester.h",
1344 ],
1345 deps = OPERATOR_TEST_DEPS,
1346)
1347
1348xnnpack_unit_test(
1349 name = "clamp_test",
1350 srcs = [
1351 "test/clamp.cc",
1352 "test/clamp-operator-tester.h",
1353 ],
1354 deps = OPERATOR_TEST_DEPS,
1355)
1356
1357xnnpack_unit_test(
1358 name = "convolution_test",
1359 srcs = [
1360 "test/convolution.cc",
1361 "test/convolution-operator-tester.h",
1362 ],
1363 deps = OPERATOR_TEST_DEPS,
1364)
1365
1366xnnpack_unit_test(
1367 name = "convolution_spnchw_test",
1368 srcs = [
1369 "test/convolution-spnchw.cc",
1370 "test/convolution-spnchw-operator-tester.h",
1371 ],
1372 deps = OPERATOR_TEST_DEPS,
1373)
1374
1375xnnpack_unit_test(
1376 name = "deconvolution_test",
1377 srcs = [
1378 "test/deconvolution.cc",
1379 "test/deconvolution-operator-tester.h",
1380 ] + OPERATOR_TEST_PARAMS_HDRS,
1381 deps = OPERATOR_TEST_DEPS,
1382)
1383
1384xnnpack_unit_test(
1385 name = "fully_connected_test",
1386 srcs = [
1387 "test/fully-connected.cc",
1388 "test/fully-connected-operator-tester.h",
1389 ],
1390 deps = OPERATOR_TEST_DEPS,
1391)
1392
1393xnnpack_unit_test(
1394 name = "global_average_pooling_test",
1395 srcs = [
1396 "test/global-average-pooling.cc",
1397 "test/global-average-pooling-operator-tester.h",
1398 ] + OPERATOR_TEST_PARAMS_HDRS,
1399 deps = OPERATOR_TEST_DEPS,
1400)
1401
1402xnnpack_unit_test(
1403 name = "global_average_pooling_spnchw_test",
1404 srcs = [
1405 "test/global-average-pooling-spnchw.cc",
1406 "test/global-average-pooling-spnchw-operator-tester.h",
1407 ],
1408 deps = OPERATOR_TEST_DEPS,
1409)
1410
1411xnnpack_unit_test(
1412 name = "hardswish_test",
1413 srcs = [
1414 "test/hardswish.cc",
1415 "test/hardswish-operator-tester.h",
1416 ],
1417 deps = OPERATOR_TEST_DEPS,
1418)
1419
1420xnnpack_unit_test(
1421 name = "leaky_relu_test",
1422 srcs = [
1423 "test/leaky-relu.cc",
1424 "test/leaky-relu-operator-tester.h",
1425 ],
1426 deps = OPERATOR_TEST_DEPS,
1427)
1428
1429xnnpack_unit_test(
1430 name = "max_pooling_test",
1431 srcs = [
1432 "test/max-pooling.cc",
1433 "test/max-pooling-operator-tester.h",
1434 ] + OPERATOR_TEST_PARAMS_HDRS,
1435 deps = OPERATOR_TEST_DEPS,
1436)
1437
1438xnnpack_unit_test(
1439 name = "prelu_test",
1440 srcs = [
1441 "test/prelu.cc",
1442 "test/prelu-operator-tester.h",
1443 ] + OPERATOR_TEST_PARAMS_HDRS,
1444 deps = OPERATOR_TEST_DEPS,
1445)
1446
1447xnnpack_unit_test(
1448 name = "sigmoid_test",
1449 srcs = [
1450 "test/sigmoid.cc",
1451 "test/sigmoid-operator-tester.h",
1452 ],
1453 deps = OPERATOR_TEST_DEPS,
1454)
1455
1456xnnpack_unit_test(
1457 name = "softargmax_test",
1458 srcs = [
1459 "test/softargmax.cc",
1460 "test/softargmax-operator-tester.h",
1461 ],
1462 deps = OPERATOR_TEST_DEPS,
1463)
1464
1465xnnpack_unit_test(
1466 name = "unpooling_test",
1467 srcs = [
1468 "test/unpooling.cc",
1469 "test/unpooling-operator-tester.h",
1470 ],
1471 deps = OPERATOR_TEST_DEPS,
1472)
1473
1474############################# Build configurations #############################
1475
1476config_setting(
1477 name = "linux_k8",
1478 values = {
1479 "cpu": "k8",
1480 },
1481)
1482
1483config_setting(
Marat Dukhan4e45e662019-10-03 15:40:24 -07001484 name = "linux_aarch64",
1485 values = {
1486 "cpu": "aarch64",
1487 },
1488)
1489
1490config_setting(
Marat Dukhan08c4a432019-10-03 09:29:21 -07001491 name = "android",
1492 values = {"crosstool_top": "//external:android/crosstool"},
1493)
1494
1495config_setting(
1496 name = "android_armv7",
1497 values = {
1498 "crosstool_top": "//external:android/crosstool",
1499 "cpu": "armeabi-v7a",
1500 },
1501)
1502
1503config_setting(
1504 name = "android_arm64",
1505 values = {
1506 "crosstool_top": "//external:android/crosstool",
1507 "cpu": "arm64-v8a",
1508 },
1509)
1510
1511config_setting(
1512 name = "android_x86",
1513 values = {
1514 "crosstool_top": "//external:android/crosstool",
1515 "cpu": "x86",
1516 },
1517)
1518
1519config_setting(
1520 name = "android_x86_64",
1521 values = {
1522 "crosstool_top": "//external:android/crosstool",
1523 "cpu": "x86_64",
1524 },
1525)
1526
1527config_setting(
1528 name = "emscripten",
Marat Dukhan9d056a42019-10-03 12:13:35 -07001529 values = {"crosstool_top": "//toolchain/emscripten"},
Marat Dukhan08c4a432019-10-03 09:29:21 -07001530)
1531
1532config_setting(
1533 name = "emscripten_wasm",
1534 values = {
Marat Dukhan9d056a42019-10-03 12:13:35 -07001535 "crosstool_top": "//toolchain/emscripten",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001536 "cpu": "wasm",
1537 },
1538)
1539
1540config_setting(
1541 name = "emscripten_wasmsimd",
1542 values = {
Marat Dukhan9d056a42019-10-03 12:13:35 -07001543 "crosstool_top": "//toolchain/emscripten",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001544 "cpu": "wasm",
1545 "features": "wasmsimd",
1546 },
1547)
1548
1549config_setting(
1550 name = "emscripten_asmjs",
1551 values = {
Marat Dukhan9d056a42019-10-03 12:13:35 -07001552 "crosstool_top": "//toolchain/emscripten",
Marat Dukhan08c4a432019-10-03 09:29:21 -07001553 "cpu": "asmjs",
1554 },
1555)
1556
1557# Builds with -c dbg
1558config_setting(
1559 name = "debug_build",
1560 values = {
1561 "compilation_mode": "dbg",
1562 },
1563 visibility = ["//visibility:public"],
1564)
1565
1566# Builds with -c opt
1567config_setting(
1568 name = "optimized_build",
1569 values = {
1570 "compilation_mode": "opt",
1571 },
1572 visibility = ["//visibility:public"],
1573)
1574
1575# Enables usage of assembly kernels.
1576config_setting(
1577 name = "xnn_enable_assembly_explicit_true",
1578 define_values = {"xnn_enable_assembly": "true"},
1579)
1580
1581# Disables usage of assembly kernels.
1582config_setting(
1583 name = "xnn_enable_assembly_explicit_false",
1584 define_values = {"xnn_enable_assembly": "false"},
1585)