blob: 7bbec91797bbce29b2a97c85533c192586243f3a [file] [log] [blame]
Marat Dukhan08c4a432019-10-03 09:29:21 -07001# Copyright 2019 Google LLC
2#
3# This source code is licensed under the BSD-style license found in the
4# LICENSE file in the root directory of this source tree.
5#
6# Description:
7# XNNPACK - optimized floating-point neural network operators library
8
9licenses(["notice"])
10
11exports_files(["LICENSE"])
12
13load(":build_defs.bzl", "xnnpack_aggregate_library", "xnnpack_benchmark", "xnnpack_binary", "xnnpack_cc_library", "xnnpack_min_size_copts", "xnnpack_optional_armcl_copts", "xnnpack_optional_armcl_deps", "xnnpack_optional_gemmlowp_copts", "xnnpack_optional_gemmlowp_deps", "xnnpack_optional_ruy_copts", "xnnpack_optional_ruy_deps", "xnnpack_optional_tflite_copts", "xnnpack_optional_tflite_deps", "xnnpack_std_copts", "xnnpack_unit_test", "xnnpack_visibility")
14
# Dependencies shared by the operator-level benchmark targets
# (the xnnpack_benchmark rules that set deps = OPERATOR_BENCHMARK_DEPS).
15OPERATOR_BENCHMARK_DEPS = [
16 ":XNNPACK",
17 ":bench_utils",
18 "@cpuinfo",
19 "@pthreadpool",
20]
21
# Dependencies shared by the micro-kernel benchmark targets; these link
# against the :ukernels aggregate rather than the public :XNNPACK library.
22MICROKERNEL_BENCHMARK_DEPS = [
23 ":ukernels",
24 ":bench_utils",
25 "@cpuinfo",
26 "@FP16",
27 "@pthreadpool",
28]
29
# Dependencies shared by the micro-kernel unit tests
# (the xnnpack_unit_test rules that set deps = MICROKERNEL_TEST_DEPS).
30MICROKERNEL_TEST_DEPS = [
31 ":ukernels",
32 "@cpuinfo",
33 "@FP16",
34 "@pthreadpool",
35]
36
# Dependency list named for operator-level tests.
# NOTE(review): no target in this portion of the file references it;
# presumably it is consumed by operator test rules further down - confirm.
37OPERATOR_TEST_DEPS = [
38 ":XNNPACK",
39 "@pthreadpool",
40 "@FP16",
41]
42
# C sources, one per operator, compiled into the :operators library
# together with src/init.c and src/operator-delete.c.
43OPERATOR_SRCS = [
44 "src/add.c",
45 "src/argmax-pooling.c",
46 "src/average-pooling.c",
47 "src/channel-pad.c",
48 "src/channel-shuffle.c",
49 "src/clamp.c",
50 "src/convolution-spnchw.c",
51 "src/convolution.c",
52 "src/deconvolution.c",
53 "src/fully-connected.c",
54 "src/global-average-pooling-spnchw.c",
55 "src/global-average-pooling.c",
56 "src/hardswish.c",
57 "src/leaky-relu.c",
58 "src/max-pooling.c",
59 "src/prelu.c",
60 "src/sigmoid.c",
61 "src/softargmax.c",
62 "src/unpooling.c",
63]
64
# Scalar micro-kernel sources; compiled by the :scalar_ukernels library,
# which the :ukernels aggregate lists under generic_deps.
65SCALAR_UKERNELS = [
66 "src/f32-argmaxpool/mp9p8q-scalar.c",
67 "src/f32-argmaxpool/up4-scalar.c",
68 "src/f32-argmaxpool/up9-scalar.c",
69 "src/f32-avgpool/mp9p8q-scalar.c",
70 "src/f32-avgpool/up9-scalar.c",
71 "src/f32-clamp/scalar.c",
72 "src/f32-igemm/1x4-scalar.c",
73 "src/f32-igemm/2x4-scalar.c",
74 "src/f32-igemm/4x2-scalar.c",
75 "src/f32-igemm/4x4-scalar.c",
76 "src/f32-dwconv/up1x25-scalar.c",
77 "src/f32-dwconv/up1x4-scalar.c",
78 "src/f32-dwconv/up1x9-scalar.c",
79 "src/f32-gavgpool/mp7p7q-scalar.c",
80 "src/f32-gavgpool/up7-scalar.c",
81 "src/f32-gemm/1x4-scalar.c",
82 "src/f32-gemm/2x4-scalar.c",
83 "src/f32-gemm/4x2-scalar.c",
84 "src/f32-gemm/4x4-scalar.c",
85 "src/f32-gemminc/1x4-scalar.c",
86 "src/f32-gemminc/2x4-scalar.c",
87 "src/f32-gemminc/4x4-scalar.c",
88 "src/f32-hswish/scalar.c",
89 "src/f32-maxpool/9p8q-scalar.c",
90 "src/f32-pavgpool/mp9p8q-scalar.c",
91 "src/f32-pavgpool/up9-scalar.c",
92 "src/f32-ppmm/2x4-scalar.c",
93 "src/f32-ppmm/3x3-scalar.c",
94 "src/f32-ppmm/4x2-scalar.c",
95 "src/f32-ppmm/4x4-scalar.c",
96 "src/f32-prelu/x4-scalar.c",
97 "src/f32-rmax/scalar.c",
98 "src/f32-spmm/1x1-scalar-pipelined.c",
99 "src/f32-spmm/1x1-scalar-unroll2.c",
100 "src/f32-spmm/1x1-scalar.c",
101 "src/f32-spmm/2x1-scalar-pipelined.c",
102 "src/f32-spmm/2x1-scalar-unroll2.c",
103 "src/f32-spmm/2x1-scalar.c",
104 "src/f32-spmm/4x1-scalar-pipelined.c",
105 "src/f32-spmm/4x1-scalar-unroll2.c",
106 "src/f32-spmm/4x1-scalar.c",
107 "src/f32-spmm/8x1-scalar-pipelined.c",
108 "src/f32-spmm/8x1-scalar-unroll2.c",
109 "src/f32-spmm/8x1-scalar.c",
110 "src/f32-vadd/scalar.c",
111 "src/f32-vmul/scalar.c",
112 "src/f32-vmulcaddc/c1-scalar-x2.c",
113 "src/f32-vsub/scalar.c",
114 "src/q8-avgpool/mp9p8q-scalar.c",
115 "src/q8-avgpool/up9-scalar.c",
116 "src/q8-igemm/2x2-scalar.c",
117 "src/q8-dwconv/up1x9-scalar.c",
118 "src/q8-gavgpool/mp7p7q-scalar.c",
119 "src/q8-gavgpool/up7-scalar.c",
120 "src/q8-gemm/2x2-scalar.c",
121 "src/q8-vadd/scalar.c",
122 "src/u8-clamp/scalar.c",
123 "src/u8-lut32norm/scalar.c",
124 "src/u8-maxpool/9p8q-scalar.c",
125 "src/u8-rmax/scalar.c",
126 "src/x32-packx/x2-scalar.c",
127 "src/x32-packx/x3-scalar.c",
128 "src/x32-packx/x4-scalar.c",
129 "src/x32-pad/x2-scalar.c",
130 "src/x32-unpool/scalar.c",
131 "src/x32-zip/x2-scalar.c",
132 "src/x32-zip/x3-scalar.c",
133 "src/x32-zip/x4-scalar.c",
134 "src/x32-zip/xm-scalar.c",
135 "src/x8-lut/scalar.c",
136 "src/x8-zip/x2-scalar.c",
137 "src/x8-zip/x3-scalar.c",
138 "src/x8-zip/x4-scalar.c",
139 "src/x8-zip/xm-scalar.c",
140]
141
# PSIMD micro-kernel sources; compiled by the :psimd_ukernels library
# (which depends on @psimd) and pulled in by :ukernels for the aarch32,
# aarch64, wasmsimd and x86 dependency sets.
142PSIMD_UKERNELS = [
143 "src/f32-argmaxpool/mp9p8q-psimd.c",
144 "src/f32-argmaxpool/up4-psimd.c",
145 "src/f32-argmaxpool/up9-psimd.c",
146 "src/f32-avgpool/mp9p8q-psimd.c",
147 "src/f32-avgpool/up9-psimd.c",
148 "src/f32-clamp/psimd.c",
149 "src/f32-igemm/1x8-psimd-loadsplat.c",
150 "src/f32-igemm/1x8-psimd-splat.c",
151 "src/f32-igemm/1x8s4-psimd.c",
152 "src/f32-igemm/4x2c4-psimd.c",
153 "src/f32-igemm/4x8-psimd-loadsplat.c",
154 "src/f32-igemm/4x8-psimd-splat.c",
155 "src/f32-igemm/4x8s4-psimd.c",
156 "src/f32-igemm/6x8-psimd-loadsplat.c",
157 "src/f32-igemm/6x8-psimd-splat.c",
158 "src/f32-igemm/6x8s4-psimd.c",
159 "src/f32-dwconv/up4x25-psimd.c",
160 "src/f32-dwconv/up4x4-psimd.c",
161 "src/f32-dwconv/up4x9-psimd.c",
162 "src/f32-gavgpool/mp7p7q-psimd.c",
163 "src/f32-gavgpool/up7-psimd.c",
164 "src/f32-gemm/1x8-psimd-loadsplat.c",
165 "src/f32-gemm/1x8-psimd-splat.c",
166 "src/f32-gemm/1x8s4-psimd.c",
167 "src/f32-gemm/4x8-psimd-loadsplat.c",
168 "src/f32-gemm/4x8-psimd-splat.c",
169 "src/f32-gemm/4x8s4-psimd.c",
170 "src/f32-gemm/6x8-psimd-loadsplat.c",
171 "src/f32-gemm/6x8-psimd-splat.c",
172 "src/f32-gemm/6x8s4-psimd.c",
173 "src/f32-gemminc/1x8-psimd-loadsplat.c",
174 "src/f32-gemminc/1x8-psimd-splat.c",
175 "src/f32-gemminc/1x8s4-psimd.c",
176 "src/f32-gemminc/4x8-psimd-loadsplat.c",
177 "src/f32-gemminc/4x8-psimd-splat.c",
178 "src/f32-gemminc/4x8s4-psimd.c",
179 "src/f32-gemminc/6x8-psimd-loadsplat.c",
180 "src/f32-gemminc/6x8-psimd-splat.c",
181 "src/f32-gemminc/6x8s4-psimd.c",
182 "src/f32-hswish/psimd.c",
183 "src/f32-maxpool/9p8q-psimd.c",
184 "src/f32-pavgpool/mp9p8q-psimd.c",
185 "src/f32-pavgpool/up9-psimd.c",
186 "src/f32-ppmm/4x8-psimd.c",
187 "src/f32-prelu/x4-psimd.c",
188 "src/f32-vadd/psimd.c",
189 "src/f32-vmul/psimd.c",
190 "src/f32-vmulcaddc/c4-psimd-x2.c",
191 "src/f32-vsub/psimd.c",
192 "src/x32-packx/x4-psimd.c",
193 "src/x32-pad/x2-psimd.c",
194 "src/x32-unpool/psimd.c",
195 "src/x32-zip/x2-psimd.c",
196 "src/x32-zip/x3-psimd.c",
197 "src/x32-zip/x4-psimd.c",
198 "src/x32-zip/xm-psimd.c",
199]
200
201# ISA-specific micro-kernels
# NEON micro-kernel sources; :neon_ukernels compiles this same list for
# both aarch32 (with -marm -mfpu=neon) and aarch64.
202NEON_UKERNELS = [
203 "src/f32-avgpool/mp9p8q-neon.c",
204 "src/f32-avgpool/up9-neon.c",
205 "src/f32-clamp/neon.c",
206 "src/f32-igemm/1x8-neon-ld64.c",
207 "src/f32-igemm/4x12-neon-ld64.c",
208 "src/f32-igemm/4x2-neon-ld64.c",
209 "src/f32-igemm/4x4-neon-ld64.c",
210 "src/f32-igemm/4x8-neon-ld128.c",
211 "src/f32-igemm/4x8-neon-ld64.c",
212 "src/f32-igemm/6x8-neon-ld64.c",
213 "src/f32-dwconv/up4x9-neon.c",
214 "src/f32-gavgpool-spchw/neon-x4.c",
215 "src/f32-gavgpool/mp7p7q-neon.c",
216 "src/f32-gavgpool/up7-neon.c",
217 "src/f32-gemm/1x8-neon-ld64.c",
218 "src/f32-gemm/4x12-neon-ld64.c",
219 "src/f32-gemm/4x2-neon-ld64.c",
220 "src/f32-gemm/4x8-neon-ld128.c",
221 "src/f32-gemm/4x8-neon-ld64.c",
222 "src/f32-gemm/5x8-neon-ld64.c",
223 "src/f32-gemm/6x8-neon-ld64.c",
224 "src/f32-gemminc/1x8-neon-ld64.c",
225 "src/f32-gemminc/4x12-neon-ld64.c",
226 "src/f32-gemminc/4x8-neon-ld128.c",
227 "src/f32-gemminc/4x8-neon-ld64.c",
228 "src/f32-gemminc/5x8-neon-ld64.c",
229 "src/f32-gemminc/6x8-neon-ld64.c",
230 "src/f32-hswish/neon.c",
231 "src/f32-pavgpool/mp9p8q-neon.c",
232 "src/f32-pavgpool/up9-neon.c",
233 "src/f32-ppmm/4x8-neon.c",
234 "src/f32-ppmm/8x8-neon.c",
235 "src/f32-rmax/neon.c",
236 "src/f32-vmulcaddc/c4-neon-x2.c",
237 "src/q8-avgpool/mp9p8q-neon.c",
238 "src/q8-avgpool/up9-neon.c",
239 "src/q8-igemm/4x8-neon.c",
240 "src/q8-igemm/8x8-neon.c",
241 "src/q8-dwconv/up8x9-neon.c",
242 "src/q8-gavgpool/mp7p7q-neon.c",
243 "src/q8-gavgpool/up7-neon.c",
244 "src/q8-gemm/4x8-neon.c",
245 "src/q8-gemm/8x8-neon.c",
246 "src/q8-vadd/neon.c",
247 "src/u8-clamp/neon.c",
248 "src/u8-maxpool/9p8q-neon.c",
249 "src/u8-rmax/neon.c",
250 "src/x32-packx/x4-neon-st4.c",
251 "src/x32-pad/x2-neon.c",
252 "src/x32-zip/x2-neon.c",
253 "src/x32-zip/x3-neon.c",
254 "src/x32-zip/x4-neon.c",
255 "src/x32-zip/xm-neon.c",
256 "src/x8-zip/x2-neon.c",
257 "src/x8-zip/x3-neon.c",
258 "src/x8-zip/x4-neon.c",
259 "src/x8-zip/xm-neon.c",
260]
261
# NEON+FMA micro-kernel sources; :neonfma_ukernels compiles them for
# aarch32 (with -mfpu=neon-vfpv4) and, together with
# AARCH64_NEONFMA_UKERNELS, for aarch64.
262NEONFMA_UKERNELS = [
263 "src/f32-igemm/4x12-neonfma-ld64.c",
264 "src/f32-igemm/4x2-neonfma-ld64.c",
265 "src/f32-igemm/4x4-neonfma-ld64.c",
266 "src/f32-igemm/4x8-neonfma-ld128.c",
267 "src/f32-igemm/4x8-neonfma-ld64.c",
268 "src/f32-igemm/6x8-neonfma-ld64.c",
269 "src/f32-dwconv/up4x9-neonfma.c",
270 "src/f32-dwconv/up8x9-neonfma.c",
271 "src/f32-gemm/1x8-neonfma-ld64.c",
272 "src/f32-gemm/4x12-neonfma-ld64.c",
273 "src/f32-gemm/4x2-neonfma-ld64.c",
274 "src/f32-gemm/4x8-neonfma-ld128.c",
275 "src/f32-gemm/4x8-neonfma-ld64.c",
276 "src/f32-gemm/5x8-neonfma-ld64.c",
277 "src/f32-gemm/6x8-neonfma-ld64.c",
278 "src/f32-gemminc/1x8-neonfma-ld64.c",
279 "src/f32-gemminc/4x12-neonfma-ld64.c",
280 "src/f32-gemminc/4x8-neonfma-ld128.c",
281 "src/f32-gemminc/4x8-neonfma-ld64.c",
282 "src/f32-gemminc/5x8-neonfma-ld64.c",
283 "src/f32-gemminc/6x8-neonfma-ld64.c",
284 "src/f32-hswish/neonfma.c",
285 "src/f32-ppmm/4x8-neonfma.c",
286 "src/f32-ppmm/8x8-neonfma.c",
287 "src/f32-vmulcaddc/c4-neonfma-x2.c",
288]
289
# NEON+FMA micro-kernel sources that :neonfma_ukernels adds only to its
# aarch64_srcs (they are not part of the aarch32 build).
290AARCH64_NEONFMA_UKERNELS = [
291 "src/f32-conv-hwc/3x3s2p1c3x4-neonfma-2x2.c",
292 "src/f32-conv-hwc/3x3s2p1c3x8-neonfma-2x2.c",
293 "src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c",
294 "src/f32-dwconv-spchw/3x3p1-neonfma.c",
295 "src/f32-dwconv-spchw/5x5p2-neonfma.c",
296 "src/f32-dwconv-spchw/3x3s2p1-neonfma.c",
297 "src/f32-dwconv-spchw/5x5s2p2-neonfma.c",
298 "src/f32-spmm/12x1-neonfma.c",
299 "src/f32-spmm/12x2-neonfma.c",
300 "src/f32-spmm/12x4-neonfma.c",
301 "src/f32-spmm/16x1-neonfma-pipelined.c",
302 "src/f32-spmm/16x1-neonfma-unroll2.c",
303 "src/f32-spmm/16x1-neonfma.c",
304 "src/f32-spmm/16x2-neonfma.c",
305 "src/f32-spmm/16x4-neonfma.c",
306 "src/f32-spmm/4x1-neonfma-pipelined.c",
307 "src/f32-spmm/4x1-neonfma-unroll2.c",
308 "src/f32-spmm/4x1-neonfma.c",
309 "src/f32-spmm/4x2-neonfma.c",
310 "src/f32-spmm/4x4-neonfma.c",
311 "src/f32-spmm/8x1-neonfma-pipelined.c",
312 "src/f32-spmm/8x1-neonfma-unroll2.c",
313 "src/f32-spmm/8x1-neonfma.c",
314 "src/f32-spmm/8x2-neonfma.c",
315 "src/f32-spmm/8x4-neonfma.c",
316]
317
# f16 GEMM micro-kernel sources; compiled by :neonfp16arith_ukernels for
# aarch64 only, with -march=armv8.2-a+fp16.
318AARCH64_NEONFP16ARITH_UKERNELS = [
319 "src/f16-gemm/4x8-neonfp16arith-ld64.c",
320 "src/f16-gemm/6x8-neonfp16arith-ld64.c",
321 "src/f16-gemm/8x8-neonfp16arith-ld64.c",
322]
323
# SSE micro-kernel sources. There is no separate SSE library here: the
# :sse2_ukernels rule compiles this list together with SSE2_UKERNELS
# using -msse2.
324SSE_UKERNELS = [
325 "src/f32-avgpool/mp9p8q-sse.c",
326 "src/f32-avgpool/up9-sse.c",
327 "src/f32-clamp/sse.c",
328 "src/f32-igemm/1x8-sse-dup.c",
329 "src/f32-igemm/1x8-sse-load1.c",
330 "src/f32-igemm/1x8s4-sse.c",
331 "src/f32-igemm/4x2c4-sse.c",
332 "src/f32-igemm/4x8-sse-dup.c",
333 "src/f32-igemm/4x8-sse-load1.c",
334 "src/f32-igemm/4x8s4-sse.c",
335 "src/f32-dwconv/up4x25-sse.c",
336 "src/f32-dwconv/up4x4-sse.c",
337 "src/f32-dwconv/up4x9-sse.c",
338 "src/f32-gavgpool-spchw/sse-x4.c",
339 "src/f32-gavgpool/mp7p7q-sse.c",
340 "src/f32-gavgpool/up7-sse.c",
341 "src/f32-gemm/1x8-sse-dup.c",
342 "src/f32-gemm/1x8-sse-load1.c",
343 "src/f32-gemm/1x8s4-sse.c",
344 "src/f32-gemm/4x8-sse-dup.c",
345 "src/f32-gemm/4x8-sse-load1.c",
346 "src/f32-gemm/4x8s4-sse.c",
347 "src/f32-gemminc/1x8-sse-dup.c",
348 "src/f32-gemminc/1x8-sse-load1.c",
349 "src/f32-gemminc/1x8s4-sse.c",
350 "src/f32-gemminc/4x8-sse-dup.c",
351 "src/f32-gemminc/4x8-sse-load1.c",
352 "src/f32-gemminc/4x8s4-sse.c",
353 "src/f32-hswish/sse.c",
354 "src/f32-maxpool/9p8q-sse.c",
355 "src/f32-pavgpool/mp9p8q-sse.c",
356 "src/f32-pavgpool/up9-sse.c",
357 "src/f32-dwconv-spchw/3x3p1-sse.c",
358 "src/f32-dwconv-spchw/3x3s2p1-sse.c",
359 "src/f32-ppmm/4x8-sse.c",
360 "src/f32-prelu/x4-sse.c",
361 "src/f32-rmax/sse.c",
362 "src/f32-spmm/4x1-sse.c",
363 "src/f32-spmm/8x1-sse.c",
364 "src/f32-vadd/sse.c",
365 "src/f32-vmul/sse.c",
366 "src/f32-vmulcaddc/c4-sse-x2.c",
367 "src/f32-vsub/sse.c",
368 "src/x32-packx/x4-sse.c",
369]
370
# SSE2 micro-kernel sources; compiled by :sse2_ukernels (x86 only, -msse2)
# alongside SSE_UKERNELS.
371SSE2_UKERNELS = [
372 "src/f32-argmaxpool/mp9p8q-sse2.c",
373 "src/f32-argmaxpool/up4-sse2.c",
374 "src/f32-argmaxpool/up9-sse2.c",
375 "src/q8-avgpool/mp9p8q-sse2.c",
376 "src/q8-avgpool/up9-sse2.c",
377 "src/q8-igemm/4x4c2-sse2.c",
378 "src/q8-dwconv/up8x9-sse2.c",
379 "src/q8-gavgpool/mp7p7q-sse2.c",
380 "src/q8-gavgpool/up7-sse2.c",
381 "src/q8-gemm/2x4c8-sse2.c",
382 "src/q8-gemm/4x4c2-sse2.c",
383 "src/q8-vadd/sse2.c",
384 "src/u8-clamp/sse2.c",
385 "src/u8-maxpool/9p8q-sse2.c",
386 "src/u8-rmax/sse2.c",
387 "src/x32-pad/x2-sse2.c",
388 "src/x32-zip/x2-sse2.c",
389 "src/x32-zip/x3-sse2.c",
390 "src/x32-zip/x4-sse2.c",
391 "src/x32-zip/xm-sse2.c",
392 "src/x8-zip/x2-sse2.c",
393 "src/x8-zip/x3-sse2.c",
394 "src/x8-zip/x4-sse2.c",
395 "src/x8-zip/xm-sse2.c",
396]
397
# AVX micro-kernel sources; compiled by :avx_ukernels (x86 only, -mavx).
398AVX_UKERNELS = [
399 "src/f32-rmax/avx.c",
400]
401
# AVX512F micro-kernel sources; compiled by :avx512f_ukernels
# (x86 only, -mavx512f).
402AVX512F_UKERNELS = [
403 "src/f32-rmax/avx512f.c",
404]
405
# Assembly (.S) micro-kernel sources used as aarch32_srcs of :asm_ukernels.
406AARCH32_ASM_UKERNELS = [
407 "src/q8-dwconv/up8x9-aarch32-neon.S",
408]
409
# Assembly (.S) micro-kernel sources used as aarch64_srcs of :asm_ukernels.
410AARCH64_ASM_UKERNELS = [
411 "src/f32-dwconv/up4x9-aarch64-neonfma-cortex-a55.S",
412 "src/f32-dwconv/up4x9-aarch64-neonfma.S",
413 "src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S",
414 "src/f32-gemm/1x8-aarch64-neonfma-cortex-a57.S",
415 "src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S",
416 "src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S",
417 "src/f32-gemm/4x8-aarch64-neonfma-cortex-a57.S",
418 "src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S",
419 "src/f32-gemm/4x8-aarch64-neonfma-ld128.S",
420 "src/f32-gemm/4x8-aarch64-neonfma-ld64.S",
421 "src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S",
422 "src/f32-gemm/6x8-aarch64-neonfma-cortex-a57.S",
423 "src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S",
424 "src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S",
425 "src/f32-gemm/6x8-aarch64-neonfma-ld128.S",
426 "src/f32-gemm/6x8-aarch64-neonfma-ld64.S",
427 "src/f32-gemminc/1x12-aarch64-neonfma-cortex-a53.S",
428 "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a57.S",
429 "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a75.S",
430 "src/f32-gemminc/4x12-aarch64-neonfma-cortex-a53.S",
431 "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a57.S",
432 "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a75.S",
433 "src/f32-gemminc/4x8-aarch64-neonfma-ld128.S",
434 "src/f32-gemminc/4x8-aarch64-neonfma-ld64.S",
435 "src/f32-gemminc/5x8-aarch64-neonfma-cortex-a75.S",
436 "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a57.S",
437 "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a73.S",
438 "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a75.S",
439 "src/f32-gemminc/6x8-aarch64-neonfma-ld128.S",
440 "src/f32-gemminc/6x8-aarch64-neonfma-ld64.S",
441 "src/f32-igemm/1x12-aarch64-neonfma-cortex-a53.S",
442 "src/f32-igemm/1x8-aarch64-neonfma-cortex-a57.S",
443 "src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S",
444 "src/f32-igemm/4x12-aarch64-neonfma-cortex-a53.S",
445 "src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S",
446 "src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S",
447 "src/f32-igemm/6x8-aarch64-neonfma-cortex-a57.S",
448 "src/f32-igemm/6x8-aarch64-neonfma-cortex-a73.S",
449 "src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S",
450]
451
# Internal headers from src/xnnpack/ that form the common base of
# INTERNAL_HDRS, MICROKERNEL_BENCHMARK_HDRS and MICROKERNEL_TEST_HDRS.
452INTERNAL_MICROKERNEL_HDRS = [
453 "src/xnnpack/argmaxpool.h",
454 "src/xnnpack/avgpool.h",
455 "src/xnnpack/clamp.h",
456 "src/xnnpack/common.h",
457 "src/xnnpack/conv.h",
458 "src/xnnpack/dwconv.h",
459 "src/xnnpack/gavgpool.h",
460 "src/xnnpack/gemm.h",
461 "src/xnnpack/hswish.h",
462 "src/xnnpack/igemm.h",
463 "src/xnnpack/lut.h",
464 "src/xnnpack/math.h",
465 "src/xnnpack/maxpool.h",
466 "src/xnnpack/packx.h",
467 "src/xnnpack/pad.h",
468 "src/xnnpack/params.h",
469 "src/xnnpack/pavgpool.h",
470 "src/xnnpack/ppmm.h",
471 "src/xnnpack/prelu.h",
472 "src/xnnpack/rmax.h",
473 "src/xnnpack/scalar-utils.h",
474 "src/xnnpack/spmm.h",
475 "src/xnnpack/unpool.h",
476 "src/xnnpack/vadd.h",
477 "src/xnnpack/vmul.h",
478 "src/xnnpack/vmulcaddc.h",
479 "src/xnnpack/vsub.h",
480 "src/xnnpack/zip.h",
481]
482
# INTERNAL_MICROKERNEL_HDRS plus the public include/xnnpack.h and the
# remaining internal headers. Used as hdrs of the micro-kernel,
# :indirection and :operator_run libraries and as textual_hdrs of :operators.
483INTERNAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
484 "include/xnnpack.h",
485 "src/xnnpack/allocator.h",
486 "src/xnnpack/compute.h",
487 "src/xnnpack/im2col.h",
488 "src/xnnpack/indirection.h",
489 "src/xnnpack/log.h",
490 "src/xnnpack/operator.h",
491 "src/xnnpack/pack.h",
492 "src/xnnpack/requantization.h",
493 "src/xnnpack/requantization-stubs.h",
494]
495
# Headers appended to the srcs of every micro-kernel benchmark target.
496MICROKERNEL_BENCHMARK_HDRS = INTERNAL_MICROKERNEL_HDRS + [
497 "src/xnnpack/requantization.h",
498 "include/xnnpack.h",
499]
500
# Headers appended to the srcs of every micro-kernel unit test; same as the
# benchmark header set plus src/xnnpack/isa-checks.h.
501MICROKERNEL_TEST_HDRS = INTERNAL_MICROKERNEL_HDRS + [
502 "src/xnnpack/isa-checks.h",
503 "src/xnnpack/requantization.h",
504 "include/xnnpack.h",
505]
506
# Header list named for operator tests that need the params definitions.
# NOTE(review): not referenced in this portion of the file; presumably used
# by operator test rules further down - confirm.
507OPERATOR_TEST_PARAMS_HDRS = [
508 "src/xnnpack/params.h",
509 "src/xnnpack/common.h",
510]
511
# Extra headers appended to the srcs of those benchmarks and unit tests
# that list WEIGHTS_PACK_HDRS (pack.h and the headers listed with it).
512WEIGHTS_PACK_HDRS = [
513 "src/xnnpack/pack.h",
514 "src/xnnpack/operator.h",
515 "src/xnnpack/compute.h",
516]
517
# Library of the scalar micro-kernels (SCALAR_UKERNELS). The sources are
# passed as plain srcs, i.e. not restricted to one architecture attribute;
# on aarch32 they are built with -marm.
518xnnpack_cc_library(
519 name = "scalar_ukernels",
520 srcs = SCALAR_UKERNELS,
521 hdrs = INTERNAL_HDRS,
522 aarch32_copts = ["-marm"],
523 copts = xnnpack_std_copts(),
524 deps = [
525 "@FP16",
526 "@FXdiv",
527 ],
528)
529
# Library of the PSIMD micro-kernels (PSIMD_UKERNELS), built on top of the
# @psimd dependency. Passes -O3 -ffast-math through optimized_copts and
# enables NEON (-mfpu=neon) when targeting aarch32.
530xnnpack_cc_library(
531 name = "psimd_ukernels",
532 srcs = PSIMD_UKERNELS,
533 hdrs = INTERNAL_HDRS,
534 aarch32_copts = [
535 "-marm",
536 "-mfpu=neon",
537 ],
538 copts = xnnpack_std_copts(),
539 optimized_copts = [
540 "-O3",
541 "-ffast-math",
542 ],
543 deps = [
544 "@FP16",
545 "@psimd",
546 ],
547)
548
# Library of the NEON micro-kernels. The same NEON_UKERNELS list is used for
# aarch32 and aarch64; only aarch32 needs explicit -marm -mfpu=neon flags.
549xnnpack_cc_library(
550 name = "neon_ukernels",
551 hdrs = INTERNAL_HDRS,
552 aarch32_copts = [
553 "-marm",
554 "-mfpu=neon",
555 ],
556 aarch32_srcs = NEON_UKERNELS,
557 aarch64_srcs = NEON_UKERNELS,
558 copts = xnnpack_std_copts(),
559 deps = ["@FP16"],
560)
561
# Library of the NEON+FMA micro-kernels. aarch32 builds NEONFMA_UKERNELS with
# -mfpu=neon-vfpv4; aarch64 additionally builds AARCH64_NEONFMA_UKERNELS.
562xnnpack_cc_library(
563 name = "neonfma_ukernels",
564 hdrs = INTERNAL_HDRS,
565 aarch32_copts = [
566 "-marm",
567 "-mfpu=neon-vfpv4",
568 ],
569 aarch32_srcs = NEONFMA_UKERNELS,
570 aarch64_srcs = NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS,
571 copts = xnnpack_std_copts(),
572 deps = ["@FP16"],
573)
574
# Library of the f16 micro-kernels (AARCH64_NEONFP16ARITH_UKERNELS).
# aarch64-only: no sources are given for other architectures, and the
# sources are compiled with -march=armv8.2-a+fp16.
575xnnpack_cc_library(
576 name = "neonfp16arith_ukernels",
577 hdrs = INTERNAL_HDRS,
578 aarch64_copts = ["-march=armv8.2-a+fp16"],
579 aarch64_srcs = AARCH64_NEONFP16ARITH_UKERNELS,
580 copts = xnnpack_std_copts(),
581 deps = ["@FP16"],
582)
583
# Library of the x86 SSE and SSE2 micro-kernels. Both source lists are
# compiled in this single target with -msse2.
584xnnpack_cc_library(
585 name = "sse2_ukernels",
586 hdrs = INTERNAL_HDRS,
587 copts = xnnpack_std_copts(),
588 x86_copts = ["-msse2"],
589 x86_srcs = SSE_UKERNELS + SSE2_UKERNELS,
590 deps = ["@FP16"],
591)
592
# Library of the x86 AVX micro-kernels (AVX_UKERNELS), compiled with -mavx.
593xnnpack_cc_library(
594 name = "avx_ukernels",
595 hdrs = INTERNAL_HDRS,
596 copts = xnnpack_std_copts(),
597 x86_copts = ["-mavx"],
598 x86_srcs = AVX_UKERNELS,
599 deps = ["@FP16"],
600)
601
# Library of the x86 AVX512F micro-kernels (AVX512F_UKERNELS), compiled
# with -mavx512f.
602xnnpack_cc_library(
603 name = "avx512f_ukernels",
604 hdrs = INTERNAL_HDRS,
605 copts = xnnpack_std_copts(),
606 x86_copts = ["-mavx512f"],
607 x86_srcs = AVX512F_UKERNELS,
608 deps = ["@FP16"],
609)
610
# Library of the hand-written assembly micro-kernels for aarch32 and
# aarch64. Unlike the C micro-kernel libraries it sets no copts or deps and
# exposes only src/xnnpack/assembly.h.
611xnnpack_cc_library(
612 name = "asm_ukernels",
613 hdrs = ["src/xnnpack/assembly.h"],
614 aarch32_srcs = AARCH32_ASM_UKERNELS,
615 aarch64_srcs = AARCH64_ASM_UKERNELS,
616)
617
# Aggregate target that bundles the micro-kernel libraries per platform.
# :scalar_ukernels is listed under generic_deps; the other attributes add
# the ISA-specific libraries for aarch32, aarch64, wasmsimd and x86.
# NOTE(review): how generic_deps combines with the per-platform lists is
# decided by xnnpack_aggregate_library in build_defs.bzl - not visible here.
618xnnpack_aggregate_library(
619 name = "ukernels",
620 aarch32_deps = [
621 ":psimd_ukernels",
622 ":neon_ukernels",
623 ":neonfma_ukernels",
624 ":asm_ukernels",
625 ],
626 aarch64_deps = [
627 ":psimd_ukernels",
628 ":neon_ukernels",
629 ":neonfma_ukernels",
630 ":neonfp16arith_ukernels",
631 ":asm_ukernels",
632 ],
633 generic_deps = [":scalar_ukernels"],
634 wasmsimd_deps = [
635 ":psimd_ukernels",
636 ],
637 x86_deps = [
638 ":psimd_ukernels",
639 ":sse2_ukernels",
640 ":avx_ukernels",
641 ":avx512f_ukernels",
642 ],
643)
644
# Stand-alone library for src/im2col.c; used as an extra dependency of the
# f32_im2col_gemm_bench benchmark.
645xnnpack_cc_library(
646 name = "im2col",
647 srcs = ["src/im2col.c"],
648 hdrs = [
649 "src/xnnpack/common.h",
650 "src/xnnpack/im2col.h",
651 ],
652 copts = xnnpack_std_copts(),
653)
654
# Library for src/indirection.c; a dependency of :operators and of the
# benchmarks that add ":indirection" to their deps.
655xnnpack_cc_library(
656 name = "indirection",
657 srcs = ["src/indirection.c"],
658 hdrs = INTERNAL_HDRS,
659 copts = xnnpack_std_copts(),
660 deps = [
661 "@FP16",
662 "@FXdiv",
663 "@pthreadpool",
664 ],
665)
666
# Library for src/operator-run.c; a direct dependency of the public :XNNPACK
# target. Built with the standard copts plus -Wno-vla (see comment below).
667xnnpack_cc_library(
668 name = "operator_run",
669 srcs = ["src/operator-run.c"],
670 hdrs = INTERNAL_HDRS,
671 copts = xnnpack_std_copts() + [
672 # Wrappers for multi-pass microkernels use VLAs for temporary buffers.
673 "-Wno-vla",
674 ],
675 deps = [
676 "@FP16",
677 "@FXdiv",
678 "@clog",
679 "@pthreadpool",
680 ],
681)
682
# Header-less helper library that only propagates the XNN_ENABLE_ASSEMBLY
# define to its dependents: 1 or 0 when one of the explicit config settings
# matches, and no define at all by default.
# NOTE(review): the :xnn_enable_assembly_explicit_* config_settings are not
# defined in this portion of the file.
683cc_library(
684 name = "enable_assembly",
685 defines = select({
686 ":xnn_enable_assembly_explicit_true": ["XNN_ENABLE_ASSEMBLY=1"],
687 ":xnn_enable_assembly_explicit_false": ["XNN_ENABLE_ASSEMBLY=0"],
688 "//conditions:default": [],
689 }),
690)
691
# Operator implementations: OPERATOR_SRCS plus init.c and operator-delete.c,
# with src/wasm-stubs.c added for the :emscripten_wasm configuration.
# Size-minimizing copts are applied except in :debug_build. Internal headers
# are exposed as textual_hdrs and found through -Isrc / -Iinclude.
692cc_library(
693 name = "operators",
694 srcs = OPERATOR_SRCS + [
695 "src/init.c",
696 "src/operator-delete.c",
697 ] + select({
698 ":emscripten_wasm": ["src/wasm-stubs.c"],
699 "//conditions:default": [],
700 }),
701 copts = xnnpack_std_copts() + [
702 "-Isrc",
703 "-Iinclude",
704 ] + select({
705 ":debug_build": [],
706 "//conditions:default": xnnpack_min_size_copts(),
707 }),
708 linkstatic = True,
709 textual_hdrs = INTERNAL_HDRS,
710 deps = [
711 ":enable_assembly",
712 ":indirection",
713 ":ukernels",
714 "@FP16",
715 "@FXdiv",
716 "@clog",
717 "@cpuinfo",
718 "@pthreadpool",
719 ],
720)
721
# Public library target: exports include/xnnpack.h and links :operator_run
# and :operators. Visibility is controlled by xnnpack_visibility().
722cc_library(
723 name = "XNNPACK",
724 hdrs = ["include/xnnpack.h"],
725 includes = ["include"],
726 linkstatic = True,
727 # XNNPACK API is unstable and can break without notice.
728 # End users are encouraged to use this package through a TFLite delegate.
729 visibility = xnnpack_visibility(),
730 deps = [
731 ":operator_run",
732 ":operators",
733 "@pthreadpool",
734 ],
735)
736
# Shared helper code for the benchmarks (bench/utils.cc and bench/utils.h);
# listed in both OPERATOR_BENCHMARK_DEPS and MICROKERNEL_BENCHMARK_DEPS.
737cc_library(
738 name = "bench_utils",
739 srcs = ["bench/utils.cc"],
740 hdrs = ["bench/utils.h"],
741 copts = ["-Wno-unused-result"],
742 linkstatic = True,
743 deps = ["@cpuinfo"],
744)
745
746######################### Benchmarks for micro-kernels #########################
747
# Micro-kernel benchmark built from bench/q8-gemm.cc. Optionally compares
# against ruy and gemmlowp through the xnnpack_optional_* copts/deps helpers.
748xnnpack_benchmark(
749 name = "q8_gemm_bench",
750 srcs = [
751 "bench/gemm.h",
752 "bench/q8-gemm.cc",
753 "src/xnnpack/AlignedAllocator.h",
754 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
755 copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts() + xnnpack_optional_gemmlowp_copts(),
756 deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps() + xnnpack_optional_gemmlowp_deps(),
757)
758
# Micro-kernel benchmark built from bench/f16-gemm.cc.
759xnnpack_benchmark(
760 name = "f16_gemm_bench",
761 srcs = [
762 "bench/f16-gemm.cc",
763 "bench/gemm.h",
764 "src/xnnpack/AlignedAllocator.h",
765 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
766 copts = ["-Wno-unused-function"],
767 deps = MICROKERNEL_BENCHMARK_DEPS,
768)
769
# Micro-kernel benchmark built from bench/f32-igemm.cc; additionally links
# the :indirection library.
770xnnpack_benchmark(
771 name = "f32_igemm_bench",
772 srcs = [
773 "bench/f32-igemm.cc",
774 "bench/conv.h",
775 "src/xnnpack/AlignedAllocator.h",
776 ] + MICROKERNEL_BENCHMARK_HDRS,
777 deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
778)
779
# Micro-kernel benchmark built from bench/f32-conv-hwc.cc.
780xnnpack_benchmark(
781 name = "f32_conv_hwc_bench",
782 srcs = [
783 "bench/f32-conv-hwc.cc",
784 "bench/dconv.h",
785 "src/xnnpack/AlignedAllocator.h",
786 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
787 copts = ["-Wno-unused-function"],
788 deps = MICROKERNEL_BENCHMARK_DEPS,
789)
790
# Micro-kernel benchmark built from bench/f32-dwconv.cc; additionally links
# the :indirection library.
791xnnpack_benchmark(
792 name = "f32_dwconv_bench",
793 srcs = [
794 "bench/f32-dwconv.cc",
795 "bench/dwconv.h",
796 "src/xnnpack/AlignedAllocator.h",
797 ] + MICROKERNEL_BENCHMARK_HDRS,
798 deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
799)
800
# Micro-kernel benchmark built from bench/f32-dwconv-spchw.cc; shares
# bench/dwconv.h with f32_dwconv_bench and links :indirection.
801xnnpack_benchmark(
802 name = "f32_dwconv_spchw_bench",
803 srcs = [
804 "bench/f32-dwconv-spchw.cc",
805 "bench/dwconv.h",
806 "src/xnnpack/AlignedAllocator.h",
807 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
808 deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
809)
810
# Micro-kernel benchmark built from bench/f32-gemm.cc. Optionally compares
# against ruy through the xnnpack_optional_ruy_* helpers.
811xnnpack_benchmark(
812 name = "f32_gemm_bench",
813 srcs = [
814 "bench/f32-gemm.cc",
815 "bench/gemm.h",
816 "src/xnnpack/AlignedAllocator.h",
817 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
818 copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts(),
819 deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps(),
820)
821
# Micro-kernel benchmark built from bench/f32-rmax.cc.
822xnnpack_benchmark(
823 name = "f32_rmax_bench",
824 srcs = [
825 "bench/f32-rmax.cc",
826 "src/xnnpack/AlignedAllocator.h",
827 ] + MICROKERNEL_BENCHMARK_HDRS,
828 deps = MICROKERNEL_BENCHMARK_DEPS,
829)
830
# Micro-kernel benchmark built from bench/f32-spmm.cc; reuses bench/gemm.h.
831xnnpack_benchmark(
832 name = "f32_spmm_bench",
833 srcs = [
834 "bench/f32-spmm.cc",
835 "bench/gemm.h",
836 "src/xnnpack/AlignedAllocator.h",
837 ] + MICROKERNEL_BENCHMARK_HDRS,
838 copts = ["-Wno-unused-function"],
839 deps = MICROKERNEL_BENCHMARK_DEPS,
840)
841
# Micro-kernel benchmark built from bench/f32-im2col-gemm.cc; additionally
# links the :im2col library.
842xnnpack_benchmark(
843 name = "f32_im2col_gemm_bench",
844 srcs = [
845 "bench/f32-im2col-gemm.cc",
846 "bench/conv.h",
847 "src/xnnpack/AlignedAllocator.h",
848 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
849 deps = MICROKERNEL_BENCHMARK_DEPS + [":im2col"],
850)
851
852########################### Benchmarks for operators ###########################
853
# Operator benchmark built from bench/add.cc against the public :XNNPACK
# library (via OPERATOR_BENCHMARK_DEPS).
854xnnpack_benchmark(
855 name = "add_bench",
856 srcs = ["bench/add.cc"],
857 deps = OPERATOR_BENCHMARK_DEPS,
858)
859
# Operator benchmark built from bench/average-pooling.cc.
860xnnpack_benchmark(
861 name = "average_pooling_bench",
862 srcs = ["bench/average-pooling.cc"],
863 deps = OPERATOR_BENCHMARK_DEPS,
864)
865
# Operator benchmark built from bench/channel-shuffle.cc.
866xnnpack_benchmark(
867 name = "channel_shuffle_bench",
868 srcs = ["bench/channel-shuffle.cc"],
869 deps = OPERATOR_BENCHMARK_DEPS,
870)
871
# Operator benchmark built from bench/convolution.cc. Optionally compares
# against TFLite and ARM Compute Library through the xnnpack_optional_*
# copts/deps helpers.
872xnnpack_benchmark(
873 name = "convolution_bench",
874 srcs = ["bench/convolution.cc"],
875 copts = xnnpack_optional_tflite_copts() + xnnpack_optional_armcl_copts(),
876 deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps() + xnnpack_optional_armcl_deps(),
877)
878
# Operator benchmark built from bench/deconvolution.cc. Optionally compares
# against TFLite through the xnnpack_optional_tflite_* helpers.
879xnnpack_benchmark(
880 name = "deconvolution_bench",
881 srcs = ["bench/deconvolution.cc"],
882 copts = xnnpack_optional_tflite_copts(),
883 deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
884)
885
# Operator benchmark built from bench/global-average-pooling.cc.
886xnnpack_benchmark(
887 name = "global_average_pooling_bench",
888 srcs = ["bench/global-average-pooling.cc"],
889 deps = OPERATOR_BENCHMARK_DEPS,
890)
891
# Operator benchmark built from bench/max-pooling.cc.
892xnnpack_benchmark(
893 name = "max_pooling_bench",
894 srcs = ["bench/max-pooling.cc"],
895 deps = OPERATOR_BENCHMARK_DEPS,
896)
897
# Operator benchmark built from bench/sigmoid.cc.
898xnnpack_benchmark(
899 name = "sigmoid_bench",
900 srcs = ["bench/sigmoid.cc"],
901 deps = OPERATOR_BENCHMARK_DEPS,
902)
903
# Operator benchmark built from bench/softargmax.cc.
904xnnpack_benchmark(
905 name = "softargmax_bench",
906 srcs = ["bench/softargmax.cc"],
907 deps = OPERATOR_BENCHMARK_DEPS,
908)
909
910######################### Unit tests for micro-kernels #########################
911
# Micro-kernel unit test built from test/f16-gemm.cc with the shared GEMM
# tester header.
912xnnpack_unit_test(
913 name = "f16_gemm_test",
914 srcs = [
915 "test/f16-gemm.cc",
916 "test/gemm-microkernel-tester.h",
917 "src/xnnpack/AlignedAllocator.h",
918 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
919 deps = MICROKERNEL_TEST_DEPS,
920)
921
# Micro-kernel unit test built from test/f32-argmaxpool.cc.
922xnnpack_unit_test(
923 name = "f32_argmaxpool_test",
924 srcs = [
925 "test/f32-argmaxpool.cc",
926 "test/argmaxpool-microkernel-tester.h",
927 "src/xnnpack/AlignedAllocator.h",
928 ] + MICROKERNEL_TEST_HDRS,
929 deps = MICROKERNEL_TEST_DEPS,
930)
931
# Micro-kernel unit test built from test/f32-avgpool.cc.
932xnnpack_unit_test(
933 name = "f32_avgpool_test",
934 srcs = [
935 "test/f32-avgpool.cc",
936 "test/avgpool-microkernel-tester.h",
937 "src/xnnpack/AlignedAllocator.h",
938 ] + MICROKERNEL_TEST_HDRS,
939 deps = MICROKERNEL_TEST_DEPS,
940)
941
# Micro-kernel unit test built from test/f32-clamp.cc.
942xnnpack_unit_test(
943 name = "f32_clamp_test",
944 srcs = [
945 "test/f32-clamp.cc",
946 "test/clamp-microkernel-tester.h",
947 ] + MICROKERNEL_TEST_HDRS,
948 deps = MICROKERNEL_TEST_DEPS,
949)
950
# Micro-kernel unit test built from test/f32-igemm.cc; uses the shared GEMM
# tester header.
951xnnpack_unit_test(
952 name = "f32_igemm_test",
953 srcs = [
954 "test/f32-igemm.cc",
955 "test/gemm-microkernel-tester.h",
956 "src/xnnpack/AlignedAllocator.h",
957 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
958 deps = MICROKERNEL_TEST_DEPS,
959)
960
# Micro-kernel unit test built from test/f32-conv-hwc.cc.
961xnnpack_unit_test(
962 name = "f32_conv_hwc_test",
963 srcs = [
964 "test/f32-conv-hwc.cc",
965 "test/conv-hwc-microkernel-tester.h",
966 "src/xnnpack/AlignedAllocator.h",
967 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
968 deps = MICROKERNEL_TEST_DEPS,
969)
970
# Micro-kernel unit test built from test/f32-conv-hwc2spchw.cc.
971xnnpack_unit_test(
972 name = "f32_conv_hwc2spchw_test",
973 srcs = [
974 "test/f32-conv-hwc2spchw.cc",
975 "test/conv-hwc2spchw-microkernel-tester.h",
976 "src/xnnpack/AlignedAllocator.h",
977 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
978 deps = MICROKERNEL_TEST_DEPS,
979)
980
# Micro-kernel unit test built from test/f32-dwconv.cc.
981xnnpack_unit_test(
982 name = "f32_dwconv_test",
983 srcs = [
984 "test/f32-dwconv.cc",
985 "test/dwconv-microkernel-tester.h",
986 "src/xnnpack/AlignedAllocator.h",
987 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
988 deps = MICROKERNEL_TEST_DEPS,
989)
990
# Micro-kernel unit test built from test/f32-dwconv-spchw.cc.
991xnnpack_unit_test(
992 name = "f32_dwconv_spchw_test",
993 srcs = [
994 "test/f32-dwconv-spchw.cc",
995 "test/dwconv-spchw-microkernel-tester.h",
996 "src/xnnpack/AlignedAllocator.h",
997 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
998 deps = MICROKERNEL_TEST_DEPS,
999)
1000
# Micro-kernel unit test built from test/f32-gavgpool.cc.
1001xnnpack_unit_test(
1002 name = "f32_gavgpool_test",
1003 srcs = [
1004 "test/f32-gavgpool.cc",
1005 "test/gavgpool-microkernel-tester.h",
1006 "src/xnnpack/AlignedAllocator.h",
1007 ] + MICROKERNEL_TEST_HDRS,
1008 deps = MICROKERNEL_TEST_DEPS,
1009)
1010
# Micro-kernel unit test built from test/f32-gavgpool-spchw.cc.
1011xnnpack_unit_test(
1012 name = "f32_gavgpool_spchw_test",
1013 srcs = [
1014 "test/f32-gavgpool-spchw.cc",
1015 "test/gavgpool-spchw-microkernel-tester.h",
1016 "src/xnnpack/AlignedAllocator.h",
1017 ] + MICROKERNEL_TEST_HDRS,
1018 deps = MICROKERNEL_TEST_DEPS,
1019)
1020
# Micro-kernel unit test built from test/f32-gemm.cc with the shared GEMM
# tester header.
1021xnnpack_unit_test(
1022 name = "f32_gemm_test",
1023 srcs = [
1024 "test/f32-gemm.cc",
1025 "test/gemm-microkernel-tester.h",
1026 "src/xnnpack/AlignedAllocator.h",
1027 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
1028 deps = MICROKERNEL_TEST_DEPS,
1029)
1030
# Micro-kernel unit test built from test/f32-gemminc.cc; uses the shared
# GEMM tester header.
1031xnnpack_unit_test(
1032 name = "f32_gemminc_test",
1033 srcs = [
1034 "test/f32-gemminc.cc",
1035 "test/gemm-microkernel-tester.h",
1036 "src/xnnpack/AlignedAllocator.h",
1037 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
1038 deps = MICROKERNEL_TEST_DEPS,
1039)
1040
# Micro-kernel unit test built from test/f32-hswish.cc.
1041xnnpack_unit_test(
1042 name = "f32_hswish_test",
1043 srcs = [
1044 "test/f32-hswish.cc",
1045 "test/hswish-microkernel-tester.h",
1046 ] + MICROKERNEL_TEST_HDRS,
1047 deps = MICROKERNEL_TEST_DEPS,
1048)
1049
# Micro-kernel unit test built from test/f32-maxpool.cc.
1050xnnpack_unit_test(
1051 name = "f32_maxpool_test",
1052 srcs = [
1053 "test/f32-maxpool.cc",
1054 "test/maxpool-microkernel-tester.h",
1055 ] + MICROKERNEL_TEST_HDRS,
1056 deps = MICROKERNEL_TEST_DEPS,
1057)
1058
# Micro-kernel unit test built from test/f32-pavgpool.cc; reuses the
# avgpool tester header.
1059xnnpack_unit_test(
1060 name = "f32_pavgpool_test",
1061 srcs = [
1062 "test/f32-pavgpool.cc",
1063 "test/avgpool-microkernel-tester.h",
1064 "src/xnnpack/AlignedAllocator.h",
1065 ] + MICROKERNEL_TEST_HDRS,
1066 deps = MICROKERNEL_TEST_DEPS,
1067)
1068
# Micro-kernel unit test built from test/f32-ppmm.cc; uses the shared GEMM
# tester header.
1069xnnpack_unit_test(
1070 name = "f32_ppmm_test",
1071 srcs = [
1072 "test/f32-ppmm.cc",
1073 "test/gemm-microkernel-tester.h",
1074 "src/xnnpack/AlignedAllocator.h",
1075 ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
1076 deps = MICROKERNEL_TEST_DEPS,
1077)
1078
# Micro-kernel unit test built from test/f32-prelu.cc.
1079xnnpack_unit_test(
1080 name = "f32_prelu_test",
1081 srcs = [
1082 "test/f32-prelu.cc",
1083 "test/prelu-microkernel-tester.h",
1084 "src/xnnpack/AlignedAllocator.h",
1085 ] + MICROKERNEL_TEST_HDRS,
1086 deps = MICROKERNEL_TEST_DEPS,
1087)
1088
# Micro-kernel unit test built from test/f32-rmax.cc.
1089xnnpack_unit_test(
1090 name = "f32_rmax_test",
1091 srcs = [
1092 "test/f32-rmax.cc",
1093 "test/rmax-microkernel-tester.h",
1094 ] + MICROKERNEL_TEST_HDRS,
1095 deps = MICROKERNEL_TEST_DEPS,
1096)
1097
# Micro-kernel unit test built from test/f32-spmm.cc.
1098xnnpack_unit_test(
1099 name = "f32_spmm_test",
1100 srcs = [
1101 "test/f32-spmm.cc",
1102 "test/spmm-microkernel-tester.h",
1103 "src/xnnpack/AlignedAllocator.h",
1104 ] + MICROKERNEL_TEST_HDRS,
1105 deps = MICROKERNEL_TEST_DEPS,
1106)
1107
# Micro-kernel unit test built from test/f32-vadd.cc.
1108xnnpack_unit_test(
1109 name = "f32_vadd_test",
1110 srcs = [
1111 "test/f32-vadd.cc",
1112 "test/vadd-microkernel-tester.h",
1113 ] + MICROKERNEL_TEST_HDRS,
1114 deps = MICROKERNEL_TEST_DEPS,
1115)
1116
# Micro-kernel unit test built from test/f32-vsub.cc.
1117xnnpack_unit_test(
1118 name = "f32_vsub_test",
1119 srcs = [
1120 "test/f32-vsub.cc",
1121 "test/vsub-microkernel-tester.h",
1122 ] + MICROKERNEL_TEST_HDRS,
1123 deps = MICROKERNEL_TEST_DEPS,
1124)
1125
# Micro-kernel unit test built from test/f32-vmul.cc.
1126xnnpack_unit_test(
1127 name = "f32_vmul_test",
1128 srcs = [
1129 "test/f32-vmul.cc",
1130 "test/vmul-microkernel-tester.h",
1131 ] + MICROKERNEL_TEST_HDRS,
1132 deps = MICROKERNEL_TEST_DEPS,
1133)
1134
# Microkernel unit test for f32 vmulcaddc. Besides its tester header it needs
# the weight-packing headers (WEIGHTS_PACK_HDRS) and AlignedAllocator.h.
xnnpack_unit_test(
    name = "f32_vmulcaddc_test",
    srcs = [
        "test/f32-vmulcaddc.cc",
        "test/vmulcaddc-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1144
# Microkernel unit test for q8 avgpool. The avgpool tester header is also
# listed by f32_pavgpool_test.
xnnpack_unit_test(
    name = "q8_avgpool_test",
    srcs = [
        "test/q8-avgpool.cc",
        "test/avgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1154
# Microkernel unit test for q8 igemm. It uses the shared gemm tester header
# (there is no igemm-specific one) plus the weight-packing headers.
# NOTE(review): this target sits before q8_dwconv_test, which breaks the
# otherwise alphabetical ordering of the q8 tests.
xnnpack_unit_test(
    name = "q8_igemm_test",
    srcs = [
        "test/q8-igemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1164
# Microkernel unit test for q8 dwconv. Sources: test/q8-dwconv.cc, the dwconv
# tester header, AlignedAllocator.h and the weight-packing headers.
xnnpack_unit_test(
    name = "q8_dwconv_test",
    srcs = [
        "test/q8-dwconv.cc",
        "test/dwconv-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1174
# Microkernel unit test for q8 gavgpool. Sources: test/q8-gavgpool.cc, the
# gavgpool tester header and AlignedAllocator.h.
xnnpack_unit_test(
    name = "q8_gavgpool_test",
    srcs = [
        "test/q8-gavgpool.cc",
        "test/gavgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1184
# Microkernel unit test for q8 gemm. The gemm tester header is also listed by
# q8_igemm_test and f32_ppmm_test.
xnnpack_unit_test(
    name = "q8_gemm_test",
    srcs = [
        "test/q8-gemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1194
# Microkernel unit test for q8 vadd. The vadd tester header is also listed by
# f32_vadd_test.
xnnpack_unit_test(
    name = "q8_vadd_test",
    srcs = [
        "test/q8-vadd.cc",
        "test/vadd-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1203
# Microkernel unit test for u8 clamp. Sources: test/u8-clamp.cc and the clamp
# microkernel tester header.
xnnpack_unit_test(
    name = "u8_clamp_test",
    srcs = [
        "test/u8-clamp.cc",
        "test/clamp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1212
# Microkernel unit test for u8 lut32norm. Note the tester header is named
# lut-norm-microkernel-tester.h, not lut32norm-microkernel-tester.h.
xnnpack_unit_test(
    name = "u8_lut32norm_test",
    srcs = [
        "test/u8-lut32norm.cc",
        "test/lut-norm-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1221
# Microkernel unit test for u8 maxpool. The maxpool tester header is also
# listed by f32_maxpool_test.
xnnpack_unit_test(
    name = "u8_maxpool_test",
    srcs = [
        "test/u8-maxpool.cc",
        "test/maxpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1230
# Microkernel unit test for u8 rmax. The rmax tester header is also listed by
# f32_rmax_test.
xnnpack_unit_test(
    name = "u8_rmax_test",
    srcs = [
        "test/u8-rmax.cc",
        "test/rmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1239
# Microkernel unit test for x32 packx. Note the tester header is named
# pack-microkernel-tester.h; AlignedAllocator.h is listed explicitly.
xnnpack_unit_test(
    name = "x32_packx_test",
    srcs = [
        "test/x32-packx.cc",
        "test/pack-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1249
# Microkernel unit test for x32 pad. Sources: test/x32-pad.cc and the pad
# microkernel tester header.
xnnpack_unit_test(
    name = "x32_pad_test",
    srcs = [
        "test/x32-pad.cc",
        "test/pad-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1258
# Microkernel unit test for x32 unpool. Sources: test/x32-unpool.cc and the
# unpool microkernel tester header.
xnnpack_unit_test(
    name = "x32_unpool_test",
    srcs = [
        "test/x32-unpool.cc",
        "test/unpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1267
# Microkernel unit test for x32 zip. The zip tester header is also listed by
# x8_zip_test.
xnnpack_unit_test(
    name = "x32_zip_test",
    srcs = [
        "test/x32-zip.cc",
        "test/zip-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1276
# Microkernel unit test for x8 lut. Sources: test/x8-lut.cc and the lut
# microkernel tester header.
xnnpack_unit_test(
    name = "x8_lut_test",
    srcs = [
        "test/x8-lut.cc",
        "test/lut-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1285
# Microkernel unit test for x8 zip. The zip tester header is also listed by
# x32_zip_test.
xnnpack_unit_test(
    name = "x8_zip_test",
    srcs = [
        "test/x8-zip.cc",
        "test/zip-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1294
1295########################### Size test for the library ##########################
1296
# Binary (not a unit test) built from the single C file test/size.c. Its only
# dependency is the full :XNNPACK library.
xnnpack_binary(
    name = "size_test",
    srcs = ["test/size.c"],
    deps = [":XNNPACK"],
)
1302
1303########################### Unit tests for operators ###########################
1304
# Operator-level unit test for add. Sources: test/add.cc and its operator
# tester header.
xnnpack_unit_test(
    name = "add_test",
    srcs = [
        "test/add.cc",
        "test/add-operator-tester.h",
    ],
    # OPERATOR_TEST_DEPS depends on the full :XNNPACK library, not :ukernels.
    deps = OPERATOR_TEST_DEPS,
)
1313
# Operator-level unit test for argmax pooling. In addition to its tester
# header it needs OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "argmax_pooling_test",
    srcs = [
        "test/argmax-pooling.cc",
        "test/argmax-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
1322
# Operator-level unit test for average pooling. In addition to its tester
# header it needs OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "average_pooling_test",
    srcs = [
        "test/average-pooling.cc",
        "test/average-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
1331
# Operator-level unit test for channel pad. In addition to its tester header
# it needs OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "channel_pad_test",
    srcs = [
        "test/channel-pad.cc",
        "test/channel-pad-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
1340
# Operator-level unit test for channel shuffle. Sources:
# test/channel-shuffle.cc and its operator tester header.
xnnpack_unit_test(
    name = "channel_shuffle_test",
    srcs = [
        "test/channel-shuffle.cc",
        "test/channel-shuffle-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1349
# Operator-level unit test for clamp. Sources: test/clamp.cc and its operator
# tester header.
xnnpack_unit_test(
    name = "clamp_test",
    srcs = [
        "test/clamp.cc",
        "test/clamp-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1358
# Operator-level unit test for convolution. Sources: test/convolution.cc and
# its operator tester header.
xnnpack_unit_test(
    name = "convolution_test",
    srcs = [
        "test/convolution.cc",
        "test/convolution-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1367
# Operator-level unit test for the spnchw convolution variant. It has its own
# tester header, separate from the one used by convolution_test.
xnnpack_unit_test(
    name = "convolution_spnchw_test",
    srcs = [
        "test/convolution-spnchw.cc",
        "test/convolution-spnchw-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1376
# Operator-level unit test for deconvolution. In addition to its tester header
# it needs OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "deconvolution_test",
    srcs = [
        "test/deconvolution.cc",
        "test/deconvolution-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
1385
# Operator-level unit test for fully connected. Sources:
# test/fully-connected.cc and its operator tester header.
xnnpack_unit_test(
    name = "fully_connected_test",
    srcs = [
        "test/fully-connected.cc",
        "test/fully-connected-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1394
# Operator-level unit test for global average pooling. In addition to its
# tester header it needs OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "global_average_pooling_test",
    srcs = [
        "test/global-average-pooling.cc",
        "test/global-average-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
1403
# Operator-level unit test for the spnchw global average pooling variant.
# Unlike global_average_pooling_test it does not add OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "global_average_pooling_spnchw_test",
    srcs = [
        "test/global-average-pooling-spnchw.cc",
        "test/global-average-pooling-spnchw-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1412
# Operator-level unit test for hardswish. Sources: test/hardswish.cc and its
# operator tester header.
xnnpack_unit_test(
    name = "hardswish_test",
    srcs = [
        "test/hardswish.cc",
        "test/hardswish-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1421
# Operator-level unit test for leaky ReLU. Sources: test/leaky-relu.cc and its
# operator tester header.
xnnpack_unit_test(
    name = "leaky_relu_test",
    srcs = [
        "test/leaky-relu.cc",
        "test/leaky-relu-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1430
# Operator-level unit test for max pooling. In addition to its tester header
# it needs OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "max_pooling_test",
    srcs = [
        "test/max-pooling.cc",
        "test/max-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
1439
# Operator-level unit test for PReLU. In addition to its tester header it
# needs OPERATOR_TEST_PARAMS_HDRS.
xnnpack_unit_test(
    name = "prelu_test",
    srcs = [
        "test/prelu.cc",
        "test/prelu-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
1448
# Operator-level unit test for sigmoid. Sources: test/sigmoid.cc and its
# operator tester header.
xnnpack_unit_test(
    name = "sigmoid_test",
    srcs = [
        "test/sigmoid.cc",
        "test/sigmoid-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1457
# Operator-level unit test for softargmax. Sources: test/softargmax.cc and its
# operator tester header.
xnnpack_unit_test(
    name = "softargmax_test",
    srcs = [
        "test/softargmax.cc",
        "test/softargmax-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1466
# Operator-level unit test for unpooling. Sources: test/unpooling.cc and its
# operator tester header.
xnnpack_unit_test(
    name = "unpooling_test",
    srcs = [
        "test/unpooling.cc",
        "test/unpooling-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1475
1476############################# Build configurations #############################
1477
# Matches builds with --cpu=k8.
# NOTE(review): despite the "linux_" prefix, only the CPU is constrained here;
# nothing in this setting restricts the operating system.
config_setting(
    name = "linux_k8",
    values = {
        "cpu": "k8",
    },
)
1484
# Matches any build that uses the Android crosstool, regardless of --cpu.
# The android_* settings that follow add a specific --cpu on top of this.
config_setting(
    name = "android",
    values = {"crosstool_top": "//external:android/crosstool"},
)
1489
# Matches Android crosstool builds targeting --cpu=armeabi-v7a.
config_setting(
    name = "android_armv7",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "armeabi-v7a",
    },
)
1497
# Matches Android crosstool builds targeting --cpu=arm64-v8a.
config_setting(
    name = "android_arm64",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "arm64-v8a",
    },
)
1505
# Matches Android crosstool builds targeting --cpu=x86.
config_setting(
    name = "android_x86",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "x86",
    },
)
1513
# Matches Android crosstool builds targeting --cpu=x86_64.
config_setting(
    name = "android_x86_64",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "x86_64",
    },
)
1521
# Matches any build that uses the Emscripten crosstool, regardless of --cpu.
# NOTE(review): the crosstool label lives under "//external:android/", which
# looks odd for Emscripten -- confirm this is the intended label.
config_setting(
    name = "emscripten",
    values = {"crosstool_top": "//external:android/emscripten"},
)
1526
# Matches Emscripten crosstool builds targeting --cpu=wasm.
config_setting(
    name = "emscripten_wasm",
    values = {
        "crosstool_top": "//external:android/emscripten",
        "cpu": "wasm",
    },
)
1534
# Matches Emscripten crosstool builds targeting --cpu=wasm with the "wasmsimd"
# feature enabled. This is a strict specialization of emscripten_wasm: it has
# the same values plus the extra "features" entry.
config_setting(
    name = "emscripten_wasmsimd",
    values = {
        "crosstool_top": "//external:android/emscripten",
        "cpu": "wasm",
        "features": "wasmsimd",
    },
)
1543
# Matches Emscripten crosstool builds targeting --cpu=asmjs.
config_setting(
    name = "emscripten_asmjs",
    values = {
        "crosstool_top": "//external:android/emscripten",
        "cpu": "asmjs",
    },
)
1551
# Builds with -c dbg
# (compilation_mode is "dbg"). Publicly visible so that other packages can
# reference this setting.
config_setting(
    name = "debug_build",
    values = {
        "compilation_mode": "dbg",
    },
    visibility = ["//visibility:public"],
)
1560
# Builds with -c opt
# (compilation_mode is "opt"). Publicly visible so that other packages can
# reference this setting.
config_setting(
    name = "optimized_build",
    values = {
        "compilation_mode": "opt",
    },
    visibility = ["//visibility:public"],
)
1569
# Enables usage of assembly kernels.
# Matches only when the build explicitly passes
# --define xnn_enable_assembly=true; it does not match when the define is unset.
config_setting(
    name = "xnn_enable_assembly_explicit_true",
    define_values = {"xnn_enable_assembly": "true"},
)
1575
# Disables usage of assembly kernels.
# Matches only when the build explicitly passes
# --define xnn_enable_assembly=false; it does not match when the define is
# unset.
config_setting(
    name = "xnn_enable_assembly_explicit_false",
    define_values = {"xnn_enable_assembly": "false"},
)