# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# Description:
#   XNNPACK - optimized floating-point neural network operators library

# Build macros and option helpers come from the package-local build_defs.bzl.
load(
    ":build_defs.bzl",
    "xnnpack_aggregate_library",
    "xnnpack_benchmark",
    "xnnpack_binary",
    "xnnpack_cc_library",
    "xnnpack_min_size_copts",
    "xnnpack_optional_armcl_copts",
    "xnnpack_optional_armcl_deps",
    "xnnpack_optional_gemmlowp_copts",
    "xnnpack_optional_gemmlowp_deps",
    "xnnpack_optional_ruy_copts",
    "xnnpack_optional_ruy_deps",
    "xnnpack_optional_tflite_copts",
    "xnnpack_optional_tflite_deps",
    "xnnpack_std_copts",
    "xnnpack_unit_test",
    "xnnpack_visibility",
)

licenses(["notice"])

exports_files(["LICENSE"])

# Dependency labels for operator-level benchmarks (purpose inferred from the
# name; the consuming targets are not in this part of the file).
OPERATOR_BENCHMARK_DEPS = [
    ":XNNPACK",
    ":bench_utils",
    "@cpuinfo",
    "@pthreadpool",
]

# Dependency labels for micro-kernel benchmarks (purpose inferred from the
# name; the consuming targets are not in this part of the file).
MICROKERNEL_BENCHMARK_DEPS = [
    ":ukernels",
    ":bench_utils",
    ":enable_assembly",
    "@cpuinfo",
    "@FP16",
    "@pthreadpool",
]

# Dependency labels for accuracy-evaluation binaries (purpose inferred from
# the name; the consuming targets are not in this part of the file).
ACCURACY_EVAL_DEPS = [
    ":XNNPACK",
    ":ukernels",
    "@FP16",
    "@pthreadpool",
]

# Dependency labels for micro-kernel unit tests (purpose inferred from the
# name; the consuming targets are not in this part of the file).
MICROKERNEL_TEST_DEPS = [
    ":ukernels",
    ":enable_assembly",
    "@cpuinfo",
    "@FP16",
    "@pthreadpool",
]

# Dependency labels for operator-level unit tests (purpose inferred from the
# name; the consuming targets are not in this part of the file).
OPERATOR_TEST_DEPS = [
    ":XNNPACK",
    "@pthreadpool",
    "@FP16",
]

# Operator implementation sources; compiled into the :operators library.
OPERATOR_SRCS = [
    "src/add.c",
    "src/argmax-pooling.c",
    "src/average-pooling.c",
    "src/channel-pad.c",
    "src/channel-shuffle.c",
    "src/clamp.c",
    "src/convolution-spnchw.c",
    "src/convolution.c",
    "src/deconvolution.c",
    "src/fully-connected.c",
    "src/global-average-pooling-spnchw.c",
    "src/global-average-pooling.c",
    "src/hardswish.c",
    "src/leaky-relu.c",
    "src/max-pooling.c",
    "src/prelu.c",
    "src/sigmoid.c",
    "src/softargmax.c",
    "src/unpooling.c",
]

# Scalar micro-kernel sources; compiled into the :scalar_ukernels library.
SCALAR_UKERNELS = [
    "src/f32-argmaxpool/mp9p8q-scalar.c",
    "src/f32-argmaxpool/up4-scalar.c",
    "src/f32-argmaxpool/up9-scalar.c",
    "src/f32-avgpool/mp9p8q-scalar.c",
    "src/f32-avgpool/up9-scalar.c",
    "src/f32-clamp/scalar.c",
    "src/f32-conv-hwc2spchw/3x3s2p1c3x4-scalar-1x1.c",
    "src/f32-igemm/1x4-scalar.c",
    "src/f32-igemm/2x4-scalar.c",
    "src/f32-igemm/4x2-scalar.c",
    "src/f32-igemm/4x4-scalar.c",
    "src/f32-dwconv/up1x25-scalar.c",
    "src/f32-dwconv/up1x25-scalar-acc2.c",
    "src/f32-dwconv/up2x25-scalar.c",
    "src/f32-dwconv/up2x25-scalar-acc2.c",
    "src/f32-dwconv/up1x4-scalar.c",
    "src/f32-dwconv/up1x4-scalar-acc2.c",
    "src/f32-dwconv/up2x4-scalar.c",
    "src/f32-dwconv/up2x4-scalar-acc2.c",
    "src/f32-dwconv/up1x9-scalar.c",
    "src/f32-dwconv/up1x9-scalar-acc2.c",
    "src/f32-dwconv/up2x9-scalar.c",
    "src/f32-dwconv/up2x9-scalar-acc2.c",
    "src/f32-dwconv-spchw/3x3p1-scalar.c",
    "src/f32-dwconv-spchw/3x3s2p1-scalar.c",
    "src/f32-gavgpool-spchw/scalar-x1.c",
    "src/f32-gavgpool/mp7p7q-scalar.c",
    "src/f32-gavgpool/up7-scalar.c",
    "src/f32-gemm/1x4-scalar.c",
    "src/f32-gemm/2x4-scalar.c",
    "src/f32-gemm/4x2-scalar.c",
    "src/f32-gemm/4x4-scalar.c",
    "src/f32-gemminc/1x4-scalar.c",
    "src/f32-gemminc/2x4-scalar.c",
    "src/f32-gemminc/4x4-scalar.c",
    "src/f32-hswish/scalar.c",
    "src/f32-maxpool/9p8q-scalar.c",
    "src/f32-pavgpool/mp9p8q-scalar.c",
    "src/f32-pavgpool/up9-scalar.c",
    "src/f32-ppmm/2x4-scalar.c",
    "src/f32-ppmm/3x3-scalar.c",
    "src/f32-ppmm/4x2-scalar.c",
    "src/f32-ppmm/4x4-scalar.c",
    "src/f32-prelu/scalar-2x1.c",
    "src/f32-prelu/scalar-2x4.c",
    "src/f32-rmax/scalar.c",
    "src/f32-spmm/1x1-scalar-pipelined.c",
    "src/f32-spmm/1x1-scalar.c",
    "src/f32-spmm/2x1-scalar-pipelined.c",
    "src/f32-spmm/2x1-scalar.c",
    "src/f32-spmm/4x1-scalar-pipelined.c",
    "src/f32-spmm/4x1-scalar.c",
    "src/f32-spmm/8x1-scalar-pipelined.c",
    "src/f32-spmm/8x1-scalar.c",
    "src/f32-spmm/8x2-scalar.c",
    "src/f32-spmm/8x4-scalar.c",
    "src/f32-vadd/scalar.c",
    "src/f32-vmul/scalar.c",
    "src/f32-vmulcaddc/c1-scalar-2x.c",
    "src/f32-vmulcaddc/c2-scalar-2x.c",
    "src/f32-vmulcaddc/c4-scalar-2x.c",
    "src/f32-vsub/scalar.c",
    "src/q8-avgpool/mp9p8q-scalar.c",
    "src/q8-avgpool/up9-scalar.c",
    "src/q8-igemm/2x2-scalar.c",
    "src/q8-dwconv/up1x9-scalar.c",
    "src/q8-gavgpool/mp7p7q-scalar.c",
    "src/q8-gavgpool/up7-scalar.c",
    "src/q8-gemm/2x2-scalar.c",
    "src/q8-vadd/scalar.c",
    "src/u8-clamp/scalar.c",
    "src/u8-lut32norm/scalar.c",
    "src/u8-maxpool/9p8q-scalar.c",
    "src/u8-rmax/scalar.c",
    "src/x32-packx/x2-scalar.c",
    "src/x32-packx/x3-scalar.c",
    "src/x32-packx/x4-scalar.c",
    "src/x32-pad/x2-scalar.c",
    "src/x32-unpool/scalar.c",
    "src/x32-zip/x2-scalar.c",
    "src/x32-zip/x3-scalar.c",
    "src/x32-zip/x4-scalar.c",
    "src/x32-zip/xm-scalar.c",
    "src/x8-lut/scalar.c",
    "src/x8-zip/x2-scalar.c",
    "src/x8-zip/x3-scalar.c",
    "src/x8-zip/x4-scalar.c",
    "src/x8-zip/xm-scalar.c",
]

# PSIMD micro-kernel sources; compiled into the :psimd_ukernels library.
PSIMD_UKERNELS = [
    "src/f32-argmaxpool/mp9p8q-psimd.c",
    "src/f32-argmaxpool/up4-psimd.c",
    "src/f32-argmaxpool/up9-psimd.c",
    "src/f32-avgpool/mp9p8q-psimd.c",
    "src/f32-avgpool/up9-psimd.c",
    "src/f32-clamp/psimd.c",
    "src/f32-igemm/1x8-psimd-loadsplat.c",
    "src/f32-igemm/1x8-psimd-splat.c",
    "src/f32-igemm/1x8s4-psimd.c",
    "src/f32-igemm/4x2c4-psimd.c",
    "src/f32-igemm/4x8-psimd-loadsplat.c",
    "src/f32-igemm/4x8-psimd-splat.c",
    "src/f32-igemm/4x8s4-psimd.c",
    "src/f32-igemm/6x8-psimd-loadsplat.c",
    "src/f32-igemm/6x8-psimd-splat.c",
    "src/f32-igemm/6x8s4-psimd.c",
    "src/f32-dwconv/up4x25-psimd.c",
    "src/f32-dwconv/up4x25-psimd-acc2.c",
    "src/f32-dwconv/up8x25-psimd.c",
    "src/f32-dwconv/up8x25-psimd-acc2.c",
    "src/f32-dwconv/up4x4-psimd.c",
    "src/f32-dwconv/up4x4-psimd-acc2.c",
    "src/f32-dwconv/up8x4-psimd.c",
    "src/f32-dwconv/up8x4-psimd-acc2.c",
    "src/f32-dwconv/up4x9-psimd.c",
    "src/f32-dwconv/up4x9-psimd-acc2.c",
    "src/f32-dwconv/up8x9-psimd.c",
    "src/f32-dwconv/up8x9-psimd-acc2.c",
    "src/f32-gavgpool/mp7p7q-psimd.c",
    "src/f32-gavgpool/up7-psimd.c",
    "src/f32-gemm/1x8-psimd-loadsplat.c",
    "src/f32-gemm/1x8-psimd-splat.c",
    "src/f32-gemm/1x8s4-psimd.c",
    "src/f32-gemm/4x8-psimd-loadsplat.c",
    "src/f32-gemm/4x8-psimd-splat.c",
    "src/f32-gemm/4x8s4-psimd.c",
    "src/f32-gemm/6x8-psimd-loadsplat.c",
    "src/f32-gemm/6x8-psimd-splat.c",
    "src/f32-gemm/6x8s4-psimd.c",
    "src/f32-gemminc/1x8-psimd-loadsplat.c",
    "src/f32-gemminc/1x8-psimd-splat.c",
    "src/f32-gemminc/1x8s4-psimd.c",
    "src/f32-gemminc/4x8-psimd-loadsplat.c",
    "src/f32-gemminc/4x8-psimd-splat.c",
    "src/f32-gemminc/4x8s4-psimd.c",
    "src/f32-gemminc/6x8-psimd-loadsplat.c",
    "src/f32-gemminc/6x8-psimd-splat.c",
    "src/f32-gemminc/6x8s4-psimd.c",
    "src/f32-hswish/psimd.c",
    "src/f32-maxpool/9p8q-psimd.c",
    "src/f32-pavgpool/mp9p8q-psimd.c",
    "src/f32-pavgpool/up9-psimd.c",
    "src/f32-ppmm/4x8-psimd.c",
    "src/f32-prelu/psimd-2x4.c",
    "src/f32-prelu/psimd-2x8.c",
    "src/f32-vadd/psimd.c",
    "src/f32-vmul/psimd.c",
    "src/f32-vmulcaddc/c4-psimd-2x.c",
    "src/f32-vmulcaddc/c8-psimd-2x.c",
    "src/f32-vsub/psimd.c",
    "src/x32-packx/x4-psimd.c",
    "src/x32-pad/x2-psimd.c",
    "src/x32-unpool/psimd.c",
    "src/x32-zip/x2-psimd.c",
    "src/x32-zip/x3-psimd.c",
    "src/x32-zip/x4-psimd.c",
    "src/x32-zip/xm-psimd.c",
]

# ISA-specific micro-kernels

# NEON micro-kernel sources; :neon_ukernels builds this list for both
# AArch32 and AArch64.
NEON_UKERNELS = [
    "src/f32-avgpool/mp9p8q-neon.c",
    "src/f32-avgpool/up9-neon.c",
    "src/f32-clamp/neon.c",
    "src/f32-dwconv/up4x9-neon.c",
    "src/f32-dwconv/up4x9-neon-acc2.c",
    "src/f32-dwconv/up8x9-neon.c",
    "src/f32-dwconv/up8x9-neon-acc2.c",
    "src/f32-gavgpool-spchw/neon-x4.c",
    "src/f32-gavgpool/mp7p7q-neon.c",
    "src/f32-gavgpool/up7-neon.c",
    "src/f32-gemm/1x8-neon-ld64.c",
    "src/f32-gemm/4x2-neon-ld64.c",
    "src/f32-gemm/4x8-neon-ld128.c",
    "src/f32-gemm/4x8-neon-ld64.c",
    "src/f32-gemm/5x8-neon-ld64.c",
    "src/f32-gemm/6x8-neon-ld64.c",
    "src/f32-gemminc/1x8-neon-ld64.c",
    "src/f32-gemminc/4x8-neon-ld128.c",
    "src/f32-gemminc/4x8-neon-ld64.c",
    "src/f32-gemminc/5x8-neon-ld64.c",
    "src/f32-gemminc/6x8-neon-ld64.c",
    "src/f32-hswish/neon.c",
    "src/f32-igemm/1x8-neon-ld64.c",
    "src/f32-igemm/4x2-neon-ld64.c",
    "src/f32-igemm/4x4-neon-ld64.c",
    "src/f32-igemm/4x8-neon-ld128.c",
    "src/f32-igemm/4x8-neon-ld64.c",
    "src/f32-igemm/6x8-neon-ld64.c",
    "src/f32-pavgpool/mp9p8q-neon.c",
    "src/f32-pavgpool/up9-neon.c",
    "src/f32-ppmm/4x8-neon.c",
    "src/f32-ppmm/8x8-neon.c",
    "src/f32-prelu/neon-2x4.c",
    "src/f32-prelu/neon-2x8.c",
    "src/f32-rmax/neon.c",
    "src/f32-vmulcaddc/c4-neon-2x.c",
    "src/f32-vmulcaddc/c8-neon-2x.c",
    "src/q8-avgpool/mp9p8q-neon.c",
    "src/q8-avgpool/up9-neon.c",
    "src/q8-dwconv/up8x9-neon.c",
    "src/q8-gavgpool/mp7p7q-neon.c",
    "src/q8-gavgpool/up7-neon.c",
    "src/q8-gemm/4x8-neon.c",
    "src/q8-gemm/8x8-neon.c",
    "src/q8-igemm/4x8-neon.c",
    "src/q8-igemm/8x8-neon.c",
    "src/q8-vadd/neon.c",
    "src/u8-clamp/neon.c",
    "src/u8-maxpool/9p8q-neon.c",
    "src/u8-rmax/neon.c",
    "src/x32-packx/x4-neon-st4.c",
    "src/x32-pad/x2-neon.c",
    "src/x32-zip/x2-neon.c",
    "src/x32-zip/x3-neon.c",
    "src/x32-zip/x4-neon.c",
    "src/x32-zip/xm-neon.c",
    "src/x8-zip/x2-neon.c",
    "src/x8-zip/x3-neon.c",
    "src/x8-zip/x4-neon.c",
    "src/x8-zip/xm-neon.c",
]

# NEON+FMA micro-kernel sources; :neonfma_ukernels builds this list for both
# AArch32 and AArch64.
NEONFMA_UKERNELS = [
    "src/f32-igemm/1x8-neonfma-ld64.c",
    "src/f32-igemm/4x2-neonfma-ld64.c",
    "src/f32-igemm/4x4-neonfma-ld64.c",
    "src/f32-igemm/4x8-neonfma-ld128.c",
    "src/f32-igemm/4x8-neonfma-ld64.c",
    "src/f32-igemm/6x8-neonfma-ld64.c",
    "src/f32-dwconv/up4x9-neonfma.c",
    "src/f32-dwconv/up4x9-neonfma-acc2.c",
    "src/f32-dwconv/up8x9-neonfma.c",
    "src/f32-dwconv/up8x9-neonfma-acc2.c",
    "src/f32-gemm/1x8-neonfma-ld64.c",
    "src/f32-gemm/4x2-neonfma-ld64.c",
    "src/f32-gemm/4x8-neonfma-ld128.c",
    "src/f32-gemm/4x8-neonfma-ld64.c",
    "src/f32-gemm/5x8-neonfma-ld64.c",
    "src/f32-gemm/6x8-neonfma-ld64.c",
    "src/f32-gemminc/1x8-neonfma-ld64.c",
    "src/f32-gemminc/4x8-neonfma-ld128.c",
    "src/f32-gemminc/4x8-neonfma-ld64.c",
    "src/f32-gemminc/5x8-neonfma-ld64.c",
    "src/f32-gemminc/6x8-neonfma-ld64.c",
    "src/f32-hswish/neonfma.c",
    "src/f32-ppmm/4x8-neonfma.c",
    "src/f32-ppmm/8x8-neonfma.c",
    "src/f32-vmulcaddc/c4-neonfma-2x.c",
    "src/f32-vmulcaddc/c8-neonfma-2x.c",
]

# NEON+FMA micro-kernel sources that :neonfma_ukernels adds for AArch64 only.
AARCH64_NEONFMA_UKERNELS = [
    "src/f32-conv-hwc/3x3s2p1c3x4-neonfma-2x2.c",
    "src/f32-conv-hwc/3x3s2p1c3x8-neonfma-2x2.c",
    "src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c",
    "src/f32-dwconv-spchw/3x3p1-neonfma.c",
    "src/f32-dwconv-spchw/5x5p2-neonfma.c",
    "src/f32-dwconv-spchw/3x3s2p1-neonfma.c",
    "src/f32-dwconv-spchw/5x5s2p2-neonfma.c",
    "src/f32-spmm/12x1-neonfma.c",
    "src/f32-spmm/12x2-neonfma.c",
    "src/f32-spmm/12x4-neonfma.c",
    "src/f32-spmm/16x1-neonfma-pipelined.c",
    "src/f32-spmm/16x1-neonfma-unroll2.c",
    "src/f32-spmm/16x1-neonfma.c",
    "src/f32-spmm/16x2-neonfma.c",
    "src/f32-spmm/16x4-neonfma.c",
    "src/f32-spmm/4x1-neonfma-pipelined.c",
    "src/f32-spmm/4x1-neonfma-unroll2.c",
    "src/f32-spmm/4x1-neonfma.c",
    "src/f32-spmm/4x2-neonfma.c",
    "src/f32-spmm/4x4-neonfma.c",
    "src/f32-spmm/8x1-neonfma-pipelined.c",
    "src/f32-spmm/8x1-neonfma-unroll2.c",
    "src/f32-spmm/8x1-neonfma.c",
    "src/f32-spmm/8x2-neonfma.c",
    "src/f32-spmm/8x4-neonfma.c",
]

# Half-precision GEMM micro-kernel sources; built for AArch64 by
# :neonfp16arith_ukernels.
AARCH64_NEONFP16ARITH_UKERNELS = [
    "src/f16-gemm/4x8-neonfp16arith-ld64.c",
    "src/f16-gemm/6x8-neonfp16arith-ld64.c",
    "src/f16-gemm/8x8-neonfp16arith-ld64.c",
]

# SSE micro-kernel sources; built together with SSE2_UKERNELS in
# :sse2_ukernels.
SSE_UKERNELS = [
    "src/f32-avgpool/mp9p8q-sse.c",
    "src/f32-avgpool/up9-sse.c",
    "src/f32-clamp/sse.c",
    "src/f32-igemm/1x8-sse-dup.c",
    "src/f32-igemm/1x8-sse-load1.c",
    "src/f32-igemm/1x8s4-sse.c",
    "src/f32-igemm/4x2c4-sse.c",
    "src/f32-igemm/4x8-sse-dup.c",
    "src/f32-igemm/4x8-sse-load1.c",
    "src/f32-igemm/4x8s4-sse.c",
    "src/f32-dwconv/up4x25-sse.c",
    "src/f32-dwconv/up4x4-sse.c",
    "src/f32-dwconv/up4x9-sse.c",
    "src/f32-dwconv/up4x25-sse-acc2.c",
    "src/f32-dwconv/up4x4-sse-acc2.c",
    "src/f32-dwconv/up4x9-sse-acc2.c",
    "src/f32-dwconv/up8x25-sse.c",
    "src/f32-dwconv/up8x4-sse.c",
    "src/f32-dwconv/up8x9-sse.c",
    "src/f32-dwconv/up8x25-sse-acc2.c",
    "src/f32-dwconv/up8x4-sse-acc2.c",
    "src/f32-dwconv/up8x9-sse-acc2.c",
    "src/f32-gavgpool-spchw/sse-x4.c",
    "src/f32-gavgpool/mp7p7q-sse.c",
    "src/f32-gavgpool/up7-sse.c",
    "src/f32-gemm/1x8-sse-dup.c",
    "src/f32-gemm/1x8-sse-load1.c",
    "src/f32-gemm/1x8s4-sse.c",
    "src/f32-gemm/4x8-sse-dup.c",
    "src/f32-gemm/4x8-sse-load1.c",
    "src/f32-gemm/4x8s4-sse.c",
    "src/f32-gemminc/1x8-sse-dup.c",
    "src/f32-gemminc/1x8-sse-load1.c",
    "src/f32-gemminc/1x8s4-sse.c",
    "src/f32-gemminc/4x8-sse-dup.c",
    "src/f32-gemminc/4x8-sse-load1.c",
    "src/f32-gemminc/4x8s4-sse.c",
    "src/f32-hswish/sse.c",
    "src/f32-maxpool/9p8q-sse.c",
    "src/f32-pavgpool/mp9p8q-sse.c",
    "src/f32-pavgpool/up9-sse.c",
    "src/f32-dwconv-spchw/3x3p1-sse.c",
    "src/f32-dwconv-spchw/3x3s2p1-sse.c",
    "src/f32-ppmm/4x8-sse.c",
    "src/f32-rmax/sse.c",
    "src/f32-spmm/4x1-sse.c",
    "src/f32-spmm/8x1-sse.c",
    "src/f32-vadd/sse.c",
    "src/f32-vmul/sse.c",
    "src/f32-vmulcaddc/c4-sse-2x.c",
    "src/f32-vmulcaddc/c8-sse-2x.c",
    "src/f32-vsub/sse.c",
    "src/x32-packx/x4-sse.c",
]

# SSE2 micro-kernel sources; built together with SSE_UKERNELS in
# :sse2_ukernels.
SSE2_UKERNELS = [
    "src/f32-argmaxpool/mp9p8q-sse2.c",
    "src/f32-argmaxpool/up4-sse2.c",
    "src/f32-argmaxpool/up9-sse2.c",
    "src/f32-prelu/sse2-2x4.c",
    "src/f32-prelu/sse2-2x8.c",
    "src/q8-avgpool/mp9p8q-sse2.c",
    "src/q8-avgpool/up9-sse2.c",
    "src/q8-igemm/4x4c2-sse2.c",
    "src/q8-dwconv/up8x9-sse2.c",
    "src/q8-gavgpool/mp7p7q-sse2.c",
    "src/q8-gavgpool/up7-sse2.c",
    "src/q8-gemm/2x4c8-sse2.c",
    "src/q8-gemm/4x4c2-sse2.c",
    "src/q8-vadd/sse2.c",
    "src/u8-clamp/sse2.c",
    "src/u8-maxpool/9p8q-sse2.c",
    "src/u8-rmax/sse2.c",
    "src/x32-pad/x2-sse2.c",
    "src/x32-zip/x2-sse2.c",
    "src/x32-zip/x3-sse2.c",
    "src/x32-zip/x4-sse2.c",
    "src/x32-zip/xm-sse2.c",
    "src/x8-zip/x2-sse2.c",
    "src/x8-zip/x3-sse2.c",
    "src/x8-zip/x4-sse2.c",
    "src/x8-zip/xm-sse2.c",
]

# SSE4.1 micro-kernel sources; compiled into :sse41_ukernels.
SSE41_UKERNELS = [
    "src/f32-prelu/sse41-2x4.c",
    "src/f32-prelu/sse41-2x8.c",
]

# AVX micro-kernel sources; compiled into :avx_ukernels.
AVX_UKERNELS = [
    "src/f32-rmax/avx.c",
    "src/f32-vscale/avx-unroll32.c",
]

# AVX2 micro-kernel and math-routine sources; compiled into :avx2_ukernels.
AVX2_UKERNELS = [
    "src/f32-raddexpminusmax/avx2-p5-unroll64.c",
    "src/f32-raddextexp/avx2-p5-unroll64.c",
    "src/f32-raddstoreexpminusmax/avx2-p5-unroll64.c",
    "src/f32-vscaleexpminusmax/avx2-p5-unroll64.c",
    "src/f32-vscaleextexp/avx2-p5-unroll64.c",
    "src/math/exp-avx2-p5.c",
    "src/math/exp-avx2-perm-p3.c",
    "src/math/exp-avx2-perm-p4.c",
    "src/math/expminus-avx2-p5.c",
    "src/math/extexp-avx2-p5.c",
]

# AVX512F micro-kernel and math-routine sources; compiled into
# :avx512f_ukernels.
AVX512F_UKERNELS = [
    "src/f32-raddexpminusmax/avx512f-p5-scalef-unroll128.c",
    "src/f32-raddextexp/avx512f-p5-scalef-unroll128.c",
    "src/f32-raddstoreexpminusmax/avx512f-p5-scalef-unroll128.c",
    "src/f32-rmax/avx512f.c",
    "src/f32-vscale/avx512f-unroll64.c",
    "src/f32-vscaleexpminusmax/avx512f-p5-scalef-unroll128.c",
    "src/f32-vscaleextexp/avx512f-p5-scalef-unroll128.c",
    "src/math/exp-avx512f-p5-scalef.c",
    "src/math/exp-avx512f-p5.c",
    "src/math/exp-avx512f-perm-p3.c",
    "src/math/exp-avx512f-perm2-p2.c",
    "src/math/extexp-avx512f-p5.c",
]

# AArch32 assembly micro-kernel sources; built by :asm_ukernels.
AARCH32_ASM_UKERNELS = [
    "src/q8-dwconv/up8x9-aarch32-neon.S",
]

# AArch64 assembly micro-kernel sources; built by :asm_ukernels.
AARCH64_ASM_UKERNELS = [
    "src/f32-dwconv/up4x9-aarch64-neonfma-cortex-a55.S",
    "src/f32-dwconv/up4x9-aarch64-neonfma.S",
    "src/f32-gemm/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/1x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/4x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/4x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/4x8-aarch64-neonfma-ld128.S",
    "src/f32-gemm/4x8-aarch64-neonfma-ld64.S",
    "src/f32-gemm/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-gemm/6x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemm/6x8-aarch64-neonfma-ld128.S",
    "src/f32-gemm/6x8-aarch64-neonfma-ld64.S",
    "src/f32-gemminc/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemminc/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-ld128.S",
    "src/f32-gemminc/4x8-aarch64-neonfma-ld64.S",
    "src/f32-gemminc/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-ld128.S",
    "src/f32-gemminc/6x8-aarch64-neonfma-ld64.S",
    "src/f32-igemm/1x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/4x12-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/4x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a53.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a57.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a73.S",
    "src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S",
]

# Internal headers for micro-kernels. This list is the base that
# INTERNAL_HDRS, ACCURACY_EVAL_HDRS, MICROKERNEL_BENCHMARK_HDRS and
# MICROKERNEL_TEST_HDRS extend.
INTERNAL_MICROKERNEL_HDRS = [
    "src/xnnpack/argmaxpool.h",
    "src/xnnpack/avgpool.h",
    "src/xnnpack/clamp.h",
    "src/xnnpack/common.h",
    "src/xnnpack/conv.h",
    "src/xnnpack/dwconv.h",
    "src/xnnpack/gavgpool.h",
    "src/xnnpack/gemm.h",
    "src/xnnpack/hswish.h",
    "src/xnnpack/igemm.h",
    "src/xnnpack/lut.h",
    "src/xnnpack/math.h",
    "src/xnnpack/maxpool.h",
    "src/xnnpack/packx.h",
    "src/xnnpack/pad.h",
    "src/xnnpack/params.h",
    "src/xnnpack/pavgpool.h",
    "src/xnnpack/ppmm.h",
    "src/xnnpack/prelu.h",
    "src/xnnpack/raddexpminusmax.h",
    "src/xnnpack/raddextexp.h",
    "src/xnnpack/raddstoreexpminusmax.h",
    "src/xnnpack/rmax.h",
    "src/xnnpack/scalar-utils.h",
    "src/xnnpack/spmm.h",
    "src/xnnpack/unpool.h",
    "src/xnnpack/vadd.h",
    "src/xnnpack/vmul.h",
    "src/xnnpack/vmulcaddc.h",
    "src/xnnpack/vscale.h",
    "src/xnnpack/vscaleexpminusmax.h",
    "src/xnnpack/vscaleextexp.h",
    "src/xnnpack/vsub.h",
    "src/xnnpack/zip.h",
]

# Micro-kernel headers plus the public header and the remaining internal
# headers; used as `hdrs` by the library targets in this file.
INTERNAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "include/xnnpack.h",
    "src/xnnpack/allocator.h",
    "src/xnnpack/compute.h",
    "src/xnnpack/im2col.h",
    "src/xnnpack/indirection.h",
    "src/xnnpack/math-stubs.h",
    "src/xnnpack/operator.h",
    "src/xnnpack/pack.h",
    "src/xnnpack/params-init.h",
    "src/xnnpack/requantization-stubs.h",
    "src/xnnpack/requantization.h",
]

# Micro-kernel headers plus math-stubs.h (consumers are not in this part of
# the file).
ACCURACY_EVAL_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "src/xnnpack/math-stubs.h",
]

# Micro-kernel headers plus params-init.h and the public header (consumers
# are not in this part of the file).
MICROKERNEL_BENCHMARK_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "src/xnnpack/params-init.h",
    "include/xnnpack.h",
]

# Micro-kernel headers plus ISA checks, params-init, requantization and the
# public header (consumers are not in this part of the file).
MICROKERNEL_TEST_HDRS = INTERNAL_MICROKERNEL_HDRS + [
    "src/xnnpack/isa-checks.h",
    "src/xnnpack/params-init.h",
    "src/xnnpack/requantization.h",
    "include/xnnpack.h",
]

# Parameter-related headers for operator tests (purpose inferred from the
# name; consumers are not in this part of the file).
OPERATOR_TEST_PARAMS_HDRS = [
    "src/xnnpack/params.h",
    "src/xnnpack/common.h",
]

# Headers related to weight packing (purpose inferred from the name;
# consumers are not in this part of the file).
WEIGHTS_PACK_HDRS = [
    "src/xnnpack/pack.h",
    "src/xnnpack/operator.h",
    "src/xnnpack/compute.h",
]

# Compiler flag that sets XNN_LOG_LEVEL according to the build configuration.
LOGGING_COPTS = select({
    # No logging in optimized mode
    ":optimized_build": ["-DXNN_LOG_LEVEL=0"],
    # Full logging in debug mode
    ":debug_build": ["-DXNN_LOG_LEVEL=5"],
    # Error-only logging in default (fastbuild) mode
    "//conditions:default": ["-DXNN_LOG_LEVEL=2"],
})

# Logging header; appended to `hdrs` of the targets that also take
# LOGGING_COPTS (:operator_run and :operators).
LOGGING_HDRS = [
    "src/xnnpack/log.h",
]

# Scalar micro-kernels; :ukernels lists this target under generic_deps.
xnnpack_cc_library(
    name = "scalar_ukernels",
    srcs = SCALAR_UKERNELS,
    hdrs = INTERNAL_HDRS,
    aarch32_copts = ["-marm"],
    copts = xnnpack_std_copts(),
    deps = [
        "@FP16",
        "@FXdiv",
    ],
)

# PSIMD micro-kernels. AArch32 builds enable NEON; optimized_copts supplies
# -O3 and -ffast-math.
xnnpack_cc_library(
    name = "psimd_ukernels",
    srcs = PSIMD_UKERNELS,
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon",
    ],
    copts = xnnpack_std_copts(),
    optimized_copts = [
        "-O3",
        "-ffast-math",
    ],
    deps = [
        "@FP16",
        "@psimd",
    ],
)

# NEON micro-kernels; the same source list is used for AArch32 and AArch64.
xnnpack_cc_library(
    name = "neon_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon",
    ],
    aarch32_srcs = NEON_UKERNELS,
    aarch64_srcs = NEON_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = ["@FP16"],
)

# NEON+FMA micro-kernels; AArch64 additionally builds
# AARCH64_NEONFMA_UKERNELS.
xnnpack_cc_library(
    name = "neonfma_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch32_copts = [
        "-marm",
        "-mfpu=neon-vfpv4",
    ],
    aarch32_srcs = NEONFMA_UKERNELS,
    aarch64_srcs = NEONFMA_UKERNELS + AARCH64_NEONFMA_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = ["@FP16"],
)

# Half-precision NEON micro-kernels; only AArch64 sources are listed, built
# with -march=armv8.2-a+fp16.
xnnpack_cc_library(
    name = "neonfp16arith_ukernels",
    hdrs = INTERNAL_HDRS,
    aarch64_copts = ["-march=armv8.2-a+fp16"],
    aarch64_srcs = AARCH64_NEONFP16ARITH_UKERNELS,
    copts = xnnpack_std_copts(),
    deps = ["@FP16"],
)

# SSE and SSE2 micro-kernels, built together with -msse2 on x86.
xnnpack_cc_library(
    name = "sse2_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-msse2"],
    x86_srcs = SSE_UKERNELS + SSE2_UKERNELS,
    deps = ["@FP16"],
)

# SSE4.1 micro-kernels, built with -msse4.1 on x86.
xnnpack_cc_library(
    name = "sse41_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-msse4.1"],
    x86_srcs = SSE41_UKERNELS,
    deps = ["@FP16"],
)

# AVX micro-kernels, built with -mavx on x86.
xnnpack_cc_library(
    name = "avx_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-mavx"],
    x86_srcs = AVX_UKERNELS,
    deps = ["@FP16"],
)

# AVX2 micro-kernels, built with -mfma and -mavx2 on x86.
xnnpack_cc_library(
    name = "avx2_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = [
        "-mfma",
        "-mavx2",
    ],
    x86_srcs = AVX2_UKERNELS,
    deps = ["@FP16"],
)

# AVX512F micro-kernels, built with -mavx512f on x86.
xnnpack_cc_library(
    name = "avx512f_ukernels",
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    x86_copts = ["-mavx512f"],
    x86_srcs = AVX512F_UKERNELS,
    deps = ["@FP16"],
)

# Assembly micro-kernels for AArch32 and AArch64.
xnnpack_cc_library(
    name = "asm_ukernels",
    hdrs = ["src/xnnpack/assembly.h"],
    aarch32_srcs = AARCH32_ASM_UKERNELS,
    aarch64_srcs = AARCH64_ASM_UKERNELS,
)

# Aggregate of the micro-kernel libraries, with a separate dependency list
# per architecture and :scalar_ukernels as the generic dependency.
xnnpack_aggregate_library(
    name = "ukernels",
    aarch32_deps = [
        ":psimd_ukernels",
        ":neon_ukernels",
        ":neonfma_ukernels",
        ":asm_ukernels",
    ],
    aarch64_deps = [
        ":psimd_ukernels",
        ":neon_ukernels",
        ":neonfma_ukernels",
        ":neonfp16arith_ukernels",
        ":asm_ukernels",
    ],
    generic_deps = [":scalar_ukernels"],
    wasmsimd_deps = [
        ":psimd_ukernels",
    ],
    x86_deps = [
        ":psimd_ukernels",
        ":sse2_ukernels",
        ":sse41_ukernels",
        ":avx_ukernels",
        ":avx2_ukernels",
        ":avx512f_ukernels",
    ],
)

# Library built from src/im2col.c.
xnnpack_cc_library(
    name = "im2col",
    srcs = ["src/im2col.c"],
    hdrs = [
        "src/xnnpack/common.h",
        "src/xnnpack/im2col.h",
    ],
    copts = xnnpack_std_copts(),
)

# Library built from src/indirection.c; a dependency of :operators.
xnnpack_cc_library(
    name = "indirection",
    srcs = ["src/indirection.c"],
    hdrs = INTERNAL_HDRS,
    copts = xnnpack_std_copts(),
    deps = [
        "@FP16",
        "@FXdiv",
        "@pthreadpool",
    ],
)

# Library built from src/operator-run.c; a dependency of :XNNPACK.
xnnpack_cc_library(
    name = "operator_run",
    srcs = ["src/operator-run.c"],
    hdrs = INTERNAL_HDRS + LOGGING_HDRS,
    copts = xnnpack_std_copts() + LOGGING_COPTS + [
        # Wrappers for multi-pass microkernels use VLAs for temporary buffers.
        "-Wno-vla",
    ],
    deps = [
        "@FP16",
        "@FXdiv",
        "@clog",
        "@pthreadpool",
    ],
)

# Source-less library that carries the XNN_ENABLE_ASSEMBLY define to targets
# depending on it. The define is 1 unless the explicit-false setting matches.
cc_library(
    name = "enable_assembly",
    defines = select({
        ":xnn_enable_assembly_explicit_true": ["XNN_ENABLE_ASSEMBLY=1"],
        ":xnn_enable_assembly_explicit_false": ["XNN_ENABLE_ASSEMBLY=0"],
        "//conditions:default": ["XNN_ENABLE_ASSEMBLY=1"],
    }),
)

# Operator implementations plus src/operator-delete.c. Size-minimizing flags
# are applied in every configuration except :debug_build; the WebAssembly
# variants also compile src/wasm-stubs.c.
xnnpack_cc_library(
    name = "operators",
    srcs = OPERATOR_SRCS + [
        "src/operator-delete.c",
    ],
    hdrs = INTERNAL_HDRS + LOGGING_HDRS,
    copts = xnnpack_std_copts() + LOGGING_COPTS + [
        "-Isrc",
        "-Iinclude",
    ] + select({
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }),
    wasm_srcs = ["src/wasm-stubs.c"],
    wasmsimd_srcs = ["src/wasm-stubs.c"],
    deps = [
        ":indirection",
        "@FP16",
        "@FXdiv",
        "@clog",
        "@pthreadpool",
    ],
)

# Main library target: compiles src/init.c and exposes include/xnnpack.h.
# Visibility is controlled by xnnpack_visibility().
cc_library(
    name = "XNNPACK",
    srcs = [
        "src/init.c",
    ],
    copts = xnnpack_std_copts() + LOGGING_COPTS + [
        "-Isrc",
        "-Iinclude",
    ] + select({
        # Size-minimizing flags are skipped for -c dbg builds.
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }),
    includes = ["include"],
    linkstatic = True,
    textual_hdrs = ["include/xnnpack.h"],
    visibility = xnnpack_visibility(),
    deps = [
        ":enable_assembly",
        ":ukernels",
        ":operator_run",
        ":operators",
        "@clog",
        "@pthreadpool",
    ] + select({
        # cpuinfo is not linked when building with the Emscripten toolchain.
        ":emscripten": [],
        "//conditions:default": ["@cpuinfo"],
    }),
)
880
# Variant of the library built from the same source, copts and deps as
# :XNNPACK, but with the XNN_NO_* defines below set. It is the dependency of
# :size_test.
cc_library(
    name = "xnnpack_operators_nhwc_f32",
    srcs = [
        "src/init.c",
    ],
    copts = xnnpack_std_copts() + LOGGING_COPTS + [
        "-Isrc",
        "-Iinclude",
    ] + select({
        # Size-minimizing flags are skipped for -c dbg builds.
        ":debug_build": [],
        "//conditions:default": xnnpack_min_size_copts(),
    }),
    defines = [
        "XNN_NO_Q8_OPERATORS",
        "XNN_NO_U8_OPERATORS",
        "XNN_NO_X8_OPERATORS",
        "XNN_NO_SPNCHW_OPERATORS",
    ],
    includes = ["include"],
    linkstatic = True,
    textual_hdrs = ["include/xnnpack.h"],
    visibility = xnnpack_visibility(),
    deps = [
        ":enable_assembly",
        ":ukernels",
        ":operator_run",
        ":operators",
        "@clog",
        "@pthreadpool",
    ] + select({
        # cpuinfo is not linked when building with the Emscripten toolchain.
        ":emscripten": [],
        "//conditions:default": ["@cpuinfo"],
    }),
)
915
# Shared helpers for the benchmark targets (bench/utils.cc, bench/utils.h).
xnnpack_cc_library(
    name = "bench_utils",
    srcs = ["bench/utils.cc"],
    hdrs = ["bench/utils.h"],
    copts = ["-Wno-unused-result"],
    deps = [
        "@com_google_benchmark//:benchmark",
        "@cpuinfo",
    ],
)
926
######################### Benchmarks for micro-kernels #########################

# One xnnpack_benchmark() target per micro-kernel family. All of them depend on
# MICROKERNEL_BENCHMARK_DEPS; targets that pack weights also list
# WEIGHTS_PACK_HDRS in srcs.

xnnpack_benchmark(
    name = "q8_gemm_bench",
    srcs = [
        "bench/gemm.h",
        "bench/q8-gemm.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts() + xnnpack_optional_gemmlowp_copts(),
    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps() + xnnpack_optional_gemmlowp_deps(),
)

xnnpack_benchmark(
    name = "f16_gemm_bench",
    srcs = [
        "bench/f16-gemm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)

xnnpack_benchmark(
    name = "f32_igemm_bench",
    srcs = [
        "bench/f32-igemm.cc",
        "bench/conv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)

xnnpack_benchmark(
    name = "f32_conv_hwc_bench",
    srcs = [
        "bench/f32-conv-hwc.cc",
        "bench/dconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)

xnnpack_benchmark(
    name = "f32_conv_hwc2spchw_bench",
    srcs = [
        "bench/f32-conv-hwc2spchw.cc",
        "bench/dconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)

xnnpack_benchmark(
    name = "f32_dwconv_bench",
    srcs = [
        "bench/f32-dwconv.cc",
        "bench/dwconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)

xnnpack_benchmark(
    name = "f32_dwconv_spchw_bench",
    srcs = [
        "bench/f32-dwconv-spchw.cc",
        "bench/dwconv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":indirection"],
)

xnnpack_benchmark(
    name = "f32_gemm_bench",
    srcs = [
        "bench/f32-gemm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"] + xnnpack_optional_ruy_copts(),
    deps = MICROKERNEL_BENCHMARK_DEPS + xnnpack_optional_ruy_deps(),
)

xnnpack_benchmark(
    name = "f32_rmax_bench",
    srcs = [
        "bench/f32-rmax.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS,
)

xnnpack_benchmark(
    name = "f32_spmm_bench",
    srcs = [
        "bench/f32-spmm.cc",
        "bench/gemm.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)

xnnpack_benchmark(
    name = "f32_softargmax_bench",
    srcs = [
        "bench/f32-softargmax.cc",
    ] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS,
)

xnnpack_benchmark(
    name = "f32_im2col_gemm_bench",
    srcs = [
        "bench/f32-im2col-gemm.cc",
        "bench/conv.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
    deps = MICROKERNEL_BENCHMARK_DEPS + [":im2col"],
)
1052
########################### Benchmarks for operators ###########################

# One xnnpack_benchmark() target per operator, each depending on
# OPERATOR_BENCHMARK_DEPS. Some targets add the optional TFLite / ARM Compute
# Library copts and deps returned by the xnnpack_optional_* helpers.

xnnpack_benchmark(
    name = "add_bench",
    srcs = ["bench/add.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)

xnnpack_benchmark(
    name = "average_pooling_bench",
    srcs = ["bench/average-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)

xnnpack_benchmark(
    name = "channel_shuffle_bench",
    srcs = ["bench/channel-shuffle.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)

xnnpack_benchmark(
    name = "convolution_bench",
    srcs = ["bench/convolution.cc"],
    copts = xnnpack_optional_tflite_copts() + xnnpack_optional_armcl_copts(),
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps() + xnnpack_optional_armcl_deps(),
)

xnnpack_benchmark(
    name = "deconvolution_bench",
    srcs = ["bench/deconvolution.cc"],
    copts = xnnpack_optional_tflite_copts(),
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)

xnnpack_benchmark(
    name = "global_average_pooling_bench",
    srcs = ["bench/global-average-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)

xnnpack_benchmark(
    name = "max_pooling_bench",
    srcs = ["bench/max-pooling.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)

xnnpack_benchmark(
    name = "sigmoid_bench",
    srcs = ["bench/sigmoid.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)

xnnpack_benchmark(
    name = "prelu_bench",
    srcs = ["bench/prelu.cc"],
    copts = xnnpack_optional_tflite_copts(),
    deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)

xnnpack_benchmark(
    name = "softargmax_bench",
    srcs = ["bench/softargmax.cc"],
    deps = OPERATOR_BENCHMARK_DEPS,
)
1117
############################# End-to-end benchmarks ############################

# Model definitions used by the end-to-end benchmarks. Both libraries share
# the models/models.h header and are built on top of :XNNPACK.

cc_library(
    name = "mobilenet_v1",
    srcs = ["models/mobilenet-v1.cc"],
    hdrs = ["models/models.h"],
    linkstatic = True,
    deps = [
        ":XNNPACK",
        "@pthreadpool",
    ],
)

cc_library(
    name = "mobilenet_v2",
    srcs = ["models/mobilenet-v2.cc"],
    hdrs = ["models/models.h"],
    linkstatic = True,
    deps = [
        ":XNNPACK",
        "@pthreadpool",
    ],
)
1141
# End-to-end benchmark binaries; each links :XNNPACK and both MobileNet model
# libraries.

xnnpack_benchmark(
    name = "f32_dwconv_e2e_bench",
    srcs = ["bench/f32-dwconv-e2e.cc"] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS + [
        ":XNNPACK",
        ":mobilenet_v1",
        ":mobilenet_v2",
    ],
)

xnnpack_benchmark(
    name = "f32_gemm_e2e_bench",
    srcs = ["bench/f32-gemm-e2e.cc"] + MICROKERNEL_BENCHMARK_HDRS,
    copts = ["-Wno-unused-function"],
    deps = MICROKERNEL_BENCHMARK_DEPS + [
        ":XNNPACK",
        ":mobilenet_v1",
        ":mobilenet_v2",
    ],
)

xnnpack_benchmark(
    name = "end2end_bench",
    srcs = ["bench/end2end.cc"],
    deps = [
        ":XNNPACK",
        ":bench_utils",
        ":mobilenet_v1",
        ":mobilenet_v2",
        "@pthreadpool",
    ],
)
1175
#################### Accuracy evaluation for math functions ####################

# Evaluation binaries for the exp-family math functions. They are declared
# with xnnpack_benchmark() and share ACCURACY_EVAL_HDRS / ACCURACY_EVAL_DEPS.

xnnpack_benchmark(
    name = "f32_exp_eval",
    srcs = [
        "eval/f32-exp.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + ACCURACY_EVAL_HDRS,
    deps = ACCURACY_EVAL_DEPS,
)

xnnpack_benchmark(
    name = "f32_expminus_eval",
    srcs = [
        "eval/f32-expminus.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + ACCURACY_EVAL_HDRS,
    deps = ACCURACY_EVAL_DEPS,
)

xnnpack_benchmark(
    name = "f32_extexp_eval",
    srcs = [
        "eval/f32-extexp.cc",
        "src/xnnpack/AlignedAllocator.h",
    ] + ACCURACY_EVAL_HDRS,
    deps = ACCURACY_EVAL_DEPS,
)
1204
######################### Unit tests for micro-kernels #########################

# Each target pairs a test source with its *-microkernel-tester.h header and
# depends on MICROKERNEL_TEST_DEPS.

xnnpack_unit_test(
    name = "f16_gemm_test",
    srcs = [
        "test/f16-gemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_argmaxpool_test",
    srcs = [
        "test/f32-argmaxpool.cc",
        "test/argmaxpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_avgpool_test",
    srcs = [
        "test/f32-avgpool.cc",
        "test/avgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_clamp_test",
    srcs = [
        "test/f32-clamp.cc",
        "test/clamp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_igemm_test",
    srcs = [
        "test/f32-igemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_conv_hwc_test",
    srcs = [
        "test/f32-conv-hwc.cc",
        "test/conv-hwc-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_conv_hwc2spchw_test",
    srcs = [
        "test/f32-conv-hwc2spchw.cc",
        "test/conv-hwc2spchw-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_dwconv_test",
    srcs = [
        "test/f32-dwconv.cc",
        "test/dwconv-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_dwconv_spchw_test",
    srcs = [
        "test/f32-dwconv-spchw.cc",
        "test/dwconv-spchw-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_gavgpool_test",
    srcs = [
        "test/f32-gavgpool.cc",
        "test/gavgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_gavgpool_spchw_test",
    srcs = [
        "test/f32-gavgpool-spchw.cc",
        "test/gavgpool-spchw-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_gemm_test",
    srcs = [
        "test/f32-gemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1325
# f32 micro-kernel unit tests: gemminc, hswish, pooling, ppmm, prelu,
# reductions (radd*/rmax) and spmm.

xnnpack_unit_test(
    name = "f32_gemminc_test",
    srcs = [
        "test/f32-gemminc.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_hswish_test",
    srcs = [
        "test/f32-hswish.cc",
        "test/hswish-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_maxpool_test",
    srcs = [
        "test/f32-maxpool.cc",
        "test/maxpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_pavgpool_test",
    srcs = [
        "test/f32-pavgpool.cc",
        "test/avgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_ppmm_test",
    srcs = [
        "test/f32-ppmm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_prelu_test",
    srcs = [
        "test/f32-prelu.cc",
        "test/prelu-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_raddexpminusmax_test",
    srcs = [
        "test/f32-raddexpminusmax.cc",
        "test/raddexpminusmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_raddextexp_test",
    srcs = [
        "test/f32-raddextexp.cc",
        "test/raddextexp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_raddstoreexpminusmax_test",
    srcs = [
        "test/f32-raddstoreexpminusmax.cc",
        "test/raddstoreexpminusmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_rmax_test",
    srcs = [
        "test/f32-rmax.cc",
        "test/rmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_spmm_test",
    srcs = [
        "test/f32-spmm.cc",
        "test/spmm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1429
# f32 element-wise ("v*") micro-kernel unit tests.

xnnpack_unit_test(
    name = "f32_vadd_test",
    srcs = [
        "test/f32-vadd.cc",
        "test/vadd-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_vmul_test",
    srcs = [
        "test/f32-vmul.cc",
        "test/vmul-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_vmulcaddc_test",
    srcs = [
        "test/f32-vmulcaddc.cc",
        "test/vmulcaddc-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_vscale_test",
    srcs = [
        "test/f32-vscale.cc",
        "test/vscale-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_vscaleexpminusmax_test",
    srcs = [
        "test/f32-vscaleexpminusmax.cc",
        "test/vscaleexpminusmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_vscaleextexp_test",
    srcs = [
        "test/f32-vscaleextexp.cc",
        "test/vscaleextexp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "f32_vsub_test",
    srcs = [
        "test/f32-vsub.cc",
        "test/vsub-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1493
# q8 micro-kernel unit tests.

xnnpack_unit_test(
    name = "q8_avgpool_test",
    srcs = [
        "test/q8-avgpool.cc",
        "test/avgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "q8_igemm_test",
    srcs = [
        "test/q8-igemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "q8_dwconv_test",
    srcs = [
        "test/q8-dwconv.cc",
        "test/dwconv-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "q8_gavgpool_test",
    srcs = [
        "test/q8-gavgpool.cc",
        "test/gavgpool-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "q8_gemm_test",
    srcs = [
        "test/q8-gemm.cc",
        "test/gemm-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "q8_vadd_test",
    srcs = [
        "test/q8-vadd.cc",
        "test/vadd-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1552
# u8 micro-kernel unit tests.

xnnpack_unit_test(
    name = "u8_clamp_test",
    srcs = [
        "test/u8-clamp.cc",
        "test/clamp-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "u8_lut32norm_test",
    srcs = [
        "test/u8-lut32norm.cc",
        "test/lut-norm-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "u8_maxpool_test",
    srcs = [
        "test/u8-maxpool.cc",
        "test/maxpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "u8_rmax_test",
    srcs = [
        "test/u8-rmax.cc",
        "test/rmax-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1588
# x32 and x8 micro-kernel unit tests.

xnnpack_unit_test(
    name = "x32_packx_test",
    srcs = [
        "test/x32-packx.cc",
        "test/pack-microkernel-tester.h",
        "src/xnnpack/AlignedAllocator.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "x32_pad_test",
    srcs = [
        "test/x32-pad.cc",
        "test/pad-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "x32_unpool_test",
    srcs = [
        "test/x32-unpool.cc",
        "test/unpool-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "x32_zip_test",
    srcs = [
        "test/x32-zip.cc",
        "test/zip-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "x8_lut_test",
    srcs = [
        "test/x8-lut.cc",
        "test/lut-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)

xnnpack_unit_test(
    name = "x8_zip_test",
    srcs = [
        "test/x8-zip.cc",
        "test/zip-microkernel-tester.h",
    ] + MICROKERNEL_TEST_HDRS,
    deps = MICROKERNEL_TEST_DEPS,
)
1643
########################### Size test for the library ##########################

# Binary built from test/size.c against the :xnnpack_operators_nhwc_f32
# variant of the library.
xnnpack_binary(
    name = "size_test",
    srcs = ["test/size.c"],
    deps = [":xnnpack_operators_nhwc_f32"],
)
1651
########################### Unit tests for operators ###########################

# Each target pairs a test source with its *-operator-tester.h header and
# depends on OPERATOR_TEST_DEPS; some also list OPERATOR_TEST_PARAMS_HDRS.

xnnpack_unit_test(
    name = "add_test",
    srcs = [
        "test/add.cc",
        "test/add-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "argmax_pooling_test",
    srcs = [
        "test/argmax-pooling.cc",
        "test/argmax-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "average_pooling_test",
    srcs = [
        "test/average-pooling.cc",
        "test/average-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "channel_pad_test",
    srcs = [
        "test/channel-pad.cc",
        "test/channel-pad-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "channel_shuffle_test",
    srcs = [
        "test/channel-shuffle.cc",
        "test/channel-shuffle-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "clamp_test",
    srcs = [
        "test/clamp.cc",
        "test/clamp-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "convolution_test",
    srcs = [
        "test/convolution.cc",
        "test/convolution-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "convolution_spnchw_test",
    srcs = [
        "test/convolution-spnchw.cc",
        "test/convolution-spnchw-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "deconvolution_test",
    srcs = [
        "test/deconvolution.cc",
        "test/deconvolution-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)
1734
# Operator unit tests: fully-connected, pooling, activations, softargmax and
# unpooling.

xnnpack_unit_test(
    name = "fully_connected_test",
    srcs = [
        "test/fully-connected.cc",
        "test/fully-connected-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "global_average_pooling_test",
    srcs = [
        "test/global-average-pooling.cc",
        "test/global-average-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "global_average_pooling_spnchw_test",
    srcs = [
        "test/global-average-pooling-spnchw.cc",
        "test/global-average-pooling-spnchw-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "hardswish_test",
    srcs = [
        "test/hardswish.cc",
        "test/hardswish-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "leaky_relu_test",
    srcs = [
        "test/leaky-relu.cc",
        "test/leaky-relu-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "max_pooling_test",
    srcs = [
        "test/max-pooling.cc",
        "test/max-pooling-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "prelu_test",
    srcs = [
        "test/prelu.cc",
        "test/prelu-operator-tester.h",
    ] + OPERATOR_TEST_PARAMS_HDRS,
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "sigmoid_test",
    srcs = [
        "test/sigmoid.cc",
        "test/sigmoid-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "softargmax_test",
    srcs = [
        "test/softargmax.cc",
        "test/softargmax-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)

xnnpack_unit_test(
    name = "unpooling_test",
    srcs = [
        "test/unpooling.cc",
        "test/unpooling-operator-tester.h",
    ],
    deps = OPERATOR_TEST_DEPS,
)
1824
############################# Build configurations #############################

# Enables usage of assembly kernels.
# Matches when the build is invoked with --define xnn_enable_assembly=true.
config_setting(
    name = "xnn_enable_assembly_explicit_true",
    define_values = {"xnn_enable_assembly": "true"},
)

# Disables usage of assembly kernels.
# Matches when the build is invoked with --define xnn_enable_assembly=false.
config_setting(
    name = "xnn_enable_assembly_explicit_false",
    define_values = {"xnn_enable_assembly": "false"},
)

# Builds with -c dbg
config_setting(
    name = "debug_build",
    values = {
        "compilation_mode": "dbg",
    },
)

# Builds with -c opt
config_setting(
    name = "optimized_build",
    values = {
        "compilation_mode": "opt",
    },
)
1854
# Matches builds with --cpu=k8.
config_setting(
    name = "linux_k8",
    values = {"cpu": "k8"},
)

# Matches builds with --cpu=aarch64.
config_setting(
    name = "linux_aarch64",
    values = {"cpu": "aarch64"},
)
1864
# Matches any build that uses the Android crosstool, regardless of CPU.
config_setting(
    name = "android",
    values = {"crosstool_top": "//external:android/crosstool"},
)

# Per-ABI Android settings: Android crosstool plus a specific --cpu value.
config_setting(
    name = "android_armv7",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "armeabi-v7a",
    },
)

config_setting(
    name = "android_arm64",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "arm64-v8a",
    },
)

config_setting(
    name = "android_x86",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "x86",
    },
)

config_setting(
    name = "android_x86_64",
    values = {
        "crosstool_top": "//external:android/crosstool",
        "cpu": "x86_64",
    },
)
1901
# Matches macOS builds: apple_platform_type=macos with --cpu=darwin.
config_setting(
    name = "macos_x86_64",
    values = {
        "apple_platform_type": "macos",
        "cpu": "darwin",
    },
)
1909
# Matches any build that uses the Emscripten crosstool, regardless of CPU.
config_setting(
    name = "emscripten",
    values = {"crosstool_top": "//toolchain:emscripten"},
)

# Emscripten crosstool targeting --cpu=wasm.
config_setting(
    name = "emscripten_wasm",
    values = {
        "crosstool_top": "//toolchain:emscripten",
        "cpu": "wasm",
    },
)

# Emscripten crosstool targeting --cpu=wasm with the wasm_simd feature enabled.
config_setting(
    name = "emscripten_wasmsimd",
    values = {
        "crosstool_top": "//toolchain:emscripten",
        "cpu": "wasm",
        "features": "wasm_simd",
    },
)

# Emscripten crosstool targeting --cpu=asmjs.
config_setting(
    name = "emscripten_asmjs",
    values = {
        "crosstool_top": "//toolchain:emscripten",
        "cpu": "asmjs",
    },
)