// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <pthread.h>

#ifndef __EMSCRIPTEN__
  #include <cpuinfo.h>
#endif

#include <xnnpack.h>
#include <xnnpack/argmaxpool.h>
#include <xnnpack/avgpool.h>
#include <xnnpack/clamp.h>
#include <xnnpack/common.h>
#include <xnnpack/conv.h>
#include <xnnpack/dwconv.h>
#include <xnnpack/gavgpool.h>
#include <xnnpack/gemm.h>
#include <xnnpack/hswish.h>
#include <xnnpack/igemm.h>
#include <xnnpack/log.h>
#include <xnnpack/lut.h>
#include <xnnpack/maxpool.h>
#include <xnnpack/pad.h>
#include <xnnpack/params.h>
#include <xnnpack/pavgpool.h>
#include <xnnpack/prelu.h>
#include <xnnpack/rmax.h>
#include <xnnpack/spmm.h>
#include <xnnpack/unpool.h>
#include <xnnpack/vadd.h>
#include <xnnpack/vmulcaddc.h>
#include <xnnpack/zip.h>

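// Assembly microkernels are used unless the build explicitly defines XNN_ENABLE_ASSEMBLY to 0.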
#ifndef XNN_ENABLE_ASSEMBLY
  #define XNN_ENABLE_ASSEMBLY 1
#endif

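// init() below runs exactly once through this pthread_once guard; it fills
// xnn_params with the microkernels chosen for the host CPU and flips
// .initialized to true when selection succeeds.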
static pthread_once_t init_guard = PTHREAD_ONCE_INIT;

struct xnn_parameters xnn_params = {
  .initialized = false
};

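// Runtime stubs for WebAssembly/asm.js builds (implemented outside this file);
// init() calls xnn_stub_wasm_f32_sub in the XNN_ARCH_WASM/ASMJS path below to
// detect x86-based WebAssembly hosts.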
#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
#endif
#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
#endif

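// Selects microkernel implementations for the host architecture (and, where
// cpuinfo can tell, the specific microarchitecture) and records them in
// xnn_params. Called exactly once via pthread_once() from xnn_initialize().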
static void init(void) {
#if XNN_ARCH_ARM
  if (!cpuinfo_has_arm_neon()) {
    xnn_log_error("XNNPACK initialization failed: NEON is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
    .mr = 4,
    .nr = 8,
  };

#if XNN_ENABLE_ASSEMBLY
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
    .cr = 8,
    .mr = 9,
  };
#else
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
    .cr = 8,
    .mr = 9,
  };
#endif
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
  };

  /**************************** F32 micro-kernels ****************************/
  xnn_params.f32.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
    .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
    .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
    .mr = 4,
    .nr = 8,
  };
  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
    .mr = 4,
    .nr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
    .cr = 4,
    .mr = 4,
  };
  xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
    .cr = 4,
    .mr = 9,
  };
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
    .cr = 4,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_x2,
    .cr = 4,
    .mr = 2,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__neon,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
  };

#elif XNN_ARCH_ARM64

  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
    .mr = 8,
    .nr = 8,
  };
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
    .cr = 8,
    .mr = 9,
  };
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
  };

  /**************************** F32 micro-kernels ****************************/
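  // With assembly enabled, the F32 GEMM/IGEMM microkernels are chosen per the
  // microarchitecture of core 0 reported by cpuinfo; unrecognized cores fall
  // back to generic NEONFMA microkernels.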
#if XNN_ENABLE_ASSEMBLY
  switch (cpuinfo_get_core(0)->uarch) {
    case cpuinfo_uarch_kryo:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 4,
        .nr = 8,
      };
      break;
    case cpuinfo_uarch_cortex_a57:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
        .mr = 6,
        .nr = 8,
      };
      break;
    case cpuinfo_uarch_cortex_a72:
    case cpuinfo_uarch_cortex_a76:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 4,
        .nr = 8,
      };
      break;
    case cpuinfo_uarch_cortex_a75:
    case cpuinfo_uarch_mongoose_m1:
    case cpuinfo_uarch_mongoose_m2:
    case cpuinfo_uarch_meerkat_m3:
    case (cpuinfo_uarch_meerkat_m3 + 1):
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 6,
        .nr = 8,
      };
      break;
    case cpuinfo_uarch_cortex_a53:
    case cpuinfo_uarch_cortex_a55:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
        .mr = 4,
        .nr = 12,
      };
      break;
    case cpuinfo_uarch_cortex_a73:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 6,
        .nr = 8,
      };
      break;
    default:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neonfma_ld64,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neonfma_ld64,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 4,
        .nr = 8,
      };
      break;
  }
#else // XNN_ENABLE_ASSEMBLY
  xnn_params.f32.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neonfma_ld64,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neonfma_ld64,
    .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
    // TODO(b/140592595): xnn_f32_igemm_ukernel_1x8__neonfma_ld64
    .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
    .mr = 4,
    .nr = 8,
  };
#endif

  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
    .mr = 4,
    .nr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
    .cr = 4,
    .mr = 4,
  };
  switch (cpuinfo_get_core(0)->uarch) {
    case cpuinfo_uarch_kryo:
      xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
        .cr = 4,
        .mr = 9,
      };
      break;
#if XNN_ENABLE_ASSEMBLY
    case cpuinfo_uarch_cortex_a53:
    case cpuinfo_uarch_cortex_a55:
      xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
        .cr = 4,
        .mr = 9,
      };
      break;
#endif
    default:
      xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
        .cr = 8,
        .mr = 9,
      };
      break;
  }
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
    .cr = 4,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_x2,
    .cr = 4,
    .mr = 2,
  };
  xnn_params.f32.spmm = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma,
    .mr = 16,
    .nr = 1,
  };
  xnn_params.f32.spmm2 = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
    .mr = 16,
    .nr = 2,
  };
  xnn_params.f32.spmm4 = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
    .mr = 16,
    .nr = 4,
  };
  xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
    .ukernel_with_symm_padding =
      (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
    .output_channel_tile = 4,
    .output_height_tile = 2,
    .output_width_tile = 2,
  };
  xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
    .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
    .input_width_tile = 4,
    .output_width_tile = 4,
    .output_height_tile = 3,
  };
  xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
    .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
    .input_width_tile = 4,
    .output_width_tile = 4,
    .output_height_tile = 1,
  };
  xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
    .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
    .channel_tile = 4,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__neon,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
  };

#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
  if (!cpuinfo_has_x86_sse2()) {
    xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
    .mr = 4,
    .nr = 4,
    .log2_kr = 1,
  };
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
    .cr = 8,
    .mr = 9,
  };
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
  };

  /**************************** F32 micro-kernels ****************************/
  xnn_params.f32.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
    .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
    .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
    .mr = 4,
    .nr = 8,
  };
  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
    .mr = 4,
    .nr = 2,
    .log2_kr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__sse,
    .cr = 4,
    .mr = 4,
  };
  xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__sse,
    .cr = 4,
    .mr = 9,
  };
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__sse,
    .cr = 4,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__sse,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_x2,
    .cr = 4,
    .mr = 2,
  };
  xnn_params.f32.spmm = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
    .mr = 4,
    .nr = 1,
  };
  xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
    .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
    .input_width_tile = 4,
    .output_width_tile = 4,
    .output_height_tile = 1,
  };
  xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
    .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
    .input_width_tile = 4,
    .output_width_tile = 4,
    .output_height_tile = 1,
  };
  xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
    .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
    .channel_tile = 4,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__sse2,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
  };

#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
    .mr = 2,
    .nr = 2,
  };
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
    .cr = 1,
    .mr = 9,
  };
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
  };

  /**************************** F32 micro-kernels ****************************/
  xnn_params.f32.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
    .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_loadsplat,
    .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_loadsplat,
    .mr = 4,
    .nr = 8,
  };
  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
    .mr = 4,
    .nr = 2,
    .log2_kr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
    .cr = 4,
    .mr = 4,
  };
  xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd,
    .cr = 4,
    .mr = 9,
  };
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
    .cr = 4,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_x2,
    .cr = 4,
    .mr = 2,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__psimd,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
  };

#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
  // Unlike most other architectures, x86/x86-64 sets the sign bit of the NaN
  // produced by floating-point instructions whose arguments are not NaN.
  // We use this to distinguish x86/x86-64 from other architectures by
  // subtracting two infinities, which must produce NaN per the IEEE 754 standard.
  static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
  const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;

  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
    .mr = 2,
    .nr = 2,
  };
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
    .cr = 1,
    .mr = 9,
  };
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
  };

  /**************************** F32 micro-kernels ****************************/
  if (is_wasm_x86) {
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
      .mr = 2,
      .nr = 4,
    };
  } else {
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
      .mr = 4,
      .nr = 4,
    };
  }
  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
    .mr = 4,
    .nr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar,
    .cr = 1,
    .mr = 4,
  };
  xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar,
    .cr = 1,
    .mr = 9,
  };
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar,
    .cr = 1,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__scalar,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_x2,
    .cr = 1,
    .mr = 2,
  };
  xnn_params.f32.spmm = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__scalar,
    .mr = 4,
    .nr = 1,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__scalar,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
  };

#else
  #error "Unsupported architecture"
#endif
  xnn_params.initialized = true;
}

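// Initializes XNNPACK: sets up cpuinfo (not needed under Emscripten), runs
// init() exactly once, and reports whether the host is supported. A minimal
// usage sketch (error handling is up to the caller):
//
//   if (xnn_initialize() != xnn_status_success) {
//     // hardware unsupported, or cpuinfo failed to allocate its tables
//   }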
enum xnn_status xnn_initialize(void) {
  #ifndef __EMSCRIPTEN__
    if (!cpuinfo_initialize()) {
      return xnn_status_out_of_memory;
    }
  #endif
  pthread_once(&init_guard, &init);
  if (xnn_params.initialized) {
    return xnn_status_success;
  } else {
    return xnn_status_unsupported_hardware;
  }
}

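// Counterpart to xnn_initialize(): releases the cpuinfo resources acquired
// there (a no-op under Emscripten) and always reports success.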
enum xnn_status xnn_deinitialize(void) {
  #ifndef __EMSCRIPTEN__
    cpuinfo_deinitialize();
  #endif
  return xnn_status_success;
}