// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <pthread.h>

#ifndef __EMSCRIPTEN__
  #include <cpuinfo.h>
#endif

#include <xnnpack.h>
#include <xnnpack/argmaxpool.h>
#include <xnnpack/avgpool.h>
#include <xnnpack/clamp.h>
#include <xnnpack/common.h>
#include <xnnpack/conv.h>
#include <xnnpack/dwconv.h>
#include <xnnpack/gavgpool.h>
#include <xnnpack/gemm.h>
#include <xnnpack/hswish.h>
#include <xnnpack/igemm.h>
#include <xnnpack/log.h>
#include <xnnpack/lut.h>
#include <xnnpack/maxpool.h>
#include <xnnpack/pad.h>
#include <xnnpack/params.h>
#include <xnnpack/pavgpool.h>
#include <xnnpack/prelu.h>
#include <xnnpack/rmax.h>
#include <xnnpack/spmm.h>
#include <xnnpack/unpool.h>
#include <xnnpack/vadd.h>
#include <xnnpack/vmulcaddc.h>
#include <xnnpack/zip.h>

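// Assembly micro-kernels are enabled by default; a build can opt out by
// pre-defining XNN_ENABLE_ASSEMBLY=0 (e.g. on the compiler command line).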
#ifndef XNN_ENABLE_ASSEMBLY
  #define XNN_ENABLE_ASSEMBLY 1
#endif

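// Guard ensuring that init() below runs at most once per process, even when
// xnn_initialize() is called concurrently from multiple threads.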
static pthread_once_t init_guard = PTHREAD_ONCE_INIT;

struct xnn_parameters xnn_params = {
  .initialized = false
};

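// Floating-point helper stubs defined outside of this translation unit on
// WebAssembly/PNaCl builds; xnn_stub_wasm_f32_sub() is used below to detect
// whether the Wasm engine is running on an x86/x86-64 host.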
#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
#endif
#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
#endif

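// Detects the host CPU (via cpuinfo where available) and fills xnn_params with
// the micro-kernels selected for each data type (Q8, U8, X8, F32, X32).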
static void init(void) {
#if XNN_ARCH_ARM
  if (!cpuinfo_has_arm_neon()) {
    xnn_log_error("XNNPACK initialization failed: NEON is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
    .mr = 4,
    .nr = 8,
  };

#if XNN_ENABLE_ASSEMBLY
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
    .cr = 8,
    .mr = 9,
  };
#else
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
    .cr = 8,
    .mr = 9,
  };
#endif
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
  };

  /**************************** F32 micro-kernels ****************************/
  xnn_params.f32.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
    .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
    .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
    .mr = 4,
    .nr = 8,
  };
  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
    .mr = 4,
    .nr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
    .cr = 4,
    .mr = 4,
  };
  xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
    .cr = 4,
    .mr = 9,
  };
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
    .cr = 4,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_x2,
    .cr = 4,
    .mr = 2,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__neon,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
  };

#elif XNN_ARCH_ARM64

  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
    .mr = 8,
    .nr = 8,
  };
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
    .cr = 8,
    .mr = 9,
  };
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
  };

  /**************************** F32 micro-kernels ****************************/
#if XNN_ENABLE_ASSEMBLY
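  // The F32 GEMM/IGEMM tile is chosen per the micro-architecture of core 0.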
  switch (cpuinfo_get_core(0)->uarch) {
    case cpuinfo_uarch_kryo:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 4,
        .nr = 8,
      };
      break;
    case cpuinfo_uarch_cortex_a57:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
        .mr = 6,
        .nr = 8,
      };
      break;
    case cpuinfo_uarch_cortex_a72:
    case cpuinfo_uarch_cortex_a76:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 4,
        .nr = 8,
      };
      break;
    case cpuinfo_uarch_cortex_a75:
    case cpuinfo_uarch_mongoose_m1:
    case cpuinfo_uarch_mongoose_m2:
    case cpuinfo_uarch_meerkat_m3:
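    // The next case value (meerkat_m3 + 1) presumably stands for a newer
    // Samsung uarch that this version of cpuinfo does not yet name.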
    case (cpuinfo_uarch_meerkat_m3 + 1):
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 6,
        .nr = 8,
      };
      break;
    case cpuinfo_uarch_cortex_a53:
    case cpuinfo_uarch_cortex_a55:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
        .mr = 4,
        .nr = 12,
      };
      break;
    case cpuinfo_uarch_cortex_a73:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 6,
        .nr = 8,
      };
      break;
    default:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neonfma_ld64,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neonfma_ld64,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 4,
        .nr = 8,
      };
      break;
  }
#else // XNN_ENABLE_ASSEMBLY
  xnn_params.f32.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neonfma_ld64,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neonfma_ld64,
    .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
    // TODO(b/140592595): xnn_f32_igemm_ukernel_1x8__neonfma_ld64
    .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
    .mr = 4,
    .nr = 8,
  };
#endif

  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
    .mr = 4,
    .nr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
    .cr = 4,
    .mr = 4,
  };
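  // The 9-tap (3x3) depthwise convolution kernel is likewise selected per the
  // micro-architecture of core 0; Cortex-A53/A55 get an assembly variant when
  // XNN_ENABLE_ASSEMBLY is on, and other cores use NEON-FMA intrinsics.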
  switch (cpuinfo_get_core(0)->uarch) {
    case cpuinfo_uarch_kryo:
      xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
        .cr = 4,
        .mr = 9,
      };
      break;
#if XNN_ENABLE_ASSEMBLY
    case cpuinfo_uarch_cortex_a53:
    case cpuinfo_uarch_cortex_a55:
      xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
        .cr = 4,
        .mr = 9,
      };
      break;
#endif
    default:
      xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
        .cr = 8,
        .mr = 9,
      };
      break;
  }
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
    .cr = 4,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_x2,
    .cr = 4,
    .mr = 2,
  };
  xnn_params.f32.spmm = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma,
    .mr = 16,
    .nr = 1,
  };
  xnn_params.f32.spmm2 = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
    .mr = 16,
    .nr = 2,
  };
  xnn_params.f32.spmm4 = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
    .mr = 16,
    .nr = 4,
  };
  xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
    .ukernel_with_symm_padding =
      (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
    .output_channel_tile = 4,
    .output_height_tile = 2,
    .output_width_tile = 2,
  };
  xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
    .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
    .input_width_tile = 4,
    .output_width_tile = 4,
    .output_height_tile = 3,
  };
  xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
    .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
    .input_width_tile = 4,
    .output_width_tile = 4,
    .output_height_tile = 1,
  };
  xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
    .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
    .channel_tile = 4,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__neon,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
  };

#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
  if (!cpuinfo_has_x86_sse2()) {
    xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
    .mr = 4,
    .nr = 4,
    .log2_kr = 1,
  };
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
    .cr = 8,
    .mr = 9,
  };
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
  };

  /**************************** F32 micro-kernels ****************************/
  xnn_params.f32.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
    .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
    .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
    .mr = 4,
    .nr = 8,
  };
  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
    .mr = 4,
    .nr = 2,
    .log2_kr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__sse,
    .cr = 4,
    .mr = 4,
  };
  xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__sse,
    .cr = 4,
    .mr = 9,
  };
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__sse,
    .cr = 4,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__sse,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_x2,
    .cr = 4,
    .mr = 2,
  };
  xnn_params.f32.spmm = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
    .mr = 4,
    .nr = 1,
  };
  xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
    .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
    .input_width_tile = 4,
    .output_width_tile = 4,
    .output_height_tile = 1,
  };
  xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
    .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
    .input_width_tile = 4,
    .output_width_tile = 4,
    .output_height_tile = 1,
  };
  xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
    .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
    .channel_tile = 4,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__sse2,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
  };

#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
  // Unlike most other architectures, on x86/x86-64 a floating-point instruction
  // that produces a NaN from non-NaN arguments sets the sign bit of the resulting
  // NaN. We exploit this to distinguish x86/x86-64 hosts from other architectures:
  // subtracting -Inf from -Inf must produce a NaN per IEEE 754, and reinterpreting
  // the result as a signed integer yields a negative value only when the sign bit is set.
  static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
  const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;

  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
    .mr = 2,
    .nr = 2,
  };
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
    .cr = 1,
    .mr = 9,
  };
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
  };

  /**************************** F32 micro-kernels ****************************/
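  // x86-based Wasm engines get a 4x8 GEMM/IGEMM tile; other hosts get a 6x8 tile.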
  if (is_wasm_x86) {
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8s4__psimd,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8s4__psimd,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8s4__psimd,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
      .mr = 4,
      .nr = 8,
      .log2_sr = 2,
    };
  } else {
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8s4__psimd,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
      .mr = 6,
      .nr = 8,
      .log2_sr = 2,
    };
  }
  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
    .mr = 4,
    .nr = 2,
    .log2_kr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
    .cr = 4,
    .mr = 4,
  };
  xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd,
    .cr = 4,
    .mr = 9,
  };
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
    .cr = 4,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_x2,
    .cr = 4,
    .mr = 2,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__psimd,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
  };

#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
  // Unlike most other architectures, on x86/x86-64 a floating-point instruction
  // that produces a NaN from non-NaN arguments sets the sign bit of the resulting
  // NaN. We exploit this to distinguish x86/x86-64 hosts from other architectures:
  // subtracting -Inf from -Inf must produce a NaN per IEEE 754, and reinterpreting
  // the result as a signed integer yields a negative value only when the sign bit is set.
  static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
  const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;

  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
    .mr = 2,
    .nr = 2,
  };
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
    .cr = 1,
    .mr = 9,
  };
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
  };

  /**************************** F32 micro-kernels ****************************/
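  // x86-based Wasm/asm.js engines get a 2x4 scalar GEMM tile; other hosts get 4x4.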
  if (is_wasm_x86) {
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
      .mr = 2,
      .nr = 4,
    };
  } else {
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
      .mr = 4,
      .nr = 4,
    };
  }
  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
    .mr = 4,
    .nr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar,
    .cr = 1,
    .mr = 4,
  };
  xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar,
    .cr = 1,
    .mr = 9,
  };
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar,
    .cr = 1,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__scalar,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_x2,
    .cr = 1,
    .mr = 2,
  };
  xnn_params.f32.spmm = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__scalar,
    .mr = 4,
    .nr = 1,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__scalar,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
  };

#else
  #error "Unsupported architecture"
#endif
  xnn_params.initialized = true;
}

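// Typical usage (sketch): call xnn_initialize() once before creating any
// XNNPACK operators and check the returned status, e.g.
//   if (xnn_initialize() != xnn_status_success) {
//     // handle unsupported hardware or allocation failure
//   }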
enum xnn_status xnn_initialize(void) {
  #ifndef __EMSCRIPTEN__
    if (!cpuinfo_initialize()) {
      return xnn_status_out_of_memory;
    }
  #endif
  pthread_once(&init_guard, &init);
  if (xnn_params.initialized) {
    return xnn_status_success;
  } else {
    return xnn_status_unsupported_hardware;
  }
}

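// Releases the cpuinfo resources acquired by xnn_initialize(); the micro-kernel
// selection stored in xnn_params is left intact.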
enum xnn_status xnn_deinitialize(void) {
  #ifndef __EMSCRIPTEN__
    cpuinfo_deinitialize();
  #endif
  return xnn_status_success;
}