blob: fec29dc45a59e3dcacb2bbcc06edfcc9d8b15ce1 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
9#include <stdbool.h>
10#include <stddef.h>
11#include <stdint.h>
12
13#include <pthread.h>
14
15#include <cpuinfo.h>
16
17#include <xnnpack.h>
18#include <xnnpack/argmaxpool.h>
19#include <xnnpack/avgpool.h>
20#include <xnnpack/clamp.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070021#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070022#include <xnnpack/conv.h>
23#include <xnnpack/dwconv.h>
24#include <xnnpack/gavgpool.h>
25#include <xnnpack/gemm.h>
26#include <xnnpack/hswish.h>
27#include <xnnpack/igemm.h>
28#include <xnnpack/log.h>
29#include <xnnpack/lut.h>
30#include <xnnpack/maxpool.h>
31#include <xnnpack/pad.h>
32#include <xnnpack/params.h>
33#include <xnnpack/pavgpool.h>
34#include <xnnpack/prelu.h>
35#include <xnnpack/rmax.h>
36#include <xnnpack/spmm.h>
37#include <xnnpack/unpool.h>
38#include <xnnpack/vadd.h>
39#include <xnnpack/vmulcaddc.h>
40#include <xnnpack/zip.h>
41
// Handwritten-assembly micro-kernels are enabled by default; a build may
// pre-define XNN_ENABLE_ASSEMBLY to 0 to force the C/intrinsics fallbacks
// selected in the #if XNN_ENABLE_ASSEMBLY branches below.
#ifndef XNN_ENABLE_ASSEMBLY
  #define XNN_ENABLE_ASSEMBLY 1
#endif
45
// One-time-initialization guard: used with pthread_once() so that init()
// runs exactly once per process, even under concurrent callers.
static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
47
// Global table of micro-kernel function pointers and their tiling parameters
// (MR/NR/CR etc.). Populated by init() with architecture- and CPU-specific
// kernels; .initialized stays false until that one-time setup has run.
struct xnn_parameters xnn_params = {
  .initialized = false
};
51
// Stubs for floating-point operations on bit-cast uint32_t values, provided
// by the embedder on web/PNaCl targets. init() uses xnn_stub_wasm_f32_sub on
// two infinities to probe the sign of the resulting NaN and thereby detect
// whether the WASM engine runs on x86/x86-64 hardware.
#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
#endif
#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
#endif
58
59static void init(void) {
Marat Dukhan1dadbf72019-10-01 10:46:20 -070060#if XNN_ARCH_ARM
XNNPACK Teamb455b122019-09-27 18:10:33 -070061 if (!cpuinfo_has_arm_neon()) {
62 xnn_log_error("XNNPACK initialization failed: NEON is not supported");
63 return;
64 }
65
66 /**************************** Q8 micro-kernels ****************************/
67 xnn_params.q8.gemm = (struct gemm_parameters) {
68 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
69 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
70 .mr = 4,
71 .nr = 8,
72 };
73
74#if XNN_ENABLE_ASSEMBLY
75 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
76 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
77 .cr = 8,
78 .mr = 9,
79 };
80#else
81 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
82 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
83 .cr = 8,
84 .mr = 9,
85 };
86#endif
87 xnn_params.q8.avgpool = (struct avgpool_parameters) {
88 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
89 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
90 .mr = 9,
91 .qr = 8,
92 };
93 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
94 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
95 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
96 .mr = 7,
97 };
98 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
99
100 /**************************** U8 micro-kernels ****************************/
101 xnn_params.u8.maxpool = (struct maxpool_parameters) {
102 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
103 .mr = 9,
104 .qr = 8,
105 };
106 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
107 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
108 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
109
110 /**************************** X8 micro-kernels ****************************/
111 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
112 xnn_params.x8.zip = (struct zip_parameters) {
113 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
114 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
115 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
116 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
117 };
118
119 /**************************** F32 micro-kernels ****************************/
120 xnn_params.f32.gemm = (struct gemm_parameters) {
121 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
122 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
123 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
124 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
125 .mr = 4,
126 .nr = 8,
127 };
128 xnn_params.f32.gemm2 = (struct gemm_parameters) {
129 .gemm = NULL,
130 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
131 .mr = 4,
132 .nr = 2,
133 };
134 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
135 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
136 .cr = 4,
137 .mr = 4,
138 };
139 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
140 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
141 .cr = 4,
142 .mr = 9,
143 };
144 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
145 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
146 .cr = 4,
147 .mr = 25,
148 };
149 xnn_params.f32.avgpool = (struct avgpool_parameters) {
150 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
151 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
152 .mr = 9,
153 .qr = 8,
154 };
155 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
156 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
157 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
158 .mr = 9,
159 .qr = 8,
160 };
161 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
162 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
163 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
164 .mr = 7,
165 };
166 xnn_params.f32.maxpool = (struct maxpool_parameters) {
167 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
168 .mr = 9,
169 .qr = 8,
170 };
171 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
172 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
173 .mr = 4,
174 };
175 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
176 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
177 .mr = 9,
178 };
179 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
180 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
181 .mr = 9,
182 .qr = 8,
183 };
184 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
185 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
186 xnn_params.f32.prelu = (struct prelu_parameters) {
187 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
188 .mr = 4,
189 };
190 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
191 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
192 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_x2,
193 .cr = 4,
194 .mr = 2,
195 };
196
197 /**************************** X32 micro-kernels ****************************/
198 xnn_params.x32.pad = (struct pad_parameters) {
199 .ukernel = xnn_x32_pad_x2__neon,
200 .mr = 2,
201 };
202 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
203 xnn_params.x32.zip = (struct zip_parameters) {
204 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
205 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
206 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
207 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
208 };
209
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700210#elif XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700211
212 /**************************** Q8 micro-kernels ****************************/
213 xnn_params.q8.gemm = (struct gemm_parameters) {
214 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
215 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
216 .mr = 8,
217 .nr = 8,
218 };
219 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
220 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
221 .cr = 8,
222 .mr = 9,
223 };
224 xnn_params.q8.avgpool = (struct avgpool_parameters) {
225 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
226 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
227 .mr = 9,
228 .qr = 8,
229 };
230 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
231 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
232 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
233 .mr = 7,
234 };
235 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
236
237 /**************************** U8 micro-kernels ****************************/
238 xnn_params.u8.maxpool = (struct maxpool_parameters) {
239 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
240 .mr = 9,
241 .qr = 8,
242 };
243 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
244 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
245 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
246
247 /**************************** X8 micro-kernels ****************************/
248 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
249 xnn_params.x8.zip = (struct zip_parameters) {
250 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
251 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
252 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
253 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
254 };
255
256 /**************************** F32 micro-kernels ****************************/
257#if XNN_ENABLE_ASSEMBLY
258 switch (cpuinfo_get_core(0)->uarch) {
259 case cpuinfo_uarch_kryo:
260 xnn_params.f32.gemm = (struct gemm_parameters) {
261 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
262 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
263 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
264 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
265 .mr = 4,
266 .nr = 8,
267 };
268 break;
269 case cpuinfo_uarch_cortex_a57:
270 xnn_params.f32.gemm = (struct gemm_parameters) {
271 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
272 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
273 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
274 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
275 .mr = 6,
276 .nr = 8,
277 };
278 break;
279 case cpuinfo_uarch_cortex_a72:
280 case cpuinfo_uarch_cortex_a76:
281 xnn_params.f32.gemm = (struct gemm_parameters) {
282 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
283 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
284 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
285 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
286 .mr = 4,
287 .nr = 8,
288 };
289 break;
290 case cpuinfo_uarch_cortex_a75:
Marat Dukhandf6985f2019-10-01 17:04:18 -0700291 case cpuinfo_uarch_mongoose_m1:
292 case cpuinfo_uarch_mongoose_m2:
293 case cpuinfo_uarch_meerkat_m3:
294 case (cpuinfo_uarch_meerkat_m3 + 1):
XNNPACK Teamb455b122019-09-27 18:10:33 -0700295 xnn_params.f32.gemm = (struct gemm_parameters) {
296 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
297 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
298 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
299 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
300 .mr = 6,
301 .nr = 8,
302 };
303 break;
304 case cpuinfo_uarch_cortex_a53:
305 case cpuinfo_uarch_cortex_a55:
306 xnn_params.f32.gemm = (struct gemm_parameters) {
307 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
308 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
309 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
310 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
311 .mr = 4,
312 .nr = 12,
313 };
314 break;
315 case cpuinfo_uarch_cortex_a73:
316 xnn_params.f32.gemm = (struct gemm_parameters) {
317 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
318 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
319 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
320 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
321 .mr = 6,
322 .nr = 8,
323 };
324 break;
325 default:
326 xnn_params.f32.gemm = (struct gemm_parameters) {
327 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neonfma_ld64,
328 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neonfma_ld64,
329 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
330 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
331 .mr = 4,
332 .nr = 8,
333 };
334 break;
335 }
336#else // XNN_ENABLE_ASSEMBLY
337 xnn_params.f32.gemm = (struct gemm_parameters) {
338 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neonfma_ld64,
339 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neonfma_ld64,
340 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
341 // TODO(b/140592595): xnn_f32_igemm_ukernel_1x8__neonfma_ld64
342 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
343 .mr = 4,
344 .nr = 8,
345 };
346#endif
347
348 xnn_params.f32.gemm2 = (struct gemm_parameters) {
349 .gemm = NULL,
350 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
351 .mr = 4,
352 .nr = 2,
353 };
354 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
355 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
356 .cr = 4,
357 .mr = 4,
358 };
359 switch (cpuinfo_get_core(0)->uarch) {
360 case cpuinfo_uarch_kryo:
361 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
362 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
363 .cr = 4,
364 .mr = 9,
365 };
366 break;
367#if XNN_ENABLE_ASSEMBLY
368 case cpuinfo_uarch_cortex_a53:
369 case cpuinfo_uarch_cortex_a55:
370 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
371 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
372 .cr = 4,
373 .mr = 9,
374 };
375 break;
376#endif
377 default:
378 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
379 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
380 .cr = 8,
381 .mr = 9,
382 };
383 break;
384 }
385 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
386 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
387 .cr = 4,
388 .mr = 25,
389 };
390 xnn_params.f32.avgpool = (struct avgpool_parameters) {
391 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
392 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
393 .mr = 9,
394 .qr = 8,
395 };
396 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
397 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
398 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
399 .mr = 9,
400 .qr = 8,
401 };
402 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
403 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
404 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
405 .mr = 7,
406 };
407 xnn_params.f32.maxpool = (struct maxpool_parameters) {
408 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
409 .mr = 9,
410 .qr = 8,
411 };
412 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
413 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
414 .mr = 4,
415 };
416 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
417 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
418 .mr = 9,
419 };
420 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
421 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
422 .mr = 9,
423 .qr = 8,
424 };
425 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
426 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
427 xnn_params.f32.prelu = (struct prelu_parameters) {
428 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
429 .mr = 4,
430 };
431 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
432 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
433 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_x2,
434 .cr = 4,
435 .mr = 2,
436 };
437 xnn_params.f32.spmm = (struct spmm_parameters) {
438 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma,
439 .mr = 16,
440 .nr = 1,
441 };
442 xnn_params.f32.spmm2 = (struct spmm_parameters) {
443 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
444 .mr = 16,
445 .nr = 2,
446 };
447 xnn_params.f32.spmm4 = (struct spmm_parameters) {
448 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
449 .mr = 16,
450 .nr = 4,
451 };
452 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
453 .ukernel_with_symm_padding =
454 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
455 .output_channel_tile = 4,
456 .output_height_tile = 2,
457 .output_width_tile = 2,
458 };
459 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
460 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
461 .input_width_tile = 4,
462 .output_width_tile = 4,
463 .output_height_tile = 3,
464 };
465 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
466 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
467 .input_width_tile = 4,
468 .output_width_tile = 4,
469 .output_height_tile = 1,
470 };
471 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
472 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
473 .channel_tile = 4,
474 };
475
476 /**************************** X32 micro-kernels ****************************/
477 xnn_params.x32.pad = (struct pad_parameters) {
478 .ukernel = xnn_x32_pad_x2__neon,
479 .mr = 2,
480 };
481 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
482 xnn_params.x32.zip = (struct zip_parameters) {
483 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
484 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
485 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
486 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
487 };
488
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700489#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700490 if (!cpuinfo_has_x86_sse2()) {
491 xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
492 return;
493 }
494
495 /**************************** Q8 micro-kernels ****************************/
496 xnn_params.q8.gemm = (struct gemm_parameters) {
497 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
498 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
499 .mr = 4,
500 .nr = 4,
501 .log2_kr = 1,
502 };
503 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
504 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
505 .cr = 8,
506 .mr = 9,
507 };
508 xnn_params.q8.avgpool = (struct avgpool_parameters) {
509 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
510 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
511 .mr = 9,
512 .qr = 8,
513 };
514 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
515 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
516 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
517 .mr = 7,
518 };
519 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
520
521 /**************************** U8 micro-kernels ****************************/
522 xnn_params.u8.maxpool = (struct maxpool_parameters) {
523 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
524 .mr = 9,
525 .qr = 8,
526 };
527 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
528 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
529 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
530
531 /**************************** X8 micro-kernels ****************************/
532 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
533 xnn_params.x8.zip = (struct zip_parameters) {
534 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
535 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
536 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
537 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
538 };
539
540 /**************************** F32 micro-kernels ****************************/
541 xnn_params.f32.gemm = (struct gemm_parameters) {
542 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
543 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
544 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
545 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
546 .mr = 4,
547 .nr = 8,
548 };
549 xnn_params.f32.gemm2 = (struct gemm_parameters) {
550 .gemm = NULL,
551 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
552 .mr = 4,
553 .nr = 2,
554 .log2_kr = 2,
555 };
556 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
557 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__sse,
558 .cr = 4,
559 .mr = 4,
560 };
561 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
562 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__sse,
563 .cr = 4,
564 .mr = 9,
565 };
566 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
567 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__sse,
568 .cr = 4,
569 .mr = 25,
570 };
571 xnn_params.f32.avgpool = (struct avgpool_parameters) {
572 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
573 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
574 .mr = 9,
575 .qr = 8,
576 };
577 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
578 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
579 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
580 .mr = 9,
581 .qr = 8,
582 };
583 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
584 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
585 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
586 .mr = 7,
587 };
588 xnn_params.f32.maxpool = (struct maxpool_parameters) {
589 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
590 .mr = 9,
591 .qr = 8,
592 };
593 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
594 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
595 .mr = 4,
596 };
597 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
598 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
599 .mr = 9,
600 };
601 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
602 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
603 .mr = 9,
604 .qr = 8,
605 };
606 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
607 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
608 xnn_params.f32.prelu = (struct prelu_parameters) {
609 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__sse,
610 .mr = 4,
611 };
612 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse;
613 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
614 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_x2,
615 .cr = 4,
616 .mr = 2,
617 };
618 xnn_params.f32.spmm = (struct spmm_parameters) {
619 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
620 .mr = 4,
621 .nr = 1,
622 };
623 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
624 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
625 .input_width_tile = 4,
626 .output_width_tile = 4,
627 .output_height_tile = 1,
628 };
629 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
630 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
631 .input_width_tile = 4,
632 .output_width_tile = 4,
633 .output_height_tile = 1,
634 };
635 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
636 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
637 .channel_tile = 4,
638 };
639
640 /**************************** X32 micro-kernels ****************************/
641 xnn_params.x32.pad = (struct pad_parameters) {
642 .ukernel = xnn_x32_pad_x2__sse2,
643 .mr = 2,
644 };
645 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
646 xnn_params.x32.zip = (struct zip_parameters) {
647 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
648 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
649 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
650 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
651 };
652
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700653#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -0700654 /**************************** Q8 micro-kernels ****************************/
655 xnn_params.q8.gemm = (struct gemm_parameters) {
656 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
657 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
658 .mr = 2,
659 .nr = 2,
660 };
661 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
662 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
663 .cr = 1,
664 .mr = 9,
665 };
666 xnn_params.q8.avgpool = (struct avgpool_parameters) {
667 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
668 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
669 .mr = 9,
670 .qr = 8,
671 };
672 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
673 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
674 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
675 .mr = 7,
676 };
677 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
678
679 /**************************** U8 micro-kernels ****************************/
680 xnn_params.u8.maxpool = (struct maxpool_parameters) {
681 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
682 .mr = 9,
683 .qr = 8,
684 };
685 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
686 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
687 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
688
689 /**************************** X8 micro-kernels ****************************/
690 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
691 xnn_params.x8.zip = (struct zip_parameters) {
692 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
693 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
694 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
695 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
696 };
697
698 /**************************** F32 micro-kernels ****************************/
699 xnn_params.f32.gemm = (struct gemm_parameters) {
700 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
701 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
702 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_loadsplat,
703 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_loadsplat,
704 .mr = 4,
705 .nr = 8,
706 };
  // Secondary f32 GEMM: only the 4x2 indirect-GEMM micro-kernel is provided (.gemm is NULL).
  // NOTE(review): .mr/.nr/.cr/.qr below mirror the tile sizes encoded in each
  // micro-kernel's name (e.g. 4x2c4 -> mr=4, nr=2, log2_kr=2) — confirm against xnnpack/params.h.
707 xnn_params.f32.gemm2 = (struct gemm_parameters) {
708 .gemm = NULL,
709 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
710 .mr = 4,
711 .nr = 2,
712 .log2_kr = 2,
713 };
  // Depthwise convolution micro-kernels, indexed by tap count (4, 9, 25 per the kernel names).
714 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
715 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
716 .cr = 4,
717 .mr = 4,
718 };
719 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
720 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd,
721 .cr = 4,
722 .mr = 9,
723 };
724 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
725 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
726 .cr = 4,
727 .mr = 25,
728 };
  // Pooling micro-kernels: .up is the unipass ("up") variant, .mp the multipass ("mp") variant.
729 xnn_params.f32.avgpool = (struct avgpool_parameters) {
730 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
731 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
732 .mr = 9,
733 .qr = 8,
734 };
735 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
736 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
737 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
738 .mr = 9,
739 .qr = 8,
740 };
741 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
742 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
743 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
744 .mr = 7,
745 };
746 xnn_params.f32.maxpool = (struct maxpool_parameters) {
747 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
748 .mr = 9,
749 .qr = 8,
750 };
  // argmaxpool[2] only has a multipass kernel; [0] and [1] only have unipass kernels.
751 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
752 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
753 .mr = 4,
754 };
755 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
756 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
757 .mr = 9,
758 };
759 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
760 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
761 .mr = 9,
762 .qr = 8,
763 };
  // Element-wise (univector) and binary f32 micro-kernels.
764 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
765 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
766 xnn_params.f32.prelu = (struct prelu_parameters) {
767 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
768 .mr = 4,
769 };
770 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
771 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
772 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_x2,
773 .cr = 4,
774 .mr = 2,
775 };
776
777 /**************************** X32 micro-kernels ****************************/
778 xnn_params.x32.pad = (struct pad_parameters) {
779 .ukernel = xnn_x32_pad_x2__psimd,
780 .mr = 2,
781 };
782 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
783 xnn_params.x32.zip = (struct zip_parameters) {
784 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
785 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
786 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
787 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
788 };
789
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700790#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700791 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
792 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
793 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
794 // of two infinities (must produce NaN per IEEE 754 standard).
  // 0xFF800000 is the IEEE 754 single-precision bit pattern of -infinity;
  // `volatile` prevents the compiler from folding the subtraction at compile time.
795 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
796 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
797
798 /**************************** Q8 micro-kernels ****************************/
799 xnn_params.q8.gemm = (struct gemm_parameters) {
800 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
801 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
802 .mr = 2,
803 .nr = 2,
804 };
805 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
806 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
807 .cr = 1,
808 .mr = 9,
809 };
810 xnn_params.q8.avgpool = (struct avgpool_parameters) {
811 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
812 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
813 .mr = 9,
814 .qr = 8,
815 };
816 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
817 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
818 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
819 .mr = 7,
820 };
821 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
822
823 /**************************** U8 micro-kernels ****************************/
824 xnn_params.u8.maxpool = (struct maxpool_parameters) {
825 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
826 .mr = 9,
827 .qr = 8,
828 };
829 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
830 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
831 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
832
833 /**************************** X8 micro-kernels ****************************/
834 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
835 xnn_params.x8.zip = (struct zip_parameters) {
836 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
837 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
838 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
839 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
840 };
841
842 /**************************** F32 micro-kernels ****************************/
  // Select the f32 GEMM tile based on the runtime x86-vs-other detection above:
  // 2x4 tiles when running on x86/x86-64 hosts, 4x4 tiles otherwise.
843 if (is_wasm_x86) {
844 xnn_params.f32.gemm = (struct gemm_parameters) {
845 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
846 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
847 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
848 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
849 .mr = 2,
850 .nr = 4,
851 };
852 } else {
853 xnn_params.f32.gemm = (struct gemm_parameters) {
854 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
855 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
856 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
857 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
858 .mr = 4,
859 .nr = 4,
860 };
861 }
  // Secondary f32 GEMM: only the 4x2 indirect-GEMM micro-kernel is provided (.gemm is NULL).
862 xnn_params.f32.gemm2 = (struct gemm_parameters) {
863 .gemm = NULL,
864 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
865 .mr = 4,
866 .nr = 2,
867 };
868 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
869 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar,
870 .cr = 1,
871 .mr = 4,
872 };
873 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
874 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar,
875 .cr = 1,
876 .mr = 9,
877 };
878 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
879 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar,
880 .cr = 1,
881 .mr = 25,
882 };
883 xnn_params.f32.avgpool = (struct avgpool_parameters) {
884 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
885 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
886 .mr = 9,
887 .qr = 8,
888 };
889 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
890 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
891 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
892 .mr = 9,
893 .qr = 8,
894 };
895 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
896 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
897 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
898 .mr = 7,
899 };
900 xnn_params.f32.maxpool = (struct maxpool_parameters) {
901 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
902 .mr = 9,
903 .qr = 8,
904 };
905 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
906 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
907 .mr = 4,
908 };
909 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
910 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
911 .mr = 9,
912 };
913 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
914 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
915 .mr = 9,
916 .qr = 8,
917 };
918 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
919 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
920 xnn_params.f32.prelu = (struct prelu_parameters) {
921 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__scalar,
922 .mr = 4,
923 };
924 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar;
925 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
926 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_x2,
927 .cr = 1,
928 .mr = 2,
929 };
  // Sparse matrix-dense matrix multiplication (SpMM) micro-kernel.
930 xnn_params.f32.spmm = (struct spmm_parameters) {
931 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__scalar,
932 .mr = 4,
933 .nr = 1,
934 };
935
936 /**************************** X32 micro-kernels ****************************/
937 xnn_params.x32.pad = (struct pad_parameters) {
938 .ukernel = xnn_x32_pad_x2__scalar,
939 .mr = 2,
940 };
941 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
942 xnn_params.x32.zip = (struct zip_parameters) {
943 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
944 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
945 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
946 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
947 };
948
949#else
950 #error "Unsupported architecture"
951#endif
  // Mark success; xnn_initialize() checks this flag after pthread_once() returns.
952 xnn_params.initialized = true;
953}
954
955enum xnn_status xnn_initialize(void) {
956 if (!cpuinfo_initialize()) {
957 return xnn_status_out_of_memory;
958 }
959 pthread_once(&init_guard, &init);
960 if (xnn_params.initialized) {
961 return xnn_status_success;
962 } else {
963 return xnn_status_unsupported_hardware;
964 }
965}
966
967enum xnn_status xnn_deinitialize(void) {
968 cpuinfo_deinitialize();
969 return xnn_status_success;
970}