// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <pthread.h>

#include <cpuinfo.h>

#include <xnnpack.h>
#include <xnnpack/argmaxpool.h>
#include <xnnpack/avgpool.h>
#include <xnnpack/clamp.h>
#include <xnnpack/conv.h>
#include <xnnpack/dwconv.h>
#include <xnnpack/gavgpool.h>
#include <xnnpack/gemm.h>
#include <xnnpack/hswish.h>
#include <xnnpack/igemm.h>
#include <xnnpack/log.h>
#include <xnnpack/lut.h>
#include <xnnpack/maxpool.h>
#include <xnnpack/pad.h>
#include <xnnpack/params.h>
#include <xnnpack/pavgpool.h>
#include <xnnpack/prelu.h>
#include <xnnpack/rmax.h>
#include <xnnpack/spmm.h>
#include <xnnpack/unpool.h>
#include <xnnpack/vadd.h>
#include <xnnpack/vmulcaddc.h>
#include <xnnpack/zip.h>

#ifndef XNN_ENABLE_ASSEMBLY
  #define XNN_ENABLE_ASSEMBLY 1
#endif
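// Unless the build system overrides it, XNN_ENABLE_ASSEMBLY defaults to 1, so
// the hand-written assembly micro-kernels below are preferred over the
// intrinsics-based fallbacks on architectures that provide them.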

static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
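// xnn_params starts out with .initialized == false and is populated exactly
// once by init(); xnn_initialize() runs init() through pthread_once() on the
// guard above.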

struct xnn_parameters xnn_params = {
  .initialized = false
};

#if CPUINFO_ARCH_PNACL || CPUINFO_ARCH_ASMJS || CPUINFO_ARCH_WASM || CPUINFO_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
#endif
#if CPUINFO_ARCH_PNACL || CPUINFO_ARCH_WASM || CPUINFO_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
#endif
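// The stubs above are defined outside this translation unit; init() calls
// xnn_stub_wasm_f32_sub below to probe the floating-point NaN behaviour of the
// host at run time.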

static void init(void) {
#if CPUINFO_ARCH_ARM
  if (!cpuinfo_has_arm_neon()) {
    xnn_log_error("XNNPACK initialization failed: NEON is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
    .mr = 4,
    .nr = 8,
  };

#if XNN_ENABLE_ASSEMBLY
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
    .cr = 8,
    .mr = 9,
  };
#else
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
    .cr = 8,
    .mr = 9,
  };
#endif
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
  };

  /**************************** F32 micro-kernels ****************************/
  xnn_params.f32.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
    .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
    .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
    .mr = 4,
    .nr = 8,
  };
  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
    .mr = 4,
    .nr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
    .cr = 4,
    .mr = 4,
  };
  xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
    .cr = 4,
    .mr = 9,
  };
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
    .cr = 4,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_x2,
    .cr = 4,
    .mr = 2,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__neon,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
  };

#elif CPUINFO_ARCH_ARM64
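  // On AArch64, NEON and FMA are architecturally guaranteed, so no feature
  // check is needed before selecting NEON/NEONFMA micro-kernels.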

  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
    .mr = 8,
    .nr = 8,
  };
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
    .cr = 8,
    .mr = 9,
  };
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
  };

  /**************************** F32 micro-kernels ****************************/
#if XNN_ENABLE_ASSEMBLY
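  // The F32 GEMM/IGEMM micro-kernels and their MRxNR tile are chosen per the
  // micro-architecture of core 0, as reported by cpuinfo.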
  switch (cpuinfo_get_core(0)->uarch) {
    case cpuinfo_uarch_kryo:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 4,
        .nr = 8,
      };
      break;
    case cpuinfo_uarch_cortex_a57:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
        .mr = 6,
        .nr = 8,
      };
      break;
    case cpuinfo_uarch_cortex_a72:
    case cpuinfo_uarch_cortex_a76:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 4,
        .nr = 8,
      };
      break;
    case cpuinfo_uarch_cortex_a75:
    case cpuinfo_uarch_exynos_m1:
    case cpuinfo_uarch_exynos_m2:
    case cpuinfo_uarch_exynos_m3:
    case cpuinfo_uarch_exynos_m4:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 6,
        .nr = 8,
      };
      break;
    case cpuinfo_uarch_cortex_a53:
    case cpuinfo_uarch_cortex_a55:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
        .mr = 4,
        .nr = 12,
      };
      break;
    case cpuinfo_uarch_cortex_a73:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 6,
        .nr = 8,
      };
      break;
    default:
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neonfma_ld64,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neonfma_ld64,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
        .mr = 4,
        .nr = 8,
      };
      break;
  }
#else // XNN_ENABLE_ASSEMBLY
  xnn_params.f32.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neonfma_ld64,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neonfma_ld64,
    .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
    // TODO(b/140592595): xnn_f32_igemm_ukernel_1x8__neonfma_ld64
    .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
    .mr = 4,
    .nr = 8,
  };
#endif

  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
    .mr = 4,
    .nr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
    .cr = 4,
    .mr = 4,
  };
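  // The 9-tap depthwise convolution micro-kernel is likewise selected per the
  // micro-architecture of core 0.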
  switch (cpuinfo_get_core(0)->uarch) {
    case cpuinfo_uarch_kryo:
      xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
        .cr = 4,
        .mr = 9,
      };
      break;
#if XNN_ENABLE_ASSEMBLY
    case cpuinfo_uarch_cortex_a53:
    case cpuinfo_uarch_cortex_a55:
      xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
        .cr = 4,
        .mr = 9,
      };
      break;
#endif
    default:
      xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
        .cr = 8,
        .mr = 9,
      };
      break;
  }
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
    .cr = 4,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_x2,
    .cr = 4,
    .mr = 2,
  };
  xnn_params.f32.spmm = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma,
    .mr = 16,
    .nr = 1,
  };
  xnn_params.f32.spmm2 = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
    .mr = 16,
    .nr = 2,
  };
  xnn_params.f32.spmm4 = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
    .mr = 16,
    .nr = 4,
  };
  xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
    .ukernel_with_symm_padding =
      (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
    .output_channel_tile = 4,
    .output_height_tile = 2,
    .output_width_tile = 2,
  };
  xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
    .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
    .input_width_tile = 4,
    .output_width_tile = 4,
    .output_height_tile = 3,
  };
  xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
    .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
    .input_width_tile = 4,
    .output_width_tile = 4,
    .output_height_tile = 1,
  };
  xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
    .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
    .channel_tile = 4,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__neon,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
  };

#elif CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
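  // On x86/x86-64, SSE2 is the required baseline; without it initialization
  // fails and xnn_params.initialized stays false.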
  if (!cpuinfo_has_x86_sse2()) {
    xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
    .mr = 4,
    .nr = 4,
    .log2_kr = 1,
  };
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
    .cr = 8,
    .mr = 9,
  };
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
  };

  /**************************** F32 micro-kernels ****************************/
  xnn_params.f32.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
    .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
    .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
    .mr = 4,
    .nr = 8,
  };
  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
    .mr = 4,
    .nr = 2,
    .log2_kr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__sse,
    .cr = 4,
    .mr = 4,
  };
  xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__sse,
    .cr = 4,
    .mr = 9,
  };
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__sse,
    .cr = 4,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__sse,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_x2,
    .cr = 4,
    .mr = 2,
  };
  xnn_params.f32.spmm = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
    .mr = 4,
    .nr = 1,
  };
  xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
    .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
    .input_width_tile = 4,
    .output_width_tile = 4,
    .output_height_tile = 1,
  };
  xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
    .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
    .input_width_tile = 4,
    .output_width_tile = 4,
    .output_height_tile = 1,
  };
  xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
    .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
    .channel_tile = 4,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__sse2,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
  };

#elif CPUINFO_ARCH_PNACL || CPUINFO_ARCH_WASMSIMD
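  // PNaCl and WebAssembly SIMD use portable SIMD (psimd) F32/X32 micro-kernels
  // and scalar micro-kernels for the fixed-point data types.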
  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
    .mr = 2,
    .nr = 2,
  };
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
    .cr = 1,
    .mr = 9,
  };
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
  };

  /**************************** F32 micro-kernels ****************************/
  xnn_params.f32.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
    .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_loadsplat,
    .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_loadsplat,
    .mr = 4,
    .nr = 8,
  };
  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
    .mr = 4,
    .nr = 2,
    .log2_kr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
    .cr = 4,
    .mr = 4,
  };
  xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd,
    .cr = 4,
    .mr = 9,
  };
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
    .cr = 4,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_x2,
    .cr = 4,
    .mr = 2,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__psimd,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
  };

#elif CPUINFO_ARCH_WASM || CPUINFO_ARCH_ASMJS
  // Unlike most other architectures, on x86/x86-64, floating-point instructions
  // that have no NaN arguments but produce a NaN output set the sign bit of that
  // NaN. We use this to distinguish x86/x86-64 hosts from other architectures by
  // subtracting two infinities (which must produce a NaN per the IEEE 754 standard).
  static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
  const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
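  // -Inf - (-Inf) yields NaN; on x86 the resulting NaN has its sign bit set, so
  // reinterpreting the bits as int32_t gives a negative value and is_wasm_x86
  // becomes true.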

  /**************************** Q8 micro-kernels ****************************/
  xnn_params.q8.gemm = (struct gemm_parameters) {
    .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
    .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
    .mr = 2,
    .nr = 2,
  };
  xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
    .cr = 1,
    .mr = 9,
  };
  xnn_params.q8.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
    .mr = 7,
  };
  xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;

  /**************************** U8 micro-kernels ****************************/
  xnn_params.u8.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
  xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;

  /**************************** X8 micro-kernels ****************************/
  xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
  xnn_params.x8.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
    .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
    .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
    .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
  };

  /**************************** F32 micro-kernels ****************************/
  if (is_wasm_x86) {
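    // On x86 hosts a smaller 2x4 scalar tile is used (likely to limit register
    // pressure); other hosts get the 4x4 tile below.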
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
      .mr = 2,
      .nr = 4,
    };
  } else {
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
      .mr = 4,
      .nr = 4,
    };
  }
  xnn_params.f32.gemm2 = (struct gemm_parameters) {
    .gemm = NULL,
    .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
    .mr = 4,
    .nr = 2,
  };
  xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar,
    .cr = 1,
    .mr = 4,
  };
  xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar,
    .cr = 1,
    .mr = 9,
  };
  xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
    .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar,
    .cr = 1,
    .mr = 25,
  };
  xnn_params.f32.avgpool = (struct avgpool_parameters) {
    .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
    .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
    .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
    .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
    .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
    .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
    .mr = 7,
  };
  xnn_params.f32.maxpool = (struct maxpool_parameters) {
    .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
    .mr = 4,
  };
  xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
    .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
    .mr = 9,
  };
  xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
    .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
    .mr = 9,
    .qr = 8,
  };
  xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
  xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
  xnn_params.f32.prelu = (struct prelu_parameters) {
    .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__scalar,
    .mr = 4,
  };
  xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar;
  xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
    .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_x2,
    .cr = 1,
    .mr = 2,
  };
  xnn_params.f32.spmm = (struct spmm_parameters) {
    .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__scalar,
    .mr = 4,
    .nr = 1,
  };

  /**************************** X32 micro-kernels ****************************/
  xnn_params.x32.pad = (struct pad_parameters) {
    .ukernel = xnn_x32_pad_x2__scalar,
    .mr = 2,
  };
  xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
  xnn_params.x32.zip = (struct zip_parameters) {
    .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
    .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
    .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
    .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
  };

#else
  #error "Unsupported architecture"
#endif
  xnn_params.initialized = true;
}

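// Public entry point. Initialization is thread-safe and idempotent: the first
// call runs init() through pthread_once(); later calls only report the status
// recorded in xnn_params.initialized. A minimal usage sketch:
//
//   if (xnn_initialize() != xnn_status_success) {
//     // either the hardware is unsupported or cpuinfo failed to initialize
//   }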
enum xnn_status xnn_initialize(void) {
  if (!cpuinfo_initialize()) {
    return xnn_status_out_of_memory;
  }
  pthread_once(&init_guard, &init);
  if (xnn_params.initialized) {
    return xnn_status_success;
  } else {
    return xnn_status_unsupported_hardware;
  }
}

enum xnn_status xnn_deinitialize(void) {
  cpuinfo_deinitialize();
  return xnn_status_success;
}