// blob: 8e35d80d98208f971e533c219f23c53586f3901a [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#include <stdbool.h>
10#include <stddef.h>
11#include <stdint.h>
12
13#include <pthread.h>
14
Marat Dukhand343c222019-10-07 09:22:14 -070015#ifndef __EMSCRIPTEN__
16 #include <cpuinfo.h>
17#endif
XNNPACK Teamb455b122019-09-27 18:10:33 -070018
19#include <xnnpack.h>
20#include <xnnpack/argmaxpool.h>
21#include <xnnpack/avgpool.h>
22#include <xnnpack/clamp.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070023#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070024#include <xnnpack/conv.h>
25#include <xnnpack/dwconv.h>
26#include <xnnpack/gavgpool.h>
27#include <xnnpack/gemm.h>
28#include <xnnpack/hswish.h>
29#include <xnnpack/igemm.h>
30#include <xnnpack/log.h>
31#include <xnnpack/lut.h>
32#include <xnnpack/maxpool.h>
33#include <xnnpack/pad.h>
34#include <xnnpack/params.h>
35#include <xnnpack/pavgpool.h>
36#include <xnnpack/prelu.h>
37#include <xnnpack/rmax.h>
38#include <xnnpack/spmm.h>
39#include <xnnpack/unpool.h>
40#include <xnnpack/vadd.h>
41#include <xnnpack/vmulcaddc.h>
42#include <xnnpack/zip.h>
43
// Assembly micro-kernels are enabled by default; a build may opt out by
// pre-defining XNN_ENABLE_ASSEMBLY to 0.
#ifndef XNN_ENABLE_ASSEMBLY
  #define XNN_ENABLE_ASSEMBLY 1
#endif

// One-time initialization guard for init() (used with pthread_once).
static pthread_once_t init_guard = PTHREAD_ONCE_INIT;

// Global table of micro-kernel function pointers and tiling parameters,
// populated per-architecture by init(). Remains marked uninitialized until
// init() succeeds.
struct xnn_parameters xnn_params = {
  .initialized = false
};

// External stubs for WAsm/PNaCl builds: single-precision float operations on
// bit-cast uint32_t values. init() uses xnn_stub_wasm_f32_sub to probe NaN
// sign behavior (distinguishing x86-hosted WAsm); see the comment near that
// call site.
#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
#endif
#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
#endif
60
61static void init(void) {
Marat Dukhan1dadbf72019-10-01 10:46:20 -070062#if XNN_ARCH_ARM
XNNPACK Teamb455b122019-09-27 18:10:33 -070063 if (!cpuinfo_has_arm_neon()) {
64 xnn_log_error("XNNPACK initialization failed: NEON is not supported");
65 return;
66 }
67
68 /**************************** Q8 micro-kernels ****************************/
69 xnn_params.q8.gemm = (struct gemm_parameters) {
70 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
71 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
72 .mr = 4,
73 .nr = 8,
74 };
75
76#if XNN_ENABLE_ASSEMBLY
77 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
78 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
79 .cr = 8,
80 .mr = 9,
81 };
82#else
83 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
84 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
85 .cr = 8,
86 .mr = 9,
87 };
88#endif
89 xnn_params.q8.avgpool = (struct avgpool_parameters) {
90 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
91 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
92 .mr = 9,
93 .qr = 8,
94 };
95 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
96 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
97 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
98 .mr = 7,
99 };
100 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
101
102 /**************************** U8 micro-kernels ****************************/
103 xnn_params.u8.maxpool = (struct maxpool_parameters) {
104 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
105 .mr = 9,
106 .qr = 8,
107 };
108 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
109 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
110 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
111
112 /**************************** X8 micro-kernels ****************************/
113 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
114 xnn_params.x8.zip = (struct zip_parameters) {
115 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
116 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
117 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
118 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
119 };
120
121 /**************************** F32 micro-kernels ****************************/
122 xnn_params.f32.gemm = (struct gemm_parameters) {
123 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
124 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
125 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
126 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
127 .mr = 4,
128 .nr = 8,
129 };
130 xnn_params.f32.gemm2 = (struct gemm_parameters) {
131 .gemm = NULL,
132 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
133 .mr = 4,
134 .nr = 2,
135 };
136 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
137 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
138 .cr = 4,
139 .mr = 4,
140 };
141 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
142 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
143 .cr = 4,
144 .mr = 9,
145 };
146 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
147 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
148 .cr = 4,
149 .mr = 25,
150 };
151 xnn_params.f32.avgpool = (struct avgpool_parameters) {
152 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
153 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
154 .mr = 9,
155 .qr = 8,
156 };
157 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
158 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
159 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
160 .mr = 9,
161 .qr = 8,
162 };
163 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
164 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
165 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
166 .mr = 7,
167 };
168 xnn_params.f32.maxpool = (struct maxpool_parameters) {
169 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
170 .mr = 9,
171 .qr = 8,
172 };
173 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
174 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
175 .mr = 4,
176 };
177 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
178 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
179 .mr = 9,
180 };
181 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
182 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
183 .mr = 9,
184 .qr = 8,
185 };
186 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
187 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
188 xnn_params.f32.prelu = (struct prelu_parameters) {
189 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
190 .mr = 4,
191 };
192 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
193 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
194 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_x2,
195 .cr = 4,
196 .mr = 2,
197 };
198
199 /**************************** X32 micro-kernels ****************************/
200 xnn_params.x32.pad = (struct pad_parameters) {
201 .ukernel = xnn_x32_pad_x2__neon,
202 .mr = 2,
203 };
204 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
205 xnn_params.x32.zip = (struct zip_parameters) {
206 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
207 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
208 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
209 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
210 };
211
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700212#elif XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700213
214 /**************************** Q8 micro-kernels ****************************/
215 xnn_params.q8.gemm = (struct gemm_parameters) {
216 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
217 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
218 .mr = 8,
219 .nr = 8,
220 };
221 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
222 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
223 .cr = 8,
224 .mr = 9,
225 };
226 xnn_params.q8.avgpool = (struct avgpool_parameters) {
227 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
228 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
229 .mr = 9,
230 .qr = 8,
231 };
232 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
233 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
234 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
235 .mr = 7,
236 };
237 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
238
239 /**************************** U8 micro-kernels ****************************/
240 xnn_params.u8.maxpool = (struct maxpool_parameters) {
241 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
242 .mr = 9,
243 .qr = 8,
244 };
245 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
246 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
247 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
248
249 /**************************** X8 micro-kernels ****************************/
250 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
251 xnn_params.x8.zip = (struct zip_parameters) {
252 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
253 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
254 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
255 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
256 };
257
258 /**************************** F32 micro-kernels ****************************/
259#if XNN_ENABLE_ASSEMBLY
260 switch (cpuinfo_get_core(0)->uarch) {
261 case cpuinfo_uarch_kryo:
262 xnn_params.f32.gemm = (struct gemm_parameters) {
263 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
264 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
265 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
266 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
267 .mr = 4,
268 .nr = 8,
269 };
270 break;
271 case cpuinfo_uarch_cortex_a57:
272 xnn_params.f32.gemm = (struct gemm_parameters) {
273 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
274 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
275 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
276 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
277 .mr = 6,
278 .nr = 8,
279 };
280 break;
281 case cpuinfo_uarch_cortex_a72:
282 case cpuinfo_uarch_cortex_a76:
283 xnn_params.f32.gemm = (struct gemm_parameters) {
284 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
285 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
286 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
287 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
288 .mr = 4,
289 .nr = 8,
290 };
291 break;
292 case cpuinfo_uarch_cortex_a75:
Marat Dukhandf6985f2019-10-01 17:04:18 -0700293 case cpuinfo_uarch_mongoose_m1:
294 case cpuinfo_uarch_mongoose_m2:
295 case cpuinfo_uarch_meerkat_m3:
296 case (cpuinfo_uarch_meerkat_m3 + 1):
XNNPACK Teamb455b122019-09-27 18:10:33 -0700297 xnn_params.f32.gemm = (struct gemm_parameters) {
298 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
299 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
300 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
301 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
302 .mr = 6,
303 .nr = 8,
304 };
305 break;
306 case cpuinfo_uarch_cortex_a53:
307 case cpuinfo_uarch_cortex_a55:
308 xnn_params.f32.gemm = (struct gemm_parameters) {
309 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
310 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x12__aarch64_neonfma_cortex_a53,
311 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
312 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x12__aarch64_neonfma_cortex_a53,
313 .mr = 4,
314 .nr = 12,
315 };
316 break;
317 case cpuinfo_uarch_cortex_a73:
318 xnn_params.f32.gemm = (struct gemm_parameters) {
319 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
320 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
321 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
322 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
323 .mr = 6,
324 .nr = 8,
325 };
326 break;
327 default:
328 xnn_params.f32.gemm = (struct gemm_parameters) {
329 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neonfma_ld64,
330 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neonfma_ld64,
331 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
332 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
333 .mr = 4,
334 .nr = 8,
335 };
336 break;
337 }
338#else // XNN_ENABLE_ASSEMBLY
339 xnn_params.f32.gemm = (struct gemm_parameters) {
340 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neonfma_ld64,
341 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neonfma_ld64,
342 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
Frank Barcharddb45b6a2019-10-09 16:42:45 -0700343 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_ld64,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700344 .mr = 4,
345 .nr = 8,
346 };
347#endif
348
349 xnn_params.f32.gemm2 = (struct gemm_parameters) {
350 .gemm = NULL,
351 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
352 .mr = 4,
353 .nr = 2,
354 };
355 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
356 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
357 .cr = 4,
358 .mr = 4,
359 };
360 switch (cpuinfo_get_core(0)->uarch) {
361 case cpuinfo_uarch_kryo:
362 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
363 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
364 .cr = 4,
365 .mr = 9,
366 };
367 break;
368#if XNN_ENABLE_ASSEMBLY
369 case cpuinfo_uarch_cortex_a53:
370 case cpuinfo_uarch_cortex_a55:
371 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
372 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
373 .cr = 4,
374 .mr = 9,
375 };
376 break;
377#endif
378 default:
379 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
380 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
381 .cr = 8,
382 .mr = 9,
383 };
384 break;
385 }
386 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
387 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
388 .cr = 4,
389 .mr = 25,
390 };
391 xnn_params.f32.avgpool = (struct avgpool_parameters) {
392 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
393 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
394 .mr = 9,
395 .qr = 8,
396 };
397 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
398 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
399 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
400 .mr = 9,
401 .qr = 8,
402 };
403 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
404 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
405 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
406 .mr = 7,
407 };
408 xnn_params.f32.maxpool = (struct maxpool_parameters) {
409 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
410 .mr = 9,
411 .qr = 8,
412 };
413 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
414 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
415 .mr = 4,
416 };
417 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
418 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
419 .mr = 9,
420 };
421 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
422 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
423 .mr = 9,
424 .qr = 8,
425 };
426 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
427 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
428 xnn_params.f32.prelu = (struct prelu_parameters) {
429 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
430 .mr = 4,
431 };
432 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
433 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
434 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_x2,
435 .cr = 4,
436 .mr = 2,
437 };
438 xnn_params.f32.spmm = (struct spmm_parameters) {
439 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma,
440 .mr = 16,
441 .nr = 1,
442 };
443 xnn_params.f32.spmm2 = (struct spmm_parameters) {
444 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
445 .mr = 16,
446 .nr = 2,
447 };
448 xnn_params.f32.spmm4 = (struct spmm_parameters) {
449 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
450 .mr = 16,
451 .nr = 4,
452 };
453 xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
454 .ukernel_with_symm_padding =
455 (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
456 .output_channel_tile = 4,
457 .output_height_tile = 2,
458 .output_width_tile = 2,
459 };
460 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
461 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
462 .input_width_tile = 4,
463 .output_width_tile = 4,
464 .output_height_tile = 3,
465 };
466 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
467 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
468 .input_width_tile = 4,
469 .output_width_tile = 4,
470 .output_height_tile = 1,
471 };
472 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
473 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
474 .channel_tile = 4,
475 };
476
477 /**************************** X32 micro-kernels ****************************/
478 xnn_params.x32.pad = (struct pad_parameters) {
479 .ukernel = xnn_x32_pad_x2__neon,
480 .mr = 2,
481 };
482 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
483 xnn_params.x32.zip = (struct zip_parameters) {
484 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
485 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
486 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
487 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
488 };
489
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700490#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700491 if (!cpuinfo_has_x86_sse2()) {
492 xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
493 return;
494 }
495
496 /**************************** Q8 micro-kernels ****************************/
497 xnn_params.q8.gemm = (struct gemm_parameters) {
498 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
499 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
500 .mr = 4,
501 .nr = 4,
502 .log2_kr = 1,
503 };
504 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
505 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
506 .cr = 8,
507 .mr = 9,
508 };
509 xnn_params.q8.avgpool = (struct avgpool_parameters) {
510 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
511 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
512 .mr = 9,
513 .qr = 8,
514 };
515 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
516 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
517 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
518 .mr = 7,
519 };
520 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
521
522 /**************************** U8 micro-kernels ****************************/
523 xnn_params.u8.maxpool = (struct maxpool_parameters) {
524 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
525 .mr = 9,
526 .qr = 8,
527 };
528 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
529 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
530 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
531
532 /**************************** X8 micro-kernels ****************************/
533 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
534 xnn_params.x8.zip = (struct zip_parameters) {
535 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
536 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
537 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
538 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
539 };
540
541 /**************************** F32 micro-kernels ****************************/
542 xnn_params.f32.gemm = (struct gemm_parameters) {
543 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
544 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
545 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
546 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
547 .mr = 4,
548 .nr = 8,
549 };
550 xnn_params.f32.gemm2 = (struct gemm_parameters) {
551 .gemm = NULL,
552 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
553 .mr = 4,
554 .nr = 2,
555 .log2_kr = 2,
556 };
557 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
558 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__sse,
559 .cr = 4,
560 .mr = 4,
561 };
562 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
563 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__sse,
564 .cr = 4,
565 .mr = 9,
566 };
567 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
568 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__sse,
569 .cr = 4,
570 .mr = 25,
571 };
572 xnn_params.f32.avgpool = (struct avgpool_parameters) {
573 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
574 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
575 .mr = 9,
576 .qr = 8,
577 };
578 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
579 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
580 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
581 .mr = 9,
582 .qr = 8,
583 };
584 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
585 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
586 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
587 .mr = 7,
588 };
589 xnn_params.f32.maxpool = (struct maxpool_parameters) {
590 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
591 .mr = 9,
592 .qr = 8,
593 };
594 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
595 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
596 .mr = 4,
597 };
598 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
599 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
600 .mr = 9,
601 };
602 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
603 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
604 .mr = 9,
605 .qr = 8,
606 };
607 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
608 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
609 xnn_params.f32.prelu = (struct prelu_parameters) {
610 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__sse,
611 .mr = 4,
612 };
613 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse;
614 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
615 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_x2,
616 .cr = 4,
617 .mr = 2,
618 };
619 xnn_params.f32.spmm = (struct spmm_parameters) {
620 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
621 .mr = 4,
622 .nr = 1,
623 };
624 xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
625 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
626 .input_width_tile = 4,
627 .output_width_tile = 4,
628 .output_height_tile = 1,
629 };
630 xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
631 .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
632 .input_width_tile = 4,
633 .output_width_tile = 4,
634 .output_height_tile = 1,
635 };
636 xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
637 .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
638 .channel_tile = 4,
639 };
640
641 /**************************** X32 micro-kernels ****************************/
642 xnn_params.x32.pad = (struct pad_parameters) {
643 .ukernel = xnn_x32_pad_x2__sse2,
644 .mr = 2,
645 };
646 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
647 xnn_params.x32.zip = (struct zip_parameters) {
648 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
649 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
650 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
651 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
652 };
653
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700654#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
Marat Dukhan466b5232019-10-09 11:22:20 -0700655 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
656 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
657 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
658 // of two infinities (must produce NaN per IEEE 754 standard).
659 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
660 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
661
XNNPACK Teamb455b122019-09-27 18:10:33 -0700662 /**************************** Q8 micro-kernels ****************************/
663 xnn_params.q8.gemm = (struct gemm_parameters) {
664 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
665 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
666 .mr = 2,
667 .nr = 2,
668 };
669 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
670 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
671 .cr = 1,
672 .mr = 9,
673 };
674 xnn_params.q8.avgpool = (struct avgpool_parameters) {
675 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
676 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
677 .mr = 9,
678 .qr = 8,
679 };
680 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
681 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
682 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
683 .mr = 7,
684 };
685 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
686
687 /**************************** U8 micro-kernels ****************************/
688 xnn_params.u8.maxpool = (struct maxpool_parameters) {
689 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
690 .mr = 9,
691 .qr = 8,
692 };
693 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
694 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
695 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
696
697 /**************************** X8 micro-kernels ****************************/
698 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
699 xnn_params.x8.zip = (struct zip_parameters) {
700 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
701 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
702 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
703 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
704 };
705
706 /**************************** F32 micro-kernels ****************************/
Marat Dukhan466b5232019-10-09 11:22:20 -0700707 if (is_wasm_x86) {
708 xnn_params.f32.gemm = (struct gemm_parameters) {
709 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8s4__psimd,
710 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8s4__psimd,
711 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8s4__psimd,
712 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
713 .mr = 4,
714 .nr = 8,
715 .log2_sr = 2,
716 };
717 } else {
718 xnn_params.f32.gemm = (struct gemm_parameters) {
719 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
720 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
721 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
722 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
723 .mr = 6,
724 .nr = 8,
725 .log2_sr = 2,
726 };
727 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700728 xnn_params.f32.gemm2 = (struct gemm_parameters) {
729 .gemm = NULL,
730 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
731 .mr = 4,
732 .nr = 2,
733 .log2_kr = 2,
734 };
735 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
736 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
737 .cr = 4,
738 .mr = 4,
739 };
740 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
741 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd,
742 .cr = 4,
743 .mr = 9,
744 };
745 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
746 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
747 .cr = 4,
748 .mr = 25,
749 };
750 xnn_params.f32.avgpool = (struct avgpool_parameters) {
751 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
752 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
753 .mr = 9,
754 .qr = 8,
755 };
756 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
757 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
758 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
759 .mr = 9,
760 .qr = 8,
761 };
762 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
763 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
764 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
765 .mr = 7,
766 };
767 xnn_params.f32.maxpool = (struct maxpool_parameters) {
768 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
769 .mr = 9,
770 .qr = 8,
771 };
772 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
773 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
774 .mr = 4,
775 };
776 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
777 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
778 .mr = 9,
779 };
780 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
781 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
782 .mr = 9,
783 .qr = 8,
784 };
785 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
786 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
787 xnn_params.f32.prelu = (struct prelu_parameters) {
788 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__psimd,
789 .mr = 4,
790 };
791 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
792 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
793 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_x2,
794 .cr = 4,
795 .mr = 2,
796 };
797
798 /**************************** X32 micro-kernels ****************************/
799 xnn_params.x32.pad = (struct pad_parameters) {
800 .ukernel = xnn_x32_pad_x2__psimd,
801 .mr = 2,
802 };
803 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
804 xnn_params.x32.zip = (struct zip_parameters) {
805 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
806 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
807 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
808 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
809 };
810
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700811#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700812 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
813 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
814 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
815 // of two infinities (must produce NaN per IEEE 754 standard).
816 static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
817 const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
818
819 /**************************** Q8 micro-kernels ****************************/
820 xnn_params.q8.gemm = (struct gemm_parameters) {
821 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
822 .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
823 .mr = 2,
824 .nr = 2,
825 };
826 xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
827 .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
828 .cr = 1,
829 .mr = 9,
830 };
831 xnn_params.q8.avgpool = (struct avgpool_parameters) {
832 .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
833 .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
834 .mr = 9,
835 .qr = 8,
836 };
837 xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
838 .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
839 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
840 .mr = 7,
841 };
842 xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
843
844 /**************************** U8 micro-kernels ****************************/
845 xnn_params.u8.maxpool = (struct maxpool_parameters) {
846 .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
847 .mr = 9,
848 .qr = 8,
849 };
850 xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
851 xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
852 xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
853
854 /**************************** X8 micro-kernels ****************************/
855 xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
856 xnn_params.x8.zip = (struct zip_parameters) {
857 .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
858 .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
859 .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
860 .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
861 };
862
863 /**************************** F32 micro-kernels ****************************/
864 if (is_wasm_x86) {
865 xnn_params.f32.gemm = (struct gemm_parameters) {
866 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
867 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
868 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
869 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
870 .mr = 2,
871 .nr = 4,
872 };
873 } else {
874 xnn_params.f32.gemm = (struct gemm_parameters) {
875 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
876 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
877 .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
878 .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
879 .mr = 4,
880 .nr = 4,
881 };
882 }
883 xnn_params.f32.gemm2 = (struct gemm_parameters) {
884 .gemm = NULL,
885 .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
886 .mr = 4,
887 .nr = 2,
888 };
889 xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
890 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar,
891 .cr = 1,
892 .mr = 4,
893 };
894 xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
895 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar,
896 .cr = 1,
897 .mr = 9,
898 };
899 xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
900 .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar,
901 .cr = 1,
902 .mr = 25,
903 };
904 xnn_params.f32.avgpool = (struct avgpool_parameters) {
905 .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
906 .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
907 .mr = 9,
908 .qr = 8,
909 };
910 xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
911 .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
912 .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
913 .mr = 9,
914 .qr = 8,
915 };
916 xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
917 .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
918 .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
919 .mr = 7,
920 };
921 xnn_params.f32.maxpool = (struct maxpool_parameters) {
922 .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
923 .mr = 9,
924 .qr = 8,
925 };
926 xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
927 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
928 .mr = 4,
929 };
930 xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
931 .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
932 .mr = 9,
933 };
934 xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
935 .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
936 .mr = 9,
937 .qr = 8,
938 };
939 xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
940 xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
941 xnn_params.f32.prelu = (struct prelu_parameters) {
942 .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel_x4__scalar,
943 .mr = 4,
944 };
945 xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar;
946 xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
947 .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_x2,
948 .cr = 1,
949 .mr = 2,
950 };
951 xnn_params.f32.spmm = (struct spmm_parameters) {
952 .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__scalar,
953 .mr = 4,
954 .nr = 1,
955 };
956
957 /**************************** X32 micro-kernels ****************************/
958 xnn_params.x32.pad = (struct pad_parameters) {
959 .ukernel = xnn_x32_pad_x2__scalar,
960 .mr = 2,
961 };
962 xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
963 xnn_params.x32.zip = (struct zip_parameters) {
964 .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
965 .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
966 .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
967 .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
968 };
969
970#else
971 #error "Unsupported architecture"
972#endif
973 xnn_params.initialized = true;
974}
975
976enum xnn_status xnn_initialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -0700977 #ifndef __EMSCRIPTEN__
978 if (!cpuinfo_initialize()) {
979 return xnn_status_out_of_memory;
980 }
981 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -0700982 pthread_once(&init_guard, &init);
983 if (xnn_params.initialized) {
984 return xnn_status_success;
985 } else {
986 return xnn_status_unsupported_hardware;
987 }
988}
989
990enum xnn_status xnn_deinitialize(void) {
Marat Dukhand343c222019-10-07 09:22:14 -0700991 #ifndef __EMSCRIPTEN__
992 cpuinfo_deinitialize();
993 #endif
XNNPACK Teamb455b122019-09-27 18:10:33 -0700994 return xnn_status_success;
995}