// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <pthread.h>

#ifndef __EMSCRIPTEN__
  #include <cpuinfo.h>
#endif

#include <xnnpack.h>
#include <xnnpack/argmaxpool.h>
#include <xnnpack/avgpool.h>
#include <xnnpack/clamp.h>
#include <xnnpack/common.h>
#include <xnnpack/conv.h>
#include <xnnpack/dwconv.h>
#include <xnnpack/gavgpool.h>
#include <xnnpack/gemm.h>
#include <xnnpack/hswish.h>
#include <xnnpack/igemm.h>
#include <xnnpack/log.h>
#include <xnnpack/lut.h>
#include <xnnpack/maxpool.h>
#include <xnnpack/pad.h>
#include <xnnpack/params.h>
#include <xnnpack/pavgpool.h>
#include <xnnpack/prelu.h>
#include <xnnpack/rmax.h>
#include <xnnpack/spmm.h>
#include <xnnpack/unpool.h>
#include <xnnpack/vadd.h>
#include <xnnpack/vmulcaddc.h>
#include <xnnpack/zip.h>

#ifndef XNN_ENABLE_ASSEMBLY
  #define XNN_ENABLE_ASSEMBLY 1
#endif

static pthread_once_t init_guard = PTHREAD_ONCE_INIT;

struct xnn_parameters xnn_params = {
  .initialized = false
};
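
// xnn_params is the global table of micro-kernel function pointers and tile parameters
// selected for the host CPU. It is populated exactly once by init(): xnn_initialize()
// dispatches through pthread_once(&init_guard, ...), so concurrent callers are safe.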

#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
#endif
#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
#endif

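// Selects micro-kernel implementations for the current architecture and, on ARM/ARM64,
// for the CPU micro-architecture reported by cpuinfo, then marks xnn_params as
// initialized. Runs at most once per process (see xnn_initialize below).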
static void init(void) {
#if XNN_ARCH_ARM
  if (!cpuinfo_has_arm_neon()) {
    xnn_log_error("XNNPACK initialization failed: NEON is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
      .mr = 4,
      .nr = 8,
    };

    #if XNN_ENABLE_ASSEMBLY
      xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
        .cr = 8,
        .mr = 9,
      };
    #else
      xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
        .cr = 8,
        .mr = 9,
      };
    #endif
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
      .mr = 4,
      .nr = 8,
    };
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
      .mr = 4,
      .nr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
      .cr = 4,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
      .cr = 4,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
      .cr = 4,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__neon,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_ARM64

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
      .mr = 8,
      .nr = 8,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
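    // When assembly micro-kernels are enabled, the F32 GEMM is picked per the
    // micro-architecture of core 0 reported by cpuinfo; unrecognized cores (and
    // builds without assembly) fall back to the generic NEONFMA kernels.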
    #if XNN_ENABLE_ASSEMBLY
      switch (cpuinfo_get_core(0)->uarch) {
        case cpuinfo_uarch_kryo:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 4,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a57:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
            .mr = 6,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a72:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 4,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a75:
        case cpuinfo_uarch_cortex_a76:
        case cpuinfo_uarch_mongoose_m1:
        case cpuinfo_uarch_mongoose_m2:
        case cpuinfo_uarch_meerkat_m3:
        case (cpuinfo_uarch_meerkat_m3 + 1):
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 6,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a53:
        case cpuinfo_uarch_cortex_a55:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
            .mr = 6,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a73:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 6,
            .nr = 8,
          };
          break;
        default:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 6,
            .nr = 8,
          };
          break;
      }
    #else  // XNN_ENABLE_ASSEMBLY
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_ld64,
        .mr = 6,
        .nr = 8,
      };
    #endif

    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
      .mr = 4,
      .nr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
      .cr = 4,
      .mr = 4,
    };
    switch (cpuinfo_get_core(0)->uarch) {
      case cpuinfo_uarch_kryo:
        xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
          .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
          .cr = 4,
          .mr = 9,
        };
        break;
#if XNN_ENABLE_ASSEMBLY
      case cpuinfo_uarch_cortex_a53:
      case cpuinfo_uarch_cortex_a55:
        xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
          .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
          .cr = 4,
          .mr = 9,
        };
        break;
#endif
      default:
        xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
          .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
          .cr = 8,
          .mr = 9,
        };
        break;
    }
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
      .cr = 4,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
    #ifndef XNN_NO_SPNCHW_OPERATORS
      xnn_params.f32.spmm = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
        .mr = 16,
        .nr = 1,
      };
      xnn_params.f32.spmm2 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
        .mr = 16,
        .nr = 2,
      };
      xnn_params.f32.spmm4 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
        .mr = 16,
        .nr = 4,
      };
      xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
        .ukernel_with_symm_padding =
            (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
        .output_channel_tile = 4,
        .output_height_tile = 2,
        .output_width_tile = 2,
      };
      xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 3,
      };
      xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
        .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
        .channel_tile = 4,
      };
    #endif  // XNN_NO_SPNCHW_OPERATORS
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__neon,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
  if (!cpuinfo_has_x86_sse2()) {
    xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
      .mr = 4,
      .nr = 4,
      .log2_kr = 1,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
      .mr = 4,
      .nr = 8,
    };
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
      .mr = 4,
      .nr = 2,
      .log2_kr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x4__sse,
      .cr = 8,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__sse,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__sse,
      .cr = 8,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse;
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
    #ifndef XNN_NO_SPNCHW_OPERATORS
      xnn_params.f32.spmm = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
        .mr = 4,
        .nr = 1,
      };
      xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
        .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
        .channel_tile = 4,
      };
    #endif  // XNN_NO_SPNCHW_OPERATORS
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__sse2,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
  // Unlike most other architectures, on x86/x86-64 when floating-point instructions
  // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
  // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
  // of two infinities (must produce NaN per IEEE 754 standard).
  static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
  const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;
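  // For reference: x86 SSE typically yields the "indefinite" quiet NaN 0xFFC00000 for
  // Inf - Inf, which is negative when reinterpreted as int32, while ARM and most other
  // hosts yield the default NaN 0x7FC00000 with the sign bit clear.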

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
      .mr = 2,
      .nr = 2,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
      .cr = 1,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    if (is_wasm_x86) {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_splat,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
        .mr = 4,
        .nr = 8,
      };
    } else {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8s4__psimd,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
        .mr = 6,
        .nr = 8,
        .log2_sr = 2,
      };
    }
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
      .mr = 4,
      .nr = 2,
      .log2_kr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd_acc2,
      .cr = 4,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd_acc2,
      .cr = 4,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd_acc2,
      .cr = 4,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__psimd_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__psimd,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
  // Unlike most other architectures, on x86/x86-64 when floating-point instructions
  // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
  // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
  // of two infinities (must produce NaN per IEEE 754 standard).
  static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
  const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
      .mr = 2,
      .nr = 2,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
      .cr = 1,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    if (is_wasm_x86) {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
        .mr = 2,
        .nr = 4,
      };
    } else {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
        .mr = 4,
        .nr = 4,
      };
    }
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
      .mr = 4,
      .nr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar_acc2,
      .cr = 1,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar_acc2,
      .cr = 1,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar_acc2,
      .cr = 1,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__scalar_2x4,
      .row_tile = 4,
      .channel_tile = 4,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar;
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_2x,
      .channel_tile = 1,
      .row_tile = 2,
    };
    #ifndef XNN_NO_SPNCHW_OPERATORS
      xnn_params.f32.spmm = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x1__scalar,
        .mr = 8,
        .nr = 1,
      };
      xnn_params.f32.spmm2 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x2__scalar,
        .mr = 8,
        .nr = 2,
      };
      xnn_params.f32.spmm4 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x4__scalar,
        .mr = 8,
        .nr = 4,
      };
      xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
        .ukernel_with_symm_padding =
            (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1,
        .output_channel_tile = 4,
        .output_height_tile = 1,
        .output_width_tile = 1,
      };
      xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar,
        .input_width_tile = 1,
        .output_width_tile = 1,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar,
        .input_width_tile = 1,
        .output_width_tile = 1,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
        .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
        .channel_tile = 1,
      };
    #endif  // XNN_NO_SPNCHW_OPERATORS
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__scalar,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
    };
  #endif  // XNN_NO_X32_OPERATORS

#else
  #error "Unsupported architecture"
#endif
  xnn_params.initialized = true;
}

enum xnn_status xnn_initialize(void) {
  #ifndef __EMSCRIPTEN__
    if (!cpuinfo_initialize()) {
      return xnn_status_out_of_memory;
    }
  #endif
  pthread_once(&init_guard, &init);
  if (xnn_params.initialized) {
    return xnn_status_success;
  } else {
    return xnn_status_unsupported_hardware;
  }
}

enum xnn_status xnn_deinitialize(void) {
  #ifndef __EMSCRIPTEN__
    cpuinfo_deinitialize();
  #endif
  return xnn_status_success;
}
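
// A minimal usage sketch (illustrative only, not part of this library source): an
// application calls xnn_initialize() once before creating any XNNPACK operators, and
// may call xnn_deinitialize() when it no longer needs the library. The hypothetical
// main() below is excluded from compilation with #if 0.
#if 0
#include <stdio.h>
#include <xnnpack.h>

int main(void) {
  const enum xnn_status status = xnn_initialize();
  if (status != xnn_status_success) {
    // xnn_status_unsupported_hardware is returned when the CPU lacks required features
    // (e.g. NEON on ARM or SSE2 on x86); see init() above.
    fprintf(stderr, "XNNPACK initialization failed: %d\n", (int) status);
    return 1;
  }
  // ... create, set up, and run XNNPACK operators here ...
  xnn_deinitialize();
  return 0;
}
#endif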