// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <pthread.h>

#ifndef __EMSCRIPTEN__
  #include <cpuinfo.h>
#endif

#include <xnnpack.h>
#include <xnnpack/argmaxpool.h>
#include <xnnpack/avgpool.h>
#include <xnnpack/bilinear.h>
#include <xnnpack/clamp.h>
#include <xnnpack/common.h>
#include <xnnpack/conv.h>
#include <xnnpack/dwconv.h>
#include <xnnpack/gavgpool.h>
#include <xnnpack/gemm.h>
#include <xnnpack/hswish.h>
#include <xnnpack/igemm.h>
#include <xnnpack/log.h>
#include <xnnpack/lut.h>
#include <xnnpack/maxpool.h>
#include <xnnpack/pad.h>
#include <xnnpack/params.h>
#include <xnnpack/pavgpool.h>
#include <xnnpack/prelu.h>
#include <xnnpack/rmax.h>
#include <xnnpack/spmm.h>
#include <xnnpack/unpool.h>
#include <xnnpack/vadd.h>
#include <xnnpack/vmulcaddc.h>
#include <xnnpack/zip.h>

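// Assembly micro-kernels are enabled by default; a build can opt out by
// pre-defining XNN_ENABLE_ASSEMBLY to 0 (e.g. on the compiler command line).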
#ifndef XNN_ENABLE_ASSEMBLY
  #define XNN_ENABLE_ASSEMBLY 1
#endif

static pthread_once_t init_guard = PTHREAD_ONCE_INIT;

struct xnn_parameters xnn_params = {
  .initialized = false
};

#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
#endif
#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
#endif

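// init() runs exactly once per process (via the pthread_once guard in
// xnn_initialize below) and fills xnn_params with the micro-kernels selected
// for the target architecture: ARM requires NEON, x86/x86-64 requires SSE2,
// and on ARM64 the F32 GEMM and DWCONV variants are further specialized for
// the core micro-architecture reported by cpuinfo.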
static void init(void) {
#if XNN_ARCH_ARM
  if (!cpuinfo_has_arm_neon()) {
    xnn_log_error("XNNPACK initialization failed: NEON is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x8__neon,
      .mr = 4,
      .nr = 8,
    };

    #if XNN_ENABLE_ASSEMBLY
      xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__aarch32_neon,
        .cr = 8,
        .mr = 9,
      };
    #else
      xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
        .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
        .cr = 8,
        .mr = 9,
      };
    #endif
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_ld128,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_ld128,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_ld64,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_ld64,
      .mr = 4,
      .nr = 8,
    };
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neon_ld64,
      .mr = 4,
      .nr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
      .cr = 4,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neon,
      .cr = 4,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
      .cr = 4,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neon_c8,
      .pixel_tile = 1,
      .channel_tile = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neon;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__neon,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_ARM64

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_8x8__neon,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_8x8__neon,
      .mr = 8,
      .nr = 8,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__neon,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__neon;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__neon;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__neon;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    #if XNN_ENABLE_ASSEMBLY
      switch (cpuinfo_get_core(0)->uarch) {
        case cpuinfo_uarch_kryo:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 4,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a57:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57,
            .mr = 6,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a72:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 4,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a75:
        case cpuinfo_uarch_cortex_a76:
        case cpuinfo_uarch_mongoose_m1:
        case cpuinfo_uarch_mongoose_m2:
        case cpuinfo_uarch_meerkat_m3:
        case (cpuinfo_uarch_meerkat_m3 + 1):
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 6,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a53:
        case cpuinfo_uarch_cortex_a55:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a53,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a53,
            .mr = 6,
            .nr = 8,
          };
          break;
        case cpuinfo_uarch_cortex_a73:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 6,
            .nr = 8,
          };
          break;
        default:
          xnn_params.f32.gemm = (struct gemm_parameters) {
            .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
            .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
            .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75,
            .mr = 6,
            .nr = 8,
          };
          break;
      }
    #else  // XNN_ENABLE_ASSEMBLY
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__neonfma_ld64,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__neonfma_ld64,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neonfma_ld64,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neonfma_ld64,
        .mr = 6,
        .nr = 8,
      };
    #endif

    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__neonfma_ld64,
      .mr = 4,
      .nr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd,
      .cr = 4,
      .mr = 4,
    };
    switch (cpuinfo_get_core(0)->uarch) {
      case cpuinfo_uarch_kryo:
        xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
          .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__neonfma,
          .cr = 4,
          .mr = 9,
        };
        break;
#if XNN_ENABLE_ASSEMBLY
      case cpuinfo_uarch_cortex_a53:
      case cpuinfo_uarch_cortex_a55:
        xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
          .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
          .cr = 4,
          .mr = 9,
        };
        break;
#endif
      default:
        xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
          .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__neonfma,
          .cr = 8,
          .mr = 9,
        };
        break;
    }
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd,
      .cr = 4,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__neon,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__neon,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__neon,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__neon,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__neon,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neonfma_c8,
      .pixel_tile = 1,
      .channel_tile = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__neon;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__neonfma;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__neon_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
    #ifndef XNN_NO_SPNCHW_OPERATORS
      xnn_params.f32.spmm = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
        .mr = 16,
        .nr = 1,
      };
      xnn_params.f32.spmm2 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x2__neonfma,
        .mr = 16,
        .nr = 2,
      };
      xnn_params.f32.spmm4 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x4__neonfma,
        .mr = 16,
        .nr = 4,
      };
      xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
        .ukernel_with_symm_padding =
          (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
        .output_channel_tile = 4,
        .output_height_tile = 2,
        .output_width_tile = 2,
      };
      xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 3,
      };
      xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
        .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
        .channel_tile = 4,
      };
    #endif  // XNN_NO_SPNCHW_OPERATORS
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__neon,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__neon,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__neon,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__neon,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
  if (!cpuinfo_has_x86_sse2()) {
    xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
    return;
  }

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x4c2__sse2,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_4x4c2__sse2,
      .mr = 4,
      .nr = 4,
      .log2_kr = 1,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up8x9__sse2,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__sse2,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__sse2,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__sse2,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__sse2,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__sse2;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__sse2,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__sse2;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__sse2;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__sse2,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__sse2,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__sse2,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__sse2,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    xnn_params.f32.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__sse_load1,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__sse_load1,
      .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__sse_load1,
      .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__sse_load1,
      .mr = 4,
      .nr = 8,
    };
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__sse,
      .mr = 4,
      .nr = 2,
      .log2_kr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x4__sse,
      .cr = 8,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x9__sse,
      .cr = 8,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up8x25__sse,
      .cr = 8,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__sse,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__sse,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__sse,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__sse,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__sse,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__sse,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__sse,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__sse2,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__sse2,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__sse2,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__sse_c8,
      .pixel_tile = 1,
      .channel_tile = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__sse;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__sse;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__sse2_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__sse;
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
    #ifndef XNN_NO_SPNCHW_OPERATORS
      xnn_params.f32.spmm = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
        .mr = 4,
        .nr = 1,
      };
      xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
        .input_width_tile = 4,
        .output_width_tile = 4,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
        .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
        .channel_tile = 4,
      };
    #endif  // XNN_NO_SPNCHW_OPERATORS
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__sse2,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__sse2,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__sse2,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__sse2,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
  // Unlike most other architectures, on x86/x86-64, when floating-point instructions
  // have no NaN arguments but produce a NaN output, the output NaN has its sign bit set.
  // We use this to distinguish x86/x86-64 from other architectures by subtracting two
  // infinities (which must produce a NaN per the IEEE 754 standard).
  static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
  const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
      .mr = 2,
      .nr = 2,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
      .cr = 1,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    if (is_wasm_x86) {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__psimd_splat,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__psimd_splat,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__psimd_splat,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__psimd_splat,
        .mr = 4,
        .nr = 8,
      };
    } else {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__psimd,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__psimd,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8s4__psimd,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8s4__psimd,
        .mr = 6,
        .nr = 8,
        .log2_sr = 2,
      };
    }
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2c4__psimd,
      .mr = 4,
      .nr = 2,
      .log2_kr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x4__psimd_acc2,
      .cr = 4,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x9__psimd_acc2,
      .cr = 4,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up4x25__psimd_acc2,
      .cr = 4,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__psimd,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__psimd,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__psimd,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__psimd,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__psimd,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__psimd,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__psimd,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__psimd_c8,
      .pixel_tile = 1,
      .channel_tile = 8,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__psimd;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__psimd;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__psimd_2x8,
      .row_tile = 2,
      .channel_tile = 8,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__psimd;
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_2x,
      .channel_tile = 4,
      .row_tile = 2,
    };
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__psimd,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__psimd;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__psimd,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__psimd,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__psimd,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
    };
  #endif  // XNN_NO_X32_OPERATORS

#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
  // Unlike most other architectures, on x86/x86-64, when floating-point instructions
  // have no NaN arguments but produce a NaN output, the output NaN has its sign bit set.
  // We use this to distinguish x86/x86-64 from other architectures by subtracting two
  // infinities (which must produce a NaN per the IEEE 754 standard).
  static volatile uint32_t minus_inf = UINT32_C(0xFF800000);
  const bool is_wasm_x86 = (int32_t) xnn_stub_wasm_f32_sub(minus_inf, minus_inf) < 0;

  /**************************** Q8 micro-kernels ****************************/
  #ifndef XNN_NO_Q8_OPERATORS
    xnn_params.q8.gemm = (struct gemm_parameters) {
      .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
      .igemm = (xnn_igemm_ukernel_function) xnn_q8_igemm_ukernel_2x2__scalar,
      .mr = 2,
      .nr = 2,
    };
    xnn_params.q8.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_q8_dwconv_ukernel_up1x9__scalar,
      .cr = 1,
      .mr = 9,
    };
    xnn_params.q8.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_q8_avgpool_ukernel_up9__scalar,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_q8_avgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.q8.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_q8_gavgpool_ukernel_up7__scalar,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_q8_gavgpool_ukernel_mp7p7q__scalar,
      .mr = 7,
    };
    xnn_params.q8.vadd = (xnn_vadd_ukernel_function) xnn_q8_vadd_ukernel__scalar;
  #endif  // XNN_NO_Q8_OPERATORS

  /**************************** U8 micro-kernels ****************************/
  #ifndef XNN_NO_U8_OPERATORS
    xnn_params.u8.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_u8_maxpool_ukernel_9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.u8.clamp = (xnn_univector_ukernel_function) xnn_u8_clamp_ukernel__scalar;
    xnn_params.u8.lut32norm = xnn_u8_lut32norm_ukernel__scalar;
    xnn_params.u8.rmax = xnn_u8_rmax_ukernel__scalar;
  #endif  // XNN_NO_U8_OPERATORS

  /**************************** X8 micro-kernels ****************************/
  #ifndef XNN_NO_X8_OPERATORS
    xnn_params.x8.lut = xnn_x8_lut_ukernel__scalar;
    xnn_params.x8.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x8_zip_x2_ukernel__scalar,
      .x3 = (xnn_zipc_ukernel_function) xnn_x8_zip_x3_ukernel__scalar,
      .x4 = (xnn_zipc_ukernel_function) xnn_x8_zip_x4_ukernel__scalar,
      .xm = (xnn_zipv_ukernel_function) xnn_x8_zip_xm_ukernel__scalar,
    };
  #endif  // XNN_NO_X8_OPERATORS

  /**************************** F32 micro-kernels ****************************/
  #ifndef XNN_NO_F32_OPERATORS
    if (is_wasm_x86) {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_2x4__scalar,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_2x4__scalar,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
        .mr = 2,
        .nr = 4,
      };
    } else {
      xnn_params.f32.gemm = (struct gemm_parameters) {
        .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x4__scalar,
        .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x4__scalar,
        .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x4__scalar,
        .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x4__scalar,
        .mr = 4,
        .nr = 4,
      };
    }
    xnn_params.f32.gemm2 = (struct gemm_parameters) {
      .gemm = NULL,
      .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x2__scalar,
      .mr = 4,
      .nr = 2,
    };
    xnn_params.f32.dwconv[0] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x4__scalar_acc2,
      .cr = 1,
      .mr = 4,
    };
    xnn_params.f32.dwconv[1] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x9__scalar_acc2,
      .cr = 1,
      .mr = 9,
    };
    xnn_params.f32.dwconv[2] = (struct dwconv_parameters) {
      .up = (xnn_dwconv_up_ukernel_function) xnn_f32_dwconv_ukernel_up1x25__scalar_acc2,
      .cr = 1,
      .mr = 25,
    };
    xnn_params.f32.avgpool = (struct avgpool_parameters) {
      .up = (xnn_avgpool_up_ukernel_function) xnn_f32_avgpool_ukernel_up9__scalar,
      .mp = (xnn_avgpool_mp_ukernel_function) xnn_f32_avgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.pavgpool = (struct pavgpool_parameters) {
      .up = (xnn_pavgpool_up_ukernel_function) xnn_f32_pavgpool_ukernel_up9__scalar,
      .mp = (xnn_pavgpool_mp_ukernel_function) xnn_f32_pavgpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.gavgpool = (struct gavgpool_parameters) {
      .up = (xnn_gavgpool_up_ukernel_function) xnn_f32_gavgpool_ukernel_up7__scalar,
      .mp = (xnn_gavgpool_mp_ukernel_function) xnn_f32_gavgpool_ukernel_mp7p7q__scalar,
      .mr = 7,
    };
    xnn_params.f32.maxpool = (struct maxpool_parameters) {
      .ukernel = (xnn_maxpool_ukernel_function) xnn_f32_maxpool_ukernel_9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.argmaxpool[0] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up4__scalar,
      .mr = 4,
    };
    xnn_params.f32.argmaxpool[1] = (struct argmaxpool_parameters) {
      .up = (xnn_argmaxpool_up_ukernel_function) xnn_f32_argmaxpool_ukernel_up9__scalar,
      .mr = 9,
    };
    xnn_params.f32.argmaxpool[2] = (struct argmaxpool_parameters) {
      .mp = (xnn_argmaxpool_mp_ukernel_function) xnn_f32_argmaxpool_ukernel_mp9p8q__scalar,
      .mr = 9,
      .qr = 8,
    };
    xnn_params.f32.bilinear = (struct bilinear_parameters) {
      .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__scalar_c2,
      .pixel_tile = 1,
      .channel_tile = 2,
    };
    xnn_params.f32.clamp = (xnn_univector_ukernel_function) xnn_f32_clamp_ukernel__scalar;
    xnn_params.f32.hswish = (xnn_univector_ukernel_function) xnn_f32_hswish_ukernel__scalar;
    xnn_params.f32.prelu = (struct prelu_parameters) {
      .ukernel = (xnn_prelu_ukernel_function) xnn_f32_prelu_ukernel__scalar_2x4,
      .row_tile = 4,
      .channel_tile = 4,
    };
    xnn_params.f32.vadd = (xnn_vadd_ukernel_function) xnn_f32_vadd_ukernel__scalar;
    xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
      .ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_2x,
      .channel_tile = 1,
      .row_tile = 2,
    };
    #ifndef XNN_NO_SPNCHW_OPERATORS
      xnn_params.f32.spmm = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x1__scalar,
        .mr = 8,
        .nr = 1,
      };
      xnn_params.f32.spmm2 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x2__scalar,
        .mr = 8,
        .nr = 2,
      };
      xnn_params.f32.spmm4 = (struct spmm_parameters) {
        .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x4__scalar,
        .mr = 8,
        .nr = 4,
      };
      xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
        .ukernel_with_symm_padding =
          (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1,
        .output_channel_tile = 4,
        .output_height_tile = 1,
        .output_width_tile = 1,
      };
      xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar,
        .input_width_tile = 1,
        .output_width_tile = 1,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
        .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar,
        .input_width_tile = 1,
        .output_width_tile = 1,
        .output_height_tile = 1,
      };
      xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
        .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
        .channel_tile = 1,
      };
    #endif  // XNN_NO_SPNCHW_OPERATORS
  #endif  // XNN_NO_F32_OPERATORS

  /**************************** X32 micro-kernels ****************************/
  #ifndef XNN_NO_X32_OPERATORS
    xnn_params.x32.pad = (struct pad_parameters) {
      .ukernel = xnn_x32_pad_x2__scalar,
      .mr = 2,
    };
    xnn_params.x32.unpool = (xnn_unpool_ukernel_function) xnn_x32_unpool_ukernel__scalar;
    xnn_params.x32.zip = (struct zip_parameters) {
      .x2 = (xnn_zipc_ukernel_function) xnn_x32_zip_x2_ukernel__scalar,
      .x3 = (xnn_zipc_ukernel_function) xnn_x32_zip_x3_ukernel__scalar,
      .x4 = (xnn_zipc_ukernel_function) xnn_x32_zip_x4_ukernel__scalar,
      .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__scalar,
    };
  #endif  // XNN_NO_X32_OPERATORS

#else
  #error "Unsupported architecture"
#endif
  xnn_params.initialized = true;
}

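// Usage sketch (comment only, not part of the initialization logic): callers
// are expected to check the status of xnn_initialize() before creating any
// operators from xnnpack.h, and to call xnn_deinitialize() when done.
//
//   if (xnn_initialize() != xnn_status_success) {
//     // xnn_status_unsupported_hardware: no NEON on ARM / no SSE2 on x86;
//     // xnn_status_out_of_memory: cpuinfo failed to initialize.
//   }
//   ... create, setup, and run XNNPACK operators ...
//   xnn_deinitialize();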
enum xnn_status xnn_initialize(void) {
  #ifndef __EMSCRIPTEN__
    if (!cpuinfo_initialize()) {
      return xnn_status_out_of_memory;
    }
  #endif
  pthread_once(&init_guard, &init);
  if (xnn_params.initialized) {
    return xnn_status_success;
  } else {
    return xnn_status_unsupported_hardware;
  }
}

enum xnn_status xnn_deinitialize(void) {
  #ifndef __EMSCRIPTEN__
    cpuinfo_deinitialize();
  #endif
  return xnn_status_success;
}